[weboob] [PATCH 1/2] Fix empty fields in dailymotion plugin

Vincent Texier vit at free.fr
Sun Jan 12 12:06:01 CET 2014


Change extension from flv to mp4, because all quality formats are h264/mp4

Signed-off-by: Vincent Texier <vit at free.fr>
---
 modules/dailymotion/pages.py |   33 +++++++++++++++++++++++++++++----
 modules/dailymotion/video.py |    2 +-
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/modules/dailymotion/pages.py b/modules/dailymotion/pages.py
index ec3e0ab..a640d43 100644
--- a/modules/dailymotion/pages.py
+++ b/modules/dailymotion/pages.py
@@ -86,12 +86,37 @@ class VideoPage(BasePage):
         if video is None:
             video = DailymotionVideo(self.group_dict['id'])
 
-        div = self.parser.select(self.document.getroot(), 'div#content', 1)
+        head = self.parser.select(self.document.getroot(), 'head', 1)
 
-        video.title = unicode(self.parser.select(div, 'div, meta[itemprop=name]', 1).get("content")).strip()
-        video.author = unicode(self.parser.select(div, 'div, meta[itemprop=author]', 1).get("content")).strip()
+        video.title = unicode(self.parser.select(head, 'meta[property="og:title"]', 1).get("content")).strip()
+        video.author = unicode(self.parser.select(head, 'meta[name="author"]', 1).get("content")).strip()
+        
+        url = unicode(self.parser.select(head, 'meta[property="og:image"]', 1).get("content")).strip()
+        # remove the useless anti-caching
+        url = re.sub('\?\d+', '', url)
+        video.thumbnail = BaseImage(url)
+        video.thumbnail.url = video.thumbnail.id
+
+        try:
+            parts = self.parser.select(head, 'meta[property="video:duration"]', 1).get("content").strip().split(':')
+        except BrokenPageError:
+            # it's probably a live, np.
+            video.duration = NotAvailable
+        else:
+            if len(parts) == 1:
+                seconds = parts[0]
+                hours = minutes = 0
+            elif len(parts) == 2:
+                minutes, seconds = parts
+                hours = 0
+            elif len(parts) == 3:
+                hours, minutes, seconds = parts
+            else:
+                raise BrokenPageError('Unable to parse duration %r' % self.parser.select(div, 'div.duration', 1).text)
+            video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
+        
         try:
-            video.description = html2text(self.parser.tostring(self.parser.select(div, 'div, meta[itemprop=description]', 1))).strip() or unicode()
+            video.description = html2text(self.parser.select(head, 'meta[property="og:description"]', 1).get("content")).strip() or unicode()
         except BrokenPageError:
             video.description = u''
 
diff --git a/modules/dailymotion/video.py b/modules/dailymotion/video.py
index 1ff2e84..8e2f084 100644
--- a/modules/dailymotion/video.py
+++ b/modules/dailymotion/video.py
@@ -27,7 +27,7 @@ __all__ = ['DailymotionVideo']
 class DailymotionVideo(BaseVideo):
     def __init__(self, *args, **kwargs):
         BaseVideo.__init__(self, *args, **kwargs)
-        self.ext = u'flv'
+        self.ext = u'mp4'
 
     @classmethod
     def id2url(cls, _id):
-- 
1.7.9.5




More information about the weboob mailing list