[weboob] [PATCH 1/1] Add a video module for gdcvault.com

François Revol revol at free.fr
Fri Aug 31 16:29:45 CEST 2012


For now it only fetches the speaker video, although each page can have
both a speaker video feed and a slides video feed.
TODO: implement search.

Signed-off-by: François Revol <revol at free.fr>
---
 modules/gdcvault/__init__.py |    3 +
 modules/gdcvault/backend.py  |   82 ++++++++++++++++++++++
 modules/gdcvault/browser.py  |   64 +++++++++++++++++
 modules/gdcvault/favicon.png |  Bin 0 -> 3482 bytes
 modules/gdcvault/favicon.xcf |  Bin 0 -> 5024 bytes
 modules/gdcvault/pages.py    |  159 ++++++++++++++++++++++++++++++++++++++++++
 modules/gdcvault/test.py     |   42 +++++++++++
 modules/gdcvault/video.py    |   44 ++++++++++++
 8 files changed, 394 insertions(+)
 create mode 100644 modules/gdcvault/__init__.py
 create mode 100644 modules/gdcvault/backend.py
 create mode 100644 modules/gdcvault/browser.py
 create mode 100644 modules/gdcvault/favicon.png
 create mode 100644 modules/gdcvault/favicon.xcf
 create mode 100644 modules/gdcvault/pages.py
 create mode 100644 modules/gdcvault/test.py
 create mode 100644 modules/gdcvault/video.py

diff --git a/modules/gdcvault/__init__.py b/modules/gdcvault/__init__.py
new file mode 100644
index 0000000..c6833af
--- /dev/null
+++ b/modules/gdcvault/__init__.py
@@ -0,0 +1,3 @@
+from .backend import GDCVaultBackend
+
+__all__ = ['GDCVaultBackend']
diff --git a/modules/gdcvault/backend.py b/modules/gdcvault/backend.py
new file mode 100644
index 0000000..f031d0e
--- /dev/null
+++ b/modules/gdcvault/backend.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from __future__ import with_statement
+
+from weboob.capabilities.video import ICapVideo, BaseVideo
+from weboob.tools.backend import BaseBackend
+from weboob.capabilities.collection import ICapCollection, CollectionNotFound
+
+from .browser import GDCVaultBrowser
+from .video import GDCVaultVideo
+
+
+__all__ = ['GDCVaultBackend']
+
+
+class GDCVaultBackend(BaseBackend, ICapVideo, ICapCollection):
+    NAME = 'gdcvault'
+    MAINTAINER = u'François Revol'
+    EMAIL = 'revol at free.fr'
+    VERSION = '0.d'
+    DESCRIPTION = 'Game Developers Conferences Vault video streaming website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = GDCVaultBrowser
+
+    def get_video(self, _id):
+        with self.browser:
+            return self.browser.get_video(_id)
+
+    SORTBY = ['relevance', 'rating', 'views', 'time']
+
+    # def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
+    #     with self.browser:
+    #         return self.browser.search_videos(pattern, self.SORTBY[sortby])
+
+    def fill_video(self, video, fields):
+        if fields != ['thumbnail']:
+            # if we don't want only the thumbnail, we probably want also every fields
+            with self.browser:
+                video = self.browser.get_video(GDCVaultVideo.id2url(video.id), video)
+        if 'thumbnail' in fields and video.thumbnail:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    def iter_resources(self, objs, split_path):
+        if BaseVideo in objs:
+            collection = self.get_collection(objs, split_path)
+            if collection.path_level == 0:
+                yield self.get_collection(objs, [u'latest'])
+            if collection.split_path == [u'latest']:
+                for video in self.browser.latest_videos():
+                    yield video
+
+    def validate_collection(self, objs, collection):
+        if collection.path_level == 0:
+            return
+        if BaseVideo in objs and collection.split_path == [u'latest']:
+            collection.title = u'Latest GDCVault videos'
+            return
+        raise CollectionNotFound(collection.split_path)
+
+    OBJECTS = {GDCVaultVideo: fill_video}
diff --git a/modules/gdcvault/browser.py b/modules/gdcvault/browser.py
new file mode 100644
index 0000000..f762a1d
--- /dev/null
+++ b/modules/gdcvault/browser.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser.decorators import id2url
+
+#from .pages.index import IndexPage
+from .pages import VideoPage
+from .video import GDCVaultVideo
+
+
+__all__ = ['GDCVaultBrowser']
+
+
+class XMLParser(object):
+    def parse(self, data, encoding=None):
+        if encoding is None:
+            parser = None
+        else:
+            parser = lxml.etree.XMLParser(encoding=encoding, strip_cdata=False)
+        return lxml.etree.XML(data.get_data(), parser)
+
+
+class GDCVaultBrowser(BaseBrowser):  # browser for the gdcvault.com play pages
+    DOMAIN = 'gdcvault.com'
+    ENCODING = 'utf-8'
+    #ENCODING = None
+    # lxml.etree.XMLParser is needed to read CDATA sections
+    PARSER = XMLParser()
+    PAGES = {r'http://[w\.]*gdcvault.com/play/(?P<id>[\d]+)/?.*': VideoPage,  # 'id' = numeric page id
+            }
+
+    @id2url(GDCVaultVideo.id2url)
+    def get_video(self, url, video=None):  # open `url`, then let the matched page fill or create `video`
+        self.location(url)
+        return self.page.get_video(video)
+
+    # def search_videos(self, pattern, sortby):
+    #     return None
+    #     self.location(self.buildurl('http://gdcvault.com/en/search%s' % sortby, query=pattern.encode('utf-8')))
+    #     assert self.is_on_page(IndexPage)
+    #     return self.page.iter_videos()
+    # NOTE(review): the backend's iter_resources() calls latest_videos(), which is still disabled here.
+    # def latest_videos(self):
+    #     self.home()
+    #     assert self.is_on_page(IndexPage)
+    #     return self.page.iter_videos()
diff --git a/modules/gdcvault/favicon.png b/modules/gdcvault/favicon.png
new file mode 100644
index 0000000000000000000000000000000000000000..70ef33e20bb41c825a43e51a42a0cdfc8c89dd36
GIT binary patch
literal 3482
zcmV;L4Q29)P)<h;3K|Lk000e1NJLTq002M$002M;1^@s6s%dfF00006VoOIv0A~QD
z0KwsbeUAVD010qNS#tmY3ljhU3ljkVnw%H_000McNliru+z1~EH82=m4KM%z02y>e
zSad^gZEa<4bO1wgWnpw>WFU8GbZ8()Nlj2!fese{01X03L_t(|+U=Ujk6hPv#(#Hu
zucq#5b~nl9Ad2E3OHnL9HX_FkA}jI2%OEhai1P<znN|LPEWF4+kWGN>vI~MBKn!RX
zBeoF<v1Q4!EK?9EifZ*l_Ef`r?@lL+SK>TKhRN7bU7&$3)O}U=e)pd5eCIo*{`#MP
zAbf#`<_kXN0L%fH126|*4!|6MIRJA2KG#tHj{vX_+oVzot-xr7Ga98dRzuE$PeRC<
zlrjJvYbcz?83P#<f&!A#7zJwfmw?768t-ZHU8kYiZ&nDJc79f%G9-cgBmww`zkCG&
zvIsFHnwXh1p53Wu>u}1Y?R_@(E5=PEq)4x0`OZt{InwQ*H3*6<?FModQqJTo#FROl
zR($`RkBE&&qy at OtnzO5meD&-xP8=C9umzdKxbkd{YJRzKgSW2j;PaEZ0jj+PS}7D3
zr4m||^vZ%0!#>Zg9pSyLeO|w^$=+0ON)uwn=Y&(5p0#wl7N-<-%8aVOqzNFII%KTX
z=%BGmvD|a~$@5S1{IMfgXCMh8i6k)S7F;MDM;A-p+uFz1;qe2YXs??F1o+#3`5^)<
z^$ISm4*AA~6CCN6{Qk)mjDa6r+N2JF*WSHK$688fSzGM#%Ei+x_AJvn at YAcC+!)nN
zL*lUUgpjaGv)nIu_2MbMbZm$=n)QPTZ+!4EoBIu!%%F4}8<Z>$y6D-V$m0gUYK0^L
zqew}JS at 2n?4r_L&H6L6b^WCqV=jcL at 3oA=pTbb~y&0TiK4ML5x2B#H=ji*~!4r<T#
zVZ}$gW2PaKX3;z-4ChuBd1_?=tKpOVF|WOImCf-K-$b&2F`AA2ip(BOk|&b|jyTOI
zw~MSLqxG$6O at tfOl>ff6g$OKm9p_dTDU2a0p-DoDnIxH%Gf^@z2_a?bknk}OLV_#|
zoa3n at 3v``k5;O0tZ?jv~OzJ?L1usG!69;v}sP;4=B2NN<-k at MqMWVp8Np$U<)0!Y$
z-Z>;?p|F-C{Ss@<jKNzvR|;}&2{306$tZ**?G at Gt9j93+9Y!0bbzpbusqbPrnT2Ei
z4PX5S0#GO at ouh6%K4m7Ar(c+R9;};)Rtlpv1LxYsiXec at HhFu%w*RU-n8s?=k|7X8
zh;jDd6ZEL`-Q-sq=xI$8B9)I!L!hwc9<u0`3S-*GLvFA5?WZQvW)iIxO5b~eiB?NQ
z1X19WqA<E8I`w}9fK}j(!CHflnQ0xF`b6O@*4$cE*TLB%LkeRNfg97B%17diq+0DD
zSil-Zp>DeovynxZguuA+q at 3v$j$yY$uP`XBpO{9scjtcN3`#3}4Ajw)B-1S%gR;Qn
zh*O4P$MNcOXRsQYD7<}bhpE4npHt5GlrTnNltL+mh$7#{;wi(p_I$W=z%#2udQS6u
zr&idWOi3b48c#}~wP3AjKQjtZ9y0*#V!@|O%$dSyjt_fSrAaxl)GzqTnPV(3l%$+_
zZ)=Zt)_3qp-FCXPH$ZE&24(Jlp3|m%{`|2e&L3am_)>>&UOdG+>pSdDJef?#Dh6di
z#})ka!%c3~@$ms5a!cn=LuBeBR>SGV9w!#M6wb2LDd-oL$|v62*x_G)c9}Ze8?+&3
zl+s8Bawe0JyQU>zRJ8)?TUqh+%7Ep5$=A=FBuXMB+H{(Pok@)~216u0E&#HCkBL#`
zxjwDQ0;MrnB`Bo`BGf)Gsw2PL*yZx>m=N#3oJk!xY-;KlxG||1R{^;Xy?cS3NyC5q
z`~%Ld4mp2($gopTC_~CZ9V2^<=i2V%p5o~@G1Tpi5d{=R(JL(dPC>_5oM~gDO;Z^k
z5 at R2zVkA5yAgzL4Vd)hGF-NK-)J@<)VooVVVHE at C=sAngZAQqnhcXR`sSo&+A7cmr
zO%i+^7&QrVvmOwVHi!sWa+~iTK}J#Fu!$Tt5u;||^^o}@f)B!eY&d8FN~xK45D;lA
zN_os#;C3#ElDUI!@~=-mpLOB#>D-igQe$q3%>now9BO8t>Bq$ZzpbM%uuwR<odO>t
zQ=h4Qn_0AK-K#t!vCqmocL1O at 6xvW&eNzqknD}k^0YTR}zVp>*>6V(!!-oI5w2sdi
zl`@^e(sPEw7{*Ox;v0PWoB&v*`R>;)aB4Upi}0WCt at D$Qcc{a?to+9p*7&QJpCtoV
zZ;bf+*WP^mH^3T(J;!RlLrgH}IBrZEFp5E8`SuG>v$EJD_`pB?=v{nDpN|8SQha!Q
zM7OXkcT2u}dWC-B?&$+q;2Y=GZoYo~@+Rg<RBlQE3a#5B%NTIF6<v$ng5$$JXNCio
z2VI=ipFad*%Dlh5&mTT>oKC;Xv4t*ci#^7Tr<tif1)Nywa%y$4Eokb%+v_&~a0W{2
zu-aghLL(#vAtihWH0k!!gwi;y(fAOlW4a at 5HQ<13+M*!>N~>0g<yqy5qFY)DV_E8S
z>6Hbgg}$@&N=Jx^+TW#T9tJ=X;jj*Tv^!$4ThMWa3rB~nA53VXZ#U`S<!9E=Gtu&k
ztv#BMF-G&!nN?oBa0=@TDG5zVOq;;w;gnyjUuSz%GYJt87CR+>@s;x|^*hu)@{d1!
zmvPfBwX}-A`?If8Xu~u#eDC`|A^X%S!_2Ix5{yy|jOFELPV&srB^I2SfvfqOKYfuX
z5Mt(g|NaL4v!=m1MlNmbQ~8Y5nx|F<44lJhjRlSr1s9JmBLbr)^7hpoM1+nptS$6g
z_a|kLL}?VOgAUKHE%EBJr#QLPqvI^r7?wIEgHFlOeo4m}28Fwg><To{bBa<KIyX((
z)-F;at^24z83m^G4Q9d_5rt%kF;j<#ar(2T!4QP2yJPmIHNB$GQm5d|%7DYV0nwa0
zI%L5)Vivad$9!@yA&O9FVeCCWzPv@%1fpd6rQ`hSkh3d`99!z~?Aj80Rl~TEHbzAd
z6d`4-HRzf7S!hF?$>Ed*ZFFm|DM4wuWi5#?sRM6Z-eBwCkZ)dWO`1_1`M0+&vo)$1
z*ZvM8=8*toVA3S6+!%3ep+~2*oLw34@!l9Mii;;#aR#PU!~53{2ziFOQoO&t&th4i
zv_hkZQ8=h-3Nxgu3swg`I at U6tkwHcv3n>X!t9GMVa840Yn|^%;W7^XeEoh}tvqobQ
zn8ZjG6TQN5P}MAyhQr!3ssr1jDL&@UFNAyw{Az2DFP~m!*eN-=IN;<$4{hN1phFhe
zuRI^!7!h)!Fq%PWIeTP*)5{AimJVYzPHPsrj?!t2ZY}WEamu7Dvo>fO(^`X3imah9
znwS$QL&y=U^ekQt8bKR{)|!}wPwj}9X1&KO#GJAErb++n007&gimii+h0-zXmRwjH
zqE+h^c*%TnIOd at 8#H1ONh8NaWc=6OK-O`ehP=|obm|B#<8bin0n+=nY5>~YvX)}gG
zD}~jXzO`ggq at 0K;;oxRz-rlaZuMm^8Kq?_gMn1KaXV#wP(XF%xO5w^!-rwBi?DBwK
zS#WM;5v8E(4EvLYjlBsIACa8txRSF+7g<>>kqmFFZ}Gv-0Wk^7eaD}_^eje0r_}h+
zF1q#+nXR{9I=a at -a|Y`S$A=xP*5nK+3EG$zDAVpnXxsKV3o56ZF0(<<8m!ioMpHUX
zLP#k;P#QlFfF@=x?Huw)F)=9N=t36}sA6VR2iEs1DxZn8t#6|hPH9B9o2z`p8pR8z
zRxz_vb6OK55;KfLVp0cWQCR3$UO2go($Fs)&z)GoY2BJ6F{7<Q8-o!&vkDYh6e5W{
z+gOO1kTMye=PbvEy|%43X&8mb<N<EfLyMuq%5!CB%yPHD8Vd at V5ZE{zb5J!jDUx$$
zTzjtWPB=O2)3pWPdhQIbd?^vLaA|W7-Tv}{jxF$^wa=?2 at UxGvb82}&sap+kVQq;?
z-LNxmIJ(ej^SETlpizicG&xhHK$9{#tD9}Jx(;lPYL*8boYK5_dWF+V12zt){Lj at L
zY9C4Wv9TTsKob&gUftlru>qqdlB00YG`zd9$3aySQ=0)Mb>PSEuM=bB#gj)_?iP$w
z;H_)d`N5 at iUV8di8^ua7ZsewA_WG3#l!8AvbCf}+;IQ)ie0`T|yOXwOXf!*cnyHWE
zEL1+SeJ~+}#O|b~iUF+^ISZ4Nc=O5 at XvJEui?twHV^y0%<pE5f9`$Q+r@%Q&VGLHc
zZ0mDkdRG%u0ak-ipmXc?#@pWG0c50Rve+1l0b&-kR+PrjaTcE<l~4GT={iflv~-=N
z@{v&!sAlD`QBXR|pfL2DrQ=!;bJ7I%rw!96_w^(n=?4UX;3HGwBmKOPAw1x{hdtLs
zAl~`;6C%*Gq(1AJPO at hjTEp~iqcllN+&)J3lyARNZg~UpyK0EYZHrGgT~m1=n62*l
z><-4<6PyDu2Vf4s9Dq3ha{%T5%mJ7KFb7}`z#M=%0AIlIze{Bb-m5q9z5oCK07*qo
IM6N<$g5_6x-2eap

literal 0
HcmV?d00001

diff --git a/modules/gdcvault/favicon.xcf b/modules/gdcvault/favicon.xcf
new file mode 100644
index 0000000000000000000000000000000000000000..32695aaa5cfdb5863c913b5e077dbbc5859cf43d
GIT binary patch
literal 5024
zcmZ{m*>@Y)dB6oqc5F#*-PBtWr%~RV)7r;5$8qaErR_scA9_yz1x1zyJ9kklDT$IP
zS(QXdj;bU|oY)#{1h_9E#YGp(U<W}EBmsg8NbKuiHp~Km(>t`(I;Za7T+BCje&4;{
zdI3bCX*qW0#HnMa6zY=);PuPrLz4dr4w(#o{WTn~{B!@IN78`u8*m(g<8?URdM-=c
zn~>MQ`)deX_Wcvh8qG;<2Yfytz5M>aw4FTOaq`n+U5XC*v48!yfBz5Z&FlN9LUSBB
z`99L7_*9yMwCuop?Hy;;Cy%u^s}-OA?C^W1kQ2 at 7=C)(+?cd&SLw<Gq*oPl{xPN_z
z at bl-deg;*4^&P4&-XHt_1xfWxd&}_?3N3Q%=P&kxlV`tc^7-8Romxf5F@^So{A9bN
z-+by+`^k>~F;LR}zf<4L{JE+WhnJD#8qM+N5jpV2haY_O3;6lulTZHP!;e1t<m12p
z_!rW<=a($}LW1{SJ|9xtkDfYyT5+OT`$3BqfsY58q%U~A42P6N5sp`$%M$<Ua~a+a
zy!$Qr at VDe6-;$4hOMdNJ@?Sz(TKC2EJt#=~N!N=4dSMHkYU<yA^S`!3w(0So&ozxp
z&pl1IkUcr_F_M-mRND5AGpAcra(T<?^XD&IJa at 6X=R&6zX+mTQUa8io)C%MWNOZ4J
zV=JlU*#{R})#v<M@$Jo6bTu5bo>3~)?f0?mLT=OXNQ1nCG>IE^j>L<}*=rp=uE=Jl
z!j$4G!I=x{mMgYmrGgi8x%Cd at ZDdW}5cgVJE<N7DvkO-*T)xq7NYjbI-`(x)YPn*~
zmsh^*yLMMUeOivZg{-OTVoie}+RL#@;;Zw`+V)>Z at bJTPEgH?)$J^!bt<Gjm%bC;7
z5-VK9MXDK*A#v^3IAOo6Mq2Kz6oU8Llyc2AkWCoQD-lGlP{^STLf>vJcLw^Jxm;vd
zuBqiM_m^{ld+i9)d at E3hK4@1W?;^bB_BylGD^X|5!p=1n(tHn6_uC+KH&hDWgVc`@
zR^IV&xfs5of;(rf5W>bK6{5WxE-c<{MG*DPKr#BTQw6!0`s^eBuIElOg0y_a^6oD9
z_ErR6>4%u|k~Npm_rQq%5J_t;{NB0le0aXC>$BY&{X6KrqAy0SLPZ%uc6^m6E#1A)
zs%`0LRUvO8745}GVAVhL=r_g~&H0+4v3&0~z4i^QJm<(}gOBg^KYVOH2Wdic{=t+l
z=ye6Qas^u}auDIW<J8(MDeI)B8^m`vqpQoor`<~A4TMm2-uQZA_8G8Y)BRe71h<DG
z_RAV5QHs+KpL&9R$M}DBNh~GTp6R{W-+#03a;F-33E@?pJ?Gn$$RUK0YtLQnyU};K
zO9SIxm8-NZ&1w})!mCI_sX$(a at l&eQ8ns%9^vGmL9VQ6*1fezOdM~!C<Vv+ti5x+A
zMRR9|R*oD)sMG${`O6x0NB7k;8ssp{kvqBjRWmfH_E>Ce!7Zo at 8<o&4t>%(*eWe`|
zoYq*P7thII^mx^l*w8Cstx%UX$d&%~*4tZL<_v@~%Bwp}>Yf%B$W>mX&MMXIUFR;G
z?a)FkLDO*tx*|i03awf#zr0SY-oJ3=4_T4C(yoGTxKG>g(j-i$W~NqmwQ2sawYwb&
zOy2BND$fVw5m-}NoGG*@uS6 at W53bK|Ga^TnvKr(S#I5~(npo~ttGc%Z{I{*0liTs_
z?cHtfoi^>w4JO`$AYFt=o`IUg+JhVOJIo$i$;DeFO-1)gCGn at u&Oa8b(My^x6Z+`k
z*I0UIqPwksgO2qokc%Y|Kc~974mG>4>(hMxx4m6jXsxe(yifqw`W!@Y{Ji?~wfkQ@
z_;RX<yFY8|U#BB|D&>{zUhbl*cY|EJ-`;s|joP at Sff?{=ZtYZJU;b$W-*|Ak?FtGO
zLu*N at y4u%zD at Lu|(5kN|Yst&1Yg=^mPIL3^6+CiRiyTFKU7m8@=1AwPeQn*ASaK&C
z$&kX<Rqgd{X8mSM+x=ZG{+YIKyBfQx*4zx^k$Ww2xSLmt=IK-#-!@)o{mfr3O?}b-
z71c<0t1fL-Q(v6xHl?eZ*IPc{W;SnVG&k0Wjo)->q&o5T#H#{d3Eghj^ad$%=;j at N
zRV<uEI+hu7;n#l*6_~Zp+iq<zvFj?;)exQ at zI&xp3DfUwT_6QMH{GLFc8%mIvXrxN
zBGrzlzAiJBM8uXMLZ5g1=SngBnF6^mlgw at T#(Jdc%PVguQi<>#XtcV^5-aVDw$mI1
zOC_Ox98IklZU&d<F17V81m`cRkk%VBp;hnHCAC!0>ejCFU9GU~s&aKR>|{hPhc%p(
zYg;>7+Rk1+-=^-obmI~{V3e&F`|jNCZC4<F3tL4YH4AJNsZmsFl}rUs^PWR5Aw5k_
zl+fvoX3LPpWI$2l;K-0{h#MXm9UHUg(I)huuC6!e&>y0*y;wu2;;A(-V$zT77D}aJ
zsgTPg*9Y}FgK2&Xr>Oj<U5~zl!UG+i{Z$%|Oj$;^a>WWSu;ol*Wz=XI-=sK(rYNFd
z*#~9DtubezO2#IJ$Dho3D+0b~otw0nCf2EH4x5~ubFU1dZ=+d#U99V&bz^z9>>4&0
z%nP|{1`L^GCUV$SV$w6v2&2Vff~;T`#F`P6p=p!5%5IDqQNw(ONzR*es9_>X65bIV
z`eRghLhR8N=wD`>u4Sf-sA(ZZ?arA{)G!mLb6`M+zKim at xqPiK2C1xtX%wdnsBs}p
zenZVB=`6&5jPk~X6qBCXUy5tw$00PIp_BVdO(&V$Gq}``&<&m0wo{7F!VsGrwR(II
zZuB(6rsf7vR6ic0aQBD-Vrl(|V>7pH9kL8gmKwYb;<_h!E)Df$C~3xUIypaV)EUgs
zn>W#lVbs2!*<G>$UtXx~8lVmQL{Z2=x(TI*HYhsbnwtYIzXigCei*FmB(}E_1%i$Z
zpa)TYti)wzjS{OHT`!gMxpZoGZCGM;<|p3pS|qx;xiSm4g6u##C&lt0O6x5k65rX5
zg{_0=TPST9oSdGWn|(57F`_TQkeNq^EjsiN%IHi(<C9ZUV}k~ml|7xlNnbM>^g8ra
z_<+8GHgpD~%-Ddn)gx<?rv7n%tx>ZM)#)L21Qql~i`j_&6s0UX+4XUg-ZDA?7dwpd
zhB#ivOwgE`TNI0Md1|CkP0gB2V_U`afJ7L547)ol5d~57NW^G?OD~uQW=l142ojub
zvc%!@QlStNqR0-R2J_(1(6B{{v|uz3?YoV`1dQjanT4 at QJ1w%4(%PQESe0HLf{9!e
z_u?i4mWTgPcEi(Vy?!K7&YEGFaso4ej%V4-v&pq0FV;Agf^NG__DVH9u1AMUdn_;x
zEEkJ~V!60IYc@?6_|hnf4sv4Ef{shIJ-uG$MWITSO;FP_ny!|e<^cy;&x{%@evi%S
z-mKu^G4ouW&rhQ0FeB2#x`{$H^K9gaw^Ac*lY at Gw=o?t3nDxntZH8JNHVjV9KYQi~
zQ%rmcDzcd;2HgZHl4JU*BAZ>XSmrZq{;3g0-e;J_*{lsK(D`SB0~4<Gom8g637IDY
zPxEYc#%Op_X;j9cnvkC}nVzPY?3 at ujiu#A*RU)=QP#aSNqy8*j%w#KkqcCNBQs8ql
zrh!?!R)Sh(AwOd<&Sa`N82+QE*XSa-GF~lsM~qXuRXV&d>*PdoR6kl2D)U3*UV_h0
z8D~mD;fYQ+o#6^#&?uFMcdX3UglgjHfN>(pRs++syMjoMpq8Y-MV?_vic3E=PZxyz
z6pBtHxk_+u6c*-T)N6{e^*XUKYS39i6kBDeSY71IsLoa8s+H^p!KS9oPjhT$0!1xr
z6`~jqjp^Zj+zehRreMFwP{tC-66K`<q0Ymngx;OO)86SsIx+ at Nu4HsXkD4Z<iOlXY
zO!g7j69a?87OCU*kJ4Asw9#xfnTN(l1`L+*=`pF|2Sz5J&cOcqJCxPIo;Mg_Uo^oE
zFi0KKbLeFh9 at Ic)t=Jq+C+4sLV8`5Um&<d|v*!edz at 8QS1js~Uuf|iQ#Igsox(XCU
z%jgnCRLUtQ09Jb_OA9QKTLnJ=2gS|0z;krc=kVkzBwMQsOr at A`+wATvUx($&Rx22I
z2Q-N?+m<^*aBR}&^aMgNw#IC(gnXF8n`UdoO2D at i*}=fuK(OwK&;}WZ`*5C)I&D_R
zGEu|BF3fIsMky8#Vpgl&<#b4_9UkU($Y!h&p3iu!z_x_*<&Xman=j2UQ5X0dAb2>D
z$3PRvdbmc}Zv*xvoGS<I(2+oat1M#@vq&~5x5QxZub1EYoRQX9guNZwXO_x**(Wj1
znn-&08NM8Zj5Sc?Dq#%#7;yG*ktqZv^NCea%)@Beg18`2GQhdhRc6C&1Mh;0b0tIM
zR$O+cj~4e<B%b$IYWR}m3}stmSUlvi!Y!=OOTx91!VBAL;q^+bQG5n1;C*C`fNT?>
zunfzVBB9{Y+NJ}%1xROjr%=dci)58c+rdG=d*~XzC`q&{OVcEdmx at UjBq@6!x|2*K
zGnvGa1LmD~6!Emj1|`N3PUMP(%+9LB(N at eC3@!x&Ud#$!1~rG*iGi0u&1!S`0<u8e
z>#|AdwbiT+Xzg1-u-WZ4E9jB^6R2T67X}@YS^omm>=<STKLs>aEN6UnE9UmOz!AXP
zvNRR3LGx-+Q6!<al$+qoOAebSN0sg1FyQSmPAIq}f)K at ZiSUpD4v8Q<MlncmRxeqj
zLsAe)k0=UG05G at 5?Z%)^9dp^i_W&vV9|*bqQTT)IwMnaDDURKNeE^nsMKR~Ft&t2(
z6n6vANm%c=l-1fEYX?xVyzI}g4Y9%VEO-^Ttt(Z&<g)=ME!M(zY?q`6k|uLO%)Uqp
zqz3?3O%yQTBY1o{kg3+iJ)Wg(P}Acs^Hjv)SmpO{x6QE;TMb9ERVwawED^Pe9{?{e
z at -AsdXlekSEZnN!X@!bDY)5Jvf1YRKE*s_xEr%m<me2dKCCCPBR$o=*-M~ZEDj^59
zj0+_E9X|^EK}x8su97^l>~MIZ=|Tyo`5GRu2MMTUw*@GX at d7_lCzl);Eba=dD5&Mk
zb8ISIWmA5qXOm!P9B1kx<+J+9dS%gph3Gm7wa7XVwA&YPp%Rj`e6~onPEkC$?smYg
z;&wtyt95C4H(6&wF7JA^P6TXzvOz2Y5GV^|7?VtFc&NIpz9f{EoOWMP;5P${`G&|k
z0amK<$<SJfgT;$2l6#~N0AG=>ZY_B*ST<gJhHo^O9ayi}4%>joOnOgjSOJLgHI6Q&
z8LqVG3|4q4s?H=`CGv6DutxwFpcuNe-=7S&NpjRSJVEwg6(pZU%XB#wER<4S%$F*q
zT(J54sdBlv>jkd?!Qpa2gTD`C4$AIu`CJa0)4S-EikpLvivhP4d<g1RSonuPleOp2
LE1>7~1LA)J3ci%9

literal 0
HcmV?d00001

diff --git a/modules/gdcvault/pages.py b/modules/gdcvault/pages.py
new file mode 100644
index 0000000..9166521
--- /dev/null
+++ b/modules/gdcvault/pages.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.mech import ClientForm
+ControlNotFoundError = ClientForm.ControlNotFoundError
+
+from weboob.tools.browser import BasePage
+
+import re
+import datetime
+from dateutil.parser import parse as parse_dt
+
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.browser import BrokenPageError
+
+from .video import GDCVaultVideo
+
+import lxml.etree
+
+
+
+
+__all__ = ['VideoPage']
+
+class VideoPage(BasePage):
+    def get_video(self, video=None):
+        if video is None:
+            video = GDCVaultVideo(self.group_dict['id'])
+
+        # the config file has it too, but in CDATA
+        obj = self.parser.select(self.document.getroot(), 'title')
+        if len(obj) > 0:
+            title = obj[0].text.strip()
+            m = re.match('GDC Vault\s+-\s+(.*)', title)
+            if m:
+                title = m.group(1)
+        video.title = unicode(title)
+
+        # get the config file for the rest
+        obj = self.parser.select(self.document.getroot(), 'iframe', 1)
+        if obj is None:
+            return None
+        iframe_url = obj.attrib['src']
+        m = re.match('(http:.*)player.html\?.*xmlURL=([^&]+)\&token=([^&]+)', iframe_url)
+        if not m:
+            return None
+        config_url = m.group(1) + m.group(2)
+
+        #config = self.browser.openurl(config_url).read()
+        config = self.browser.get_document(self.browser.openurl(config_url))
+
+        obj = self.parser.select(config.getroot(), 'akamaihost', 1)
+        host = obj.text
+        if host is None:
+            raise BrokenPageError('Missing tag in xml config file')
+
+        videos = {}
+
+        obj = self.parser.select(config.getroot(), 'speakervideo', 1)
+        videos['speaker'] = 'rtmp://' + host + '/' + obj.text
+
+        obj = self.parser.select(config.getroot(), 'slidevideo', 1)
+        videos['slides'] = 'rtmp://' + host + '/' + obj.text
+
+        #print videos
+
+        obj = self.parser.select(config.getroot(), 'date', 1)
+        video.date = parse_dt(obj.text)
+
+        obj = self.parser.select(config.getroot(), 'duration', 1)
+        m = re.match('(\d\d):(\d\d):(\d\d)', obj.text)
+        if m:
+            video.duration = datetime.timedelta(hours = int(m.group(1)),
+                                                minutes = int(m.group(2)),
+                                                seconds = int(m.group(3)))
+
+        obj = self.parser.select(config.getroot(), 'speaker', 1)
+        #print obj.text_content()
+
+        #TODO: speaker as CDATA
+        #video.author = u'European Parliament'
+
+        #XXX
+        video.url = unicode(videos['speaker'])
+        #self.set_details(video)
+
+        video.set_empty_fields(NotAvailable)
+        return video
+
+        obj = self.parser.select(self.document.getroot(), 'title')
+        if len(obj) < 1:
+            return None
+        title = obj[0].text.strip()
+        m = re.match('GDC Vault\s+-\s+(.*)', title)
+        if m:
+            title = m.group(1)
+
+    def set_details(self, v):
+        obj = self.parser.select(self.document.getroot(), 'meta[name=available]', 1)
+        if obj is not None:
+            value = obj.attrib['content']
+            m = re.match('(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value)
+            if not m:
+                raise BrokenPageError('Unable to parse datetime: %r' % value)
+            day = m.group(1)
+            month = m.group(2)
+            year = m.group(3)
+            hour = m.group(4)
+            minute = m.group(5)
+            v.date = datetime.datetime(year=int(year),
+                                       month=int(month),
+                                       day=int(day),
+                                       hour=int(hour),
+                                       minute=int(minute))
+            
+        obj = self.parser.select(self.document.getroot(), 'span.ep_subtitle', 1)
+        if obj is not None:
+            span = self.parser.select(obj, 'span.ep_date', 1)
+            value = span.text
+            m = re.match('(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)', value)
+            if not m:
+                raise BrokenPageError('Unable to parse datetime: %r' % value)
+            bhour = m.group(1)
+            bminute = m.group(2)
+            ehour = m.group(3)
+            eminute = m.group(4)
+            day = m.group(5)
+            month = m.group(6)
+            year = m.group(7)
+            
+            start = datetime.datetime(year=int(year),
+                                      month=int(month),
+                                      day=int(day),
+                                      hour=int(bhour),
+                                      minute=int(bminute))
+            end = datetime.datetime(year=int(year),
+                                    month=int(month),
+                                    day=int(day),
+                                    hour=int(ehour),
+                                    minute=int(eminute))
+
+            v.duration = end - start
diff --git a/modules/gdcvault/test.py b/modules/gdcvault/test.py
new file mode 100644
index 0000000..5429044
--- /dev/null
+++ b/modules/gdcvault/test.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+#from weboob.capabilities.video import BaseVideo
+
+
+class GDCVaultTest(BackendTest):
+    BACKEND = 'gdcvault'
+    # The tests below stay disabled until search and the 'latest' listing are implemented.
+    # def test_search(self):
+    #     l = list(self.backend.search_videos('linux'))
+    #     self.assertTrue(len(l) > 0)
+    #     v = l[0]
+    #     self.backend.fillobj(v, ('url',))
+    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+    #     self.backend.browser.openurl(v.url)
+    # NOTE(review): VideoPage.get_video() builds rtmp:// URLs, so the http:// checks need updating before enabling.
+    # def test_latest(self):
+    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
+    #     self.assertTrue(len(l) > 0)
+    #     v = l[0]
+    #     self.backend.fillobj(v, ('url',))
+    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
diff --git a/modules/gdcvault/video.py b/modules/gdcvault/video.py
new file mode 100644
index 0000000..a65fd47
--- /dev/null
+++ b/modules/gdcvault/video.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+import re
+
+__all__ = ['GDCVaultVideo']
+
+
+class GDCVaultVideo(BaseVideo):
+    def __init__(self, *args, **kwargs):
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.ext = u'flv'
+
+    @classmethod
+    def id2url(cls, _id):
+        # attempt to enlarge the id namespace to differentiate
+        # videos from the same page
+        m = re.match('\d+#speaker', _id)
+        if m:
+            return u'http://www.gdcvault.com/play/%s#speaker' % _id
+        m = re.match('\d+#slides', _id)
+        if m:
+            return u'http://www.gdcvault.com/play/%s#slides' % _id
+        return u'http://www.gdcvault.com/play/%s' % _id
+
-- 
1.7.10.4




More information about the weboob mailing list