[weboob] [PATCH 1/1] Add module TricTrac.tv

Benjamin Drieu bdrieu at april.org
Thu May 3 18:20:39 CEST 2012


From: Benjamin Drieu <benjamin at drieu.org>


Signed-off-by: Benjamin Drieu <bdrieu at april.org>
---
 modules/trictractv/__init__.py |    3 +
 modules/trictractv/backend.py  |   62 ++++++++++++++++++++
 modules/trictractv/browser.py  |   78 +++++++++++++++++++++++++
 modules/trictractv/favicon.png |  Bin 0 -> 3134 bytes
 modules/trictractv/pages.py    |  124 ++++++++++++++++++++++++++++++++++++++++
 modules/trictractv/test.py     |   31 ++++++++++
 modules/trictractv/video.py    |   34 +++++++++++
 7 files changed, 332 insertions(+)
 create mode 100644 modules/trictractv/__init__.py
 create mode 100644 modules/trictractv/backend.py
 create mode 100644 modules/trictractv/browser.py
 create mode 100644 modules/trictractv/favicon.png
 create mode 100644 modules/trictractv/pages.py
 create mode 100644 modules/trictractv/test.py
 create mode 100644 modules/trictractv/video.py

diff --git a/modules/trictractv/__init__.py b/modules/trictractv/__init__.py
new file mode 100644
index 0000000..ee002cb
--- /dev/null
+++ b/modules/trictractv/__init__.py
@@ -0,0 +1,3 @@
+from .backend import TricTracTVBackend
+
+__all__ = ['TricTracTVBackend']
diff --git a/modules/trictractv/backend.py b/modules/trictractv/backend.py
new file mode 100644
index 0000000..7dad975
--- /dev/null
+++ b/modules/trictractv/backend.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+# Copyright(C) 2012  Benjamin Drieu
+#
+# This file is *not yet* part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from __future__ import with_statement
+
+from weboob.capabilities.video import ICapVideo
+from weboob.tools.backend import BaseBackend
+
+from .browser import TricTracTVBrowser
+from .video import TricTracTVVideo
+
+
+__all__ = ['TricTracTVBackend']
+
+
+class TricTracTVBackend(BaseBackend, ICapVideo):
+    """Video backend for the TricTrac.tv website."""
+
+    NAME = 'trictractv'
+    MAINTAINER = 'Benjamin Drieu'
+    EMAIL = 'benjamin at drieu.org'
+    VERSION = '0.c'
+    DESCRIPTION = u'TricTrac.tv video website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = TricTracTVBrowser
+
+    def get_video(self, _id):
+        """Return the video identified by ``_id``, as loaded by the browser."""
+        with self.browser:
+            return self.browser.get_video(_id)
+
+    def search_videos(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
+        """Return the videos matching ``pattern``.
+
+        Only ``pattern`` is forwarded to the browser; ``sortby``, ``nsfw`` and
+        ``max_results`` are accepted but not used here.
+        """
+        with self.browser:
+            return self.browser.search_videos(pattern)
+
+    def fill_video(self, video, fields):
+        """Load the requested ``fields`` into ``video`` and return it.
+
+        Any request other than exactly ``['thumbnail']`` reloads the video
+        from its page; the thumbnail data is downloaded when 'thumbnail' is
+        requested and the video has a thumbnail.
+        """
+        thumbnail_only = fields == ['thumbnail']
+        if not thumbnail_only:
+            with self.browser:
+                page_url = TricTracTVVideo.id2url(video.id)
+                video = self.browser.get_video(page_url, video)
+
+        if 'thumbnail' in fields and video.thumbnail:
+            with self.browser:
+                thumbnail = video.thumbnail
+                thumbnail.data = self.browser.readurl(thumbnail.url)
+
+        return video
+
+    OBJECTS = {TricTracTVVideo: fill_video}
diff --git a/modules/trictractv/browser.py b/modules/trictractv/browser.py
new file mode 100644
index 0000000..b7ba8a3
--- /dev/null
+++ b/modules/trictractv/browser.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011-2012  Romain Bignon, Laurent Bachelier, Benjamin Drieu
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+
+from weboob.tools.browser import BaseBrowser, BrokenPageError
+from weboob.tools.browser.decorators import id2url
+
+from .pages import IndexPage, VideoPage
+from .video import TricTracTVVideo
+
+
+__all__ = ['TricTracTVBrowser']
+
+
+class TricTracTVBrowser(BaseBrowser):
+    """Browser for the TricTrac.tv listing and video pages."""
+
+    DOMAIN = 'trictrac.tv'
+    ENCODING = 'ISO-8859-1'
+    # Dots of the host and file names are escaped so they only match a literal '.'.
+    PAGES = {r'http://[w\.]*trictrac\.tv/': IndexPage,
+             r'http://[w\.]*trictrac\.tv/home/listing\.php.*': IndexPage,
+             r'http://[w\.]*trictrac\.tv/video-(.+)': VideoPage,
+            }
+
+    @id2url(TricTracTVVideo.id2url)
+    def get_video(self, url, video=None):
+        """Load the video page at ``url`` and return the filled video.
+
+        ``video`` is updated in place when given; otherwise a new
+        TricTracTVVideo is created from the id of the loaded page. When the
+        page exposes no player info URL, the video is returned without being filled.
+        """
+        self.location(url)
+        assert self.is_on_page(VideoPage)
+
+        _id = self.page.get_id()
+        if video is None:
+            video = TricTracTVVideo(_id)
+
+        infourl = self.page.get_info_url()
+        if infourl is not None:
+            self.parse_info(self.openurl(infourl).read(), video)
+
+        return video
+
+    def home(self):
+        """Go to the listing page, using '%' as the search word."""
+        self.location(self.buildurl('http://www.trictrac.tv/home/listing.php', mot='%'))
+
+    def search_videos(self, pattern):
+        """Return an iterator over the videos listed for ``pattern``.
+
+        An empty or missing pattern falls back to the ``home()`` listing.
+        """
+        if not pattern:
+            self.home()
+        else:
+            self.location(self.buildurl('http://www.trictrac.tv/home/listing.php', mot=pattern.encode('utf-8')))
+
+        assert self.is_on_page(IndexPage)
+        return self.page.iter_videos()
+
+    def parse_info(self, data, video):
+        """Fill ``video`` from the player info ``data`` and the current page.
+
+        Raises BrokenPageError when ``data`` holds no ``fichier=...&`` parameter.
+        """
+        m = re.match('.*fichier=(.*?)&', data)
+        if m is None:
+            # Without this guard a missing parameter surfaced as an
+            # AttributeError on ``m.group``.
+            raise BrokenPageError('Unable to find the video file name in the player info')
+        video.url = unicode(r'http://src.povcon.net/videos/%s' % m.group(1))
+
+        video.description = self.page.get_descriptif()
+        video.duration = self.page.get_duration()
+        video.title = self.page.get_title()
+        video.date = self.page.get_date()
+        video.rating = self.page.get_rating()
+        video.rating_max = 5
+
+        return video
diff --git a/modules/trictractv/favicon.png b/modules/trictractv/favicon.png
new file mode 100644
index 0000000000000000000000000000000000000000..c6125f02306945453eeae34610b77299c4a951ca
GIT binary patch
literal 3134
zcmY+Gc{tQv8^?c^?90#-g^*=PmZ4E&j3tdNVrY_TP)vwoXfT)}OPXZqk!5U+CmBnY
z$<u3V?DE(Wg)%}3NwyveiT8~6ulI9Zzd2{-`kguVeV_aO-iL;Buof4Q6M-N|+|~x`
z3`S+{0}}wBs$Yw0z#!mhXN}$2McDP{82Ck)WOM!o7;SbRt1f?22oj~*Vo%_G#~03D
z#HEeMxNn@){cN?3q#feF;TT8SC$#4t!9YioH#AP_g_Bh2+fXX$(zlaqeFr*61w(R6
z#0C%eb#~)PI9)Fvtlyq1I5{U)%ze*&e8F8Nal}UnkL)iXTUlSijV5}!$9F~}E7!_z
zo{toq+ at YAbYRe?{oGpuuiHY$=#<!l+lcr&5G%MAV&M9>@3g*PW6pzp=K0ZFOvL5X0
z%*@Pl&dwf7WwRudg6=s at V)grL$8!2T{G#5qiVR{3olEd#I!8n$B_&7ZMme<PAj0DT
zU&;%mTUm5Eo#l$qR^{@zYWb!V0)xS9eWSY=n*dA~$@j1-1NZiMvaB8t6f%FVg<}*Y
zQ%2sdRticdc6gQ`jp%J3Nk}>7nX)0jkovWGOO*$jNF+v~4Pjqqsi_Vy3~3Yiju}s%
zYNT5+-^_``_k?V%uS|u6xCa83$}hU>(XcA<@rUr|0cX6Pzw7|KEGpC<)|2_t_73pH
zH&8Em3YBSWbU;Z-=@C7An;OMsr!B!Ao at UGBdzg`tu{ajKIMxt6mhcbY-Mg`|y1F`}
zgjoCGIWqdmpV+TUOG`=$Y7b&!V`GgJqPGC6WZ(kJ_31F3UITA7mV~Eu at uNaYnM~&F
z%xRI1h0q2cpQ}iP&Y)xRldbWhvK52^9Mx{tO$uRXXgIwlvv0r`nQd+kw|nAT;wnb^
z_KxLc_M4x9*GdmIihcvORnYTDzRyKWLYEG;QpFzQH2)A#Rf%z6zBwSK5LQR1pwoLY
zH2 at xs<OU^E>N<(Y&#0=ZLTCsw%D9L!ii?YVr(Ry?WN?Be2D9_>@+ at ssY#kgNhUe}c
z*!-^EoM#-iNN5nixx2eJUgon%kT{4Ye|cTu)(C+;yD=1uq8 at x^(e1XmGEVdBS4Fd1
zTNKLr%xSIkQ&(7b5*+i;A*C!SMr~~^>bPt(@cBc4=gUi3xY|_Qqg*9j29<4k0*Y_-
zbSlAXr5%tj8<}5oa)M at G<r$BROEh~|zyJ6V5{PYKjHq&xPFFUzW%l0<w40AcPM>!|
z+}(~uX>*i;apDwek$h{VV|{(SmgDAH4C3Gy;thY)KYK=#>F7}xJl*}=zdh)mY3ZE3
ziF;uC0DI5iA7%m4LuT%SkQP1p(3CQT%1 at mnrP`?dd1^YC2hvR6d8n1%(*bt0wGC|m
zSq|Q+S5H6nr0m?tP}g*DoqZ8LISG&gY|3+By6_R|$#a9x{UQJ1r{~jh#U_L62m#0M
zJm?|@ySiSK2R0w2Ys)0GeXMf*Lj?Sfa|v?vuQSOY`F)WBC&5<(gM738?(S|a-Tz!@
z0Q}nQ^#<<0_LK7S_xEq09ynzBsDET$%F}?}d$tUw^1w!|PT>#)vGyMV5l>3C66y at X
zn3z-*f(Ya#`^w7Pf!(>B<!rlJ=6qh8-$^LXB=X0#GkXj4V9IaD0HgO_x5Nrc-uk6u
zAtn&Z^4n)2tLOfLvhj>_9kE|b;G~ligF2lqlGy{C{T2!gr);kF=tV|EL|9ozZNK+>
ze&PBt10w?igW%~?(Fct~zpZcjd)~;-Rm*_oh%=n8A`c&i<dUq?G6b55&s<Rw(U<rC
zC^|VfwGv^X2$c<8%g=%+%a=&YU(ZG}9j&dCtYk!?=8sTuMMcH-qA^r})5MoR;CN1{
z at Ms=Be0byQ5zoNx9=`o at a&q;NHRa{p7&SDoZw4rec8b^fd+om*Oe7LNXB(QB5Og!;
zrT@(OvKGN6qnp>}M=F1=@e~i0xjbR=wYOufqrMA6u`R^!6i5+>o_zA%`N2tJI~z4=
zn!P~Ph4NvVaCBqi_;?XKCe>_P*`QU at j9*AxTzvXpo(ll9G0!N6!)e+XUl?ac{#@)g
z>+eMZPam at y<omq3_}7)wx`=)uRxd$HqG)diE`#h<luUv at h)d{fhk%V&&f=jLjlZTl
z*XEmk55I;GXf%3s!VxyT0)6Au1bSHRF((iRxp+ThMmiUD^}biCml(40uR-NsKbA%V
z*`wbZ;8Sg+Pykcb(J{IC>&MU8TFH`<_FCTCxMDnSZ(m=ZQ66Lj=<4$F5|mItktUln
zbP|mkiM(+Z1-OFZ;wo@{DT1BV)#7(bwI)_pOSdPO%bY^X#*@pZ7o>QB)b&0sR_=D!
zYaV!PoiaRZ8vgwYcc%EEsp;(sZ-8ARKYttIe&?_4NES+4+t5%A3s39WyVoP>MgL?8
zz^5P5mZ^SER0&AOjDNaAVGD(`p{U<$t8>keISG;X%3Bbg4Q%;Bh7#HW!q7Y*`&K at 6
zrKQ3^L50=H$=xMVR`94vLl<S35YX|~fEv#AV~UPtNq1lS33qyAdTgkKO<9HgOflo_
z?X`C_i;_sC)AQIjG@`p!KgAE&V3$JdUV=Q;(BQGMx|7gQ4_Tb-__R)Gm7r^r8i0t`
z(S&273l7O70 at ciW_4^mC68>hM-9WmXS`#R!77aQ;Yz#`Fa%5f|Oh~z%qM{e6eUWU%
zlo31ZLo9D?cr|mjT^Qn???E>ze0_OqquQrwXED<*{zE_wAO0-+d|7>Vs=ar!N%Y;n
zIj2)^r+x^iKJD1Ffw2~fwtD=AZV!2`y!h?D9ME!U{nj325@>lA7L*qZ21A+lx!<hZ
zr7Ru0IT>51#=i9i4$<ukT-lqC;&|4X+uxpY7D&+QkMY`|Rn6x55dV>)B3^FByYEco
zU=Uo)3&)(Cuzwvz@{>Nb)AnjOyeO?40`br>Kg(qw3qLah`Rzij`Jf(W1ysm%v$#nK
zwAgaEvm0IP5Vz%O2o~h#hRi$UV}0Kg_2uMb!$$wJP~AVuks5t=<=2m4m$+JjLS`4N
zw33T;U=k29z`6OGt9lO;B)Pq(%A=CSC8%IrngPUgU?4pmh>n!}{F+v}*3e9!5YWkI
zE_P(K2g>cBaoXAnsl(yw3lk}??}*DE&o_#N0Uhcl3fY1h#m3;sT>kdZ#fETMD~rIw
zQ(nL2^3dZ^=hO-X)8u7p0?N>Uz7 at ee=(@VV6A&oTfw`6z1G>G8yfJG3!GrP*K9Uwj
zM*buD7+NZ>*3;5KBFKn#PX+B#B2<EdVfFXlR(%e8vdir?V^F4O^wuU&0>Ji?)Gpgg
z5<nYC%6}+u+0x=kgj?tWV?mDw9`aO$Elp0cBM%;QV1gS#mOUiU)~U4go)3pxo?}i>
z`4~9O^av9uvBP@`#Tv%stN+W^Oja-o{``+rRQUXG`Nf)mwM<dK&Nh}a%0<K+#u5`y
z0~8is<H$%$OaJ9!tUzrvzl2c669~@Vd-Q~WXk_ke9${(Znx(*U`aasI^&ID;4uYFo
zpLdk;ry;ol8W-WHo40mh+}hsbQbDM5F6n(WLqg>i7t at 5rRMUq1;>h*I#cmC-3r&qx
z%d1y+;I(7?<WMe~oS|VZn?0s6<752>(eKEbyIu9^(_{&7-?@K3H?IRBCMv4xAdbGd
zA$&K5izL+#d7Mj=ATM>9J<aW;1Vt$)?iC|!DOM2Z=u4usqeqXeIpc at v=V>l%YhIbw
zpZu1DlEn|AM4=VxLgt=b>@*m=N7FHpZ<CcVy3KC!C6u0AI;Z<_KS5D at _C&3M@*)bd
zqex#=T;G1oc{UxjfWBRiwis{xt at KIzNN2_%9S<(#>SRk-GVwC`3fe393K$>*gua0e
z!cYfs6t90AjX<FFQAZF4VDjVQtBe1);CdkOs&_bdfxZp`rK7Kp*GHg_BG5>~|1H3-
Sm0E%YkgcTyw!-34%zpt}ug$Rl

literal 0
HcmV?d00001

diff --git a/modules/trictractv/pages.py b/modules/trictractv/pages.py
new file mode 100644
index 0000000..d8681c7
--- /dev/null
+++ b/modules/trictractv/pages.py
@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011-2012  Romain Bignon, Laurent Bachelier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+import re
+
+from weboob.tools.capabilities.thumbnail import Thumbnail
+from weboob.tools.browser import BasePage, BrokenPageError
+
+
+from .video import TricTracTVVideo
+
+
+__all__ = ['IndexPage', 'VideoPage']
+
+
+class IndexPage(BasePage):
+    """Listing page (home or search results) holding video entries."""
+
+    def iter_videos(self):
+        """Yield a TricTracTVVideo for each entry found on the listing.
+
+        Entries whose link is not a ``/video-<id>`` URL are skipped.
+        """
+        for div in self.parser.select(self.document.getroot(), 'li#contentsearch'):
+            title = self.parser.select(div, '#titlesearch span', 1)
+
+            a = self.parser.select(div, 'a', 1)
+            url = a.attrib['href']
+            m = re.match('/video-(.*)', url)
+            if not m:
+                # Report through the page logger instead of printing on stdout.
+                self.logger.warning('Unable to parse video URL: %s' % url)
+                continue
+            _id = m.group(1)
+            video = TricTracTVVideo(_id)
+            video.title = unicode(title.text)
+
+            stars = self.parser.select(div, '.etoile_on')
+            video.rating = len(stars)
+            video.rating_max = 5
+
+            thumbnail_path = self.parser.select(div, 'img', 1).attrib['src']
+            video.thumbnail = Thumbnail(unicode('http://www.trictrac.tv/%s' % thumbnail_path))
+
+            yield video
+
+
+class VideoPage(BasePage):
+    """Page of a single video, with its player and metadata."""
+
+    def on_loaded(self):
+        """Raise an Exception carrying the text of the first ``p.alert``, if any."""
+        p = self.parser.select(self.document.getroot(), 'p.alert')
+        if len(p) > 0:
+            raise Exception(p[0].text)
+
+    def get_info_url(self):
+        """Return the URL describing the media file, or None if not found.
+
+        The media id is read from the ``flashvars`` parameter of the player.
+        """
+        try:
+            div = self.parser.select(self.document.getroot(), '#Content_Video object', 1)
+        except BrokenPageError:
+            return None
+
+        for param in self.parser.select(div, 'param', None):
+            if param.get('name') == 'flashvars':
+                m = re.match('varplaymedia=([0-9]*)', param.attrib['value'])
+                if m:
+                    return r'http://www.trictrac.tv/swf/listelement.php?idfile=%s' % m.group(1)
+        return None
+
+    def get_title(self):
+        """Return the text of the page's ``title`` element, or None if absent."""
+        try:
+            title = self.parser.select(self.document.getroot(), 'title', 1)
+        except BrokenPageError:
+            return None
+        return title.text
+
+    def get_descriptif(self):
+        """Return the text of the video description, or None if absent."""
+        try:
+            descriptif = self.parser.select(self.document.getroot(), '.video_descriptif p', 1)
+        except BrokenPageError:
+            return None
+        return descriptif.text
+
+    def get_duration(self):
+        """Return the video duration as a timedelta, or None if unavailable.
+
+        The third block of ``div#video_detail`` is expected to read
+        ``<label>:<minutes>:<seconds>`` or ``<label>:<hours>:<minutes>:<seconds>``.
+        """
+        try:
+            details = self.parser.select(self.document.getroot(), 'div#video_detail div')
+        except BrokenPageError:
+            return None
+
+        if len(details) < 3 or not details[2].text:
+            return None
+
+        # Drop the label up to the first colon, then read the numeric fields.
+        # The former code unpacked the label separator as the hours field, so
+        # hours were never parsed and an hh:mm:ss value raised ValueError.
+        fields = details[2].text.partition(':')[2].split(':')
+        if len(fields) == 3:
+            hours, minutes, seconds = fields
+        elif len(fields) == 2:
+            hours = 0
+            minutes, seconds = fields
+        else:
+            return None
+        return datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
+
+    def get_date(self):
+        """Return the publication date as a datetime, or None if unavailable.
+
+        NOTE(review): ``%b`` is locale-dependent while the page text looks
+        French ('le ...'); confirm the month names parse under the running
+        locale.
+        """
+        try:
+            date = self.parser.select(self.document.getroot(), 'div#video_detail div.date', 1)
+        except BrokenPageError:
+            return None
+
+        string = date.text or ''
+        start = string.rfind('le ')
+        if start < 0:
+            # Without the 'le ' marker the former slice kept only the last
+            # character and strptime() raised ValueError.
+            return None
+        return datetime.datetime.strptime(string[start:], 'le %d %b %Y, %H:%M:%S')
+
+    def get_rating(self):
+        """Return the number of ``.etoile_on`` elements under ``#video_info``."""
+        try:
+            stars = self.parser.select(self.document.getroot(), '#video_info .etoile_on')
+        except BrokenPageError:
+            return None
+        return len(stars)
+
+    def get_id(self):
+        """Return the first group of the page (``self.groups[0]``), used as video id."""
+        return self.groups[0]
diff --git a/modules/trictractv/test.py b/modules/trictractv/test.py
new file mode 100644
index 0000000..1fa835f
--- /dev/null
+++ b/modules/trictractv/test.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011-2012  Romain Bignon, Laurent Bachelier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+
+
+class TricTracTVTest(BackendTest):
+    """Test of the trictractv backend: one search, then one video fill."""
+
+    BACKEND = 'trictractv'
+
+    def test_trictractv(self):
+        """Search for 'TricTrac' and check the first result gets an HTTP URL."""
+        videos = list(self.backend.search_videos('TricTrac'))
+        self.assertTrue(len(videos) > 0)
+        first = videos[0]
+        self.backend.fillobj(first, ('url',))
+        message = 'URL for video "%s" not found: %s' % (first.id, first.url)
+        self.assertTrue(first.url and first.url.startswith('http://'), message)
diff --git a/modules/trictractv/video.py b/modules/trictractv/video.py
new file mode 100644
index 0000000..8e6e1d5
--- /dev/null
+++ b/modules/trictractv/video.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+
+__all__ = ['TricTracTVVideo']
+
+
+class TricTracTVVideo(BaseVideo):
+    """A TricTrac.tv video; its ``ext`` attribute is always set to 'flv'."""
+
+    def __init__(self, *args, **kwargs):
+        """Forward every argument to BaseVideo, then set the extension."""
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.ext = u'flv'
+
+    @classmethod
+    def id2url(cls, _id):
+        """Return the URL of the video page for ``_id``."""
+        return 'http://www.trictrac.tv/video-%s' % _id
-- 
1.7.9.5



More information about the weboob mailing list