[weboob] [PATCH 1/3] [btdigg] Ported to browser2

Matthieu Weber mweber+weboob at free.fr
Wed Mar 9 18:02:24 CET 2016


Signed-off-by: Matthieu Weber <mweber+weboob at free.fr>
---
 modules/btdigg/browser.py        |  37 +++++--------
 modules/btdigg/module.py         |   8 ---
 modules/btdigg/pages/index.py    |   4 +-
 modules/btdigg/pages/torrents.py | 113 ++++++++++++++-------------------------
 modules/btdigg/test.py           |  12 ++---
 5 files changed, 59 insertions(+), 115 deletions(-)

diff --git a/modules/btdigg/browser.py b/modules/btdigg/browser.py
index 359722d..4f4c78e 100644
--- a/modules/btdigg/browser.py
+++ b/modules/btdigg/browser.py
@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 
-import urllib
-
-from weboob.deprecated.browser import Browser
+from weboob.browser import PagesBrowser, URL
 
 from .pages.index import IndexPage
 from .pages.torrents import TorrentsPage, TorrentPage
@@ -11,33 +9,24 @@ from .pages.torrents import TorrentsPage, TorrentPage
 __all__ = ['BTDiggBrowser']
 
 
-class BTDiggBrowser(Browser):
-    DOMAIN = 'btdigg.org'
-    PROTOCOL = 'https'
-    ENCODING = 'utf-8'
-    USER_AGENT = Browser.USER_AGENTS['wget']
-    PAGES = {'https://btdigg.org/': IndexPage,
-             'https://btdigg.org/search?.*q=[^?]*': TorrentsPage,
-             'https://btdigg.org/search?.*info_hash=[^?]*': TorrentPage,
-             }
+class BTDiggBrowser(PagesBrowser):
+    BASEURL = 'https://btdigg.org'
+
+    index_page = URL('/$', IndexPage)
+    torrents_page = URL('/search\?.*q=(?P<query>.+)', TorrentsPage)
+    torrent_page = URL('/search\?.*info_hash=(?P<hash>.+)', TorrentPage)
 
     def home(self):
-        return self.location('https://btdigg.org')
+        return self.index_page.go()
 
     def iter_torrents(self, pattern):
-        self.location('https://btdigg.org/search?q=%s' % urllib.quote_plus(pattern.encode('utf-8')))
-
-        assert self.is_on_page(TorrentsPage)
+        self.torrents_page.go(query=pattern)
         return self.page.iter_torrents()
 
     def get_torrent(self, id):
-        self.location('https://btdigg.org/search?info_hash=%s' % id)
-
-        assert self.is_on_page(TorrentPage)
-        return self.page.get_torrent(id)
+        self.torrent_page.go(hash=id)
+        return self.page.get_torrent()
 
     def get_torrent_file(self, id):
-        self.location('https://btdigg.org/search?info_hash=%s' % id)
-
-        assert self.is_on_page(TorrentPage)
-        return self.page.get_torrent_file(id)
+        self.torrent_page.go(hash=id)
+        return self.page.get_torrent_file()
diff --git a/modules/btdigg/module.py b/modules/btdigg/module.py
index 913499a..4f4d8dc 100644
--- a/modules/btdigg/module.py
+++ b/modules/btdigg/module.py
@@ -29,11 +29,3 @@ class BTDiggModule(Module, CapTorrent):
 
     def iter_torrents(self, pattern):
         return self.browser.iter_torrents(pattern.replace(' ', '+'))
-
-    #def fill_torrent(self, torrent, fields):
-    #    if 'description' in fields or fields == None:
-    #        return self.get_torrent(torrent.id)
-
-    #OBJECTS = {
-    #    Torrent:fill_torrent
-    #}
diff --git a/modules/btdigg/pages/index.py b/modules/btdigg/pages/index.py
index 7f948f7..d522186 100644
--- a/modules/btdigg/pages/index.py
+++ b/modules/btdigg/pages/index.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
-from weboob.deprecated.browser import Page
+from weboob.browser.pages import HTMLPage
 
 
-class IndexPage(Page):
+class IndexPage(HTMLPage):
     pass
diff --git a/modules/btdigg/pages/torrents.py b/modules/btdigg/pages/torrents.py
index 962db29..e310dc5 100644
--- a/modules/btdigg/pages/torrents.py
+++ b/modules/btdigg/pages/torrents.py
@@ -1,93 +1,58 @@
 # -*- coding: utf-8 -*-
 
 from datetime import datetime, timedelta
-from urlparse import urlparse, parse_qs
 
 from weboob.tools.misc import get_bytes_size
-from weboob.deprecated.browser import Page,BrokenPageError
+from weboob.browser.pages import HTMLPage
+from weboob.browser.elements import ItemElement, ListElement, method
 from weboob.capabilities.torrent import Torrent, MagnetOnly
-from weboob.capabilities.base import NotAvailable
+from weboob.browser.filters.standard import CleanText, Regexp
 
 
-class TorrentsPage(Page):
+class TorrentsPage(HTMLPage):
+    @method
+    class iter_torrents(ListElement):
+        item_xpath = '//div[@id="search_res"]/table/tr'
 
-    def iter_torrents(self):
-        try:
-            table = self.document.getroot().cssselect('table.torrent_name_tbl')
-        except BrokenPageError:
-            return
-        for i in range(0, len(table), 2):
-            # Title
-            title = table[i].cssselect('td.torrent_name a')[0]
-            name = unicode(title.text)
-            url = unicode(title.attrib['href'])
+        class item(ItemElement):
+            klass = Torrent
 
-            # Other elems
-            elems = table[i+1].cssselect('td')
+            obj_id = Regexp(CleanText('./td/table/tr/td[@class="torrent_name"]/a/@href'),
+                            r'info_hash=([0-9a-f]+)', '\\1')
+            obj_name = CleanText('./td/table/tr/td[@class="torrent_name"]')
+            obj_magnet = CleanText('./td/table/tr/td[@class="ttth"]/a/@href')
 
-            magnet = unicode(elems[0].cssselect('a')[0].attrib['href'])
+            def obj_date(self):
+                valueago, valueunit, _ = CleanText('./td/table/tr/td[5]/span[@class="attr_val"]')(self).split()
+                delta = timedelta(**{valueunit: float(valueago)})
+                return datetime.now() - delta
 
-            query = urlparse(magnet).query # xt=urn:btih:<...>&dn=<...>
-            btih = parse_qs(query)['xt'][0] # urn:btih:<...>
-            ih = btih.split(':')[-1]
+            def obj_size(self):
+                value, unit = CleanText('./td/table/tr/td[2]/span[@class="attr_val"]')(self).split()
+                return get_bytes_size(float(value), unit)
 
-            value, unit = elems[2].cssselect('span.attr_val')[0].text.split()
 
-            valueago, valueunit, _ = elems[5].cssselect('span.attr_val')[0].text.split()
-            delta = timedelta(**{valueunit: float(valueago)})
-            date = datetime.now() - delta
-
-            url = unicode('https://btdigg.org/search?info_hash=%s' % ih)
-
-            torrent = Torrent(ih, name)
-            torrent.url = url
-            torrent.size = get_bytes_size(float(value), unit)
-            torrent.magnet = magnet
-            torrent.seeders = NotAvailable
-            torrent.leechers = NotAvailable
-            torrent.description = NotAvailable
-            torrent.files = NotAvailable
-            torrent.date = date
-            yield torrent
-
-
-class TorrentPage(Page):
-    def get_torrent(self, id):
-        trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')
+class TorrentPage(HTMLPage):
+    @method
+    class get_torrent(ItemElement):
+        klass = Torrent
+        ROOT = '//table[@class="torrent_info_tbl"]'
 
-        # magnet
-        download = trs[2].cssselect('td a')[0]
-        if download.attrib['href'].startswith('magnet:'):
-            magnet = unicode(download.attrib['href'])
+        obj_id = Regexp(CleanText(ROOT + '/tr[3]/td[2]/a/@href'),  r'urn:btih:([0-9a-f]+)', '\\1')
+        obj_name = CleanText(ROOT + '/tr[4]/td[2]')
+        obj_magnet = CleanText(ROOT + '/tr[3]/td[2]/a/@href')
 
-            query = urlparse(magnet).query # xt=urn:btih:<...>&dn=<...>
-            btih = parse_qs(query)['xt'][0] # urn:btih:<...>
-            ih = btih.split(':')[-1]
+        def obj_files(self):
+            return [_.text for _ in self.xpath(self.ROOT + '/tr[position() > 15]/td[2]')]
 
-        name = unicode(trs[3].cssselect('td')[1].text)
-
-        value, unit  = trs[5].cssselect('td')[1].text.split()
-
-        valueago, valueunit, _ = trs[6].cssselect('td')[1].text.split()
-        delta = timedelta(**{valueunit: float(valueago)})
-        date = datetime.now() - delta
-
-        files = []
-        for tr in trs[15:]:
-            files.append(unicode(tr.cssselect('td')[1].text))
-
-        torrent = Torrent(ih, name)
-        torrent.url = unicode(self.url)
-        torrent.size = get_bytes_size(float(value), unit)
-        torrent.magnet = magnet
-        torrent.seeders = NotAvailable
-        torrent.leechers = NotAvailable
-        torrent.description = NotAvailable
-        torrent.files = files
-        torrent.filename = NotAvailable
-        torrent.date = date
+        def obj_date(self):
+            valueago, valueunit, _ = CleanText(self.ROOT + '/tr[7]/td[2]')(self).split()
+            delta = timedelta(**{valueunit: float(valueago)})
+            return datetime.now() - delta
 
-        return torrent
+        def obj_size(self):
+            value, unit = CleanText(self.ROOT + '/tr[6]/td[2]')(self).split()
+            return get_bytes_size(float(value), unit)
 
-    def get_torrent_file(self, id):
-        raise MagnetOnly(self.get_torrent(id).magnet)
+    def get_torrent_file(self):
+        raise MagnetOnly(self.get_torrent().magnet)
diff --git a/modules/btdigg/test.py b/modules/btdigg/test.py
index c47289d..4aa387c 100644
--- a/modules/btdigg/test.py
+++ b/modules/btdigg/test.py
@@ -15,7 +15,6 @@ class BTDiggTest(BackendTest):
         self.assertTrue(len(l) == 10)
         for torrent in l:
             assert torrent.name
-            assert torrent.url
             assert torrent.size
             assert torrent.magnet
             assert torrent.date
@@ -32,11 +31,10 @@ class BTDiggTest(BackendTest):
             assert e.magnet == full_torrent.magnet
 
     def test_get_special_torrent(self):
-        torrent = self.backend.get_torrent("c2e018a16bf28520687e400580be08934d00373a")
-        assert torrent.name == u'Ubuntu Linux Toolbox - 1000+ Commands for Ubuntu and Debian Power Users~tqw~_darksiderg'
+        torrent = self.backend.get_torrent("abd1d2648c97958789d62f6a6a1f5d33f4eff5be")
+        assert torrent.name == u'Ubuntu Linux Toolbox - 1000+ Commands for Ubuntu and Debian Power Users'
         assert len(torrent.files) == 3
-        assert torrent.size == float(3376414.72)
-        assert torrent.url == "https://btdigg.org/search?info_hash=c2e018a16bf28520687e400580be08934d00373a"
+        assert torrent.size == float(7004487.68)
         dt = torrent.date
-        assert dt.year == 2011
-        assert dt.month == 2
+        assert dt.year == 2013
+        assert dt.month == 12
-- 
2.1.4




More information about the weboob mailing list