[weboob] [PATCH 1/1] [btdigg] Add btdigg backend

Matthieu Rakotojaona matthieu.rakotojaona at gmail.com
Sun Jan 26 19:10:06 CET 2014


A resubmit of the previous patch, this time with an understandable name.
Copied the original submission text.


This is a simple backend for btdigg.org. This site is especially
interesting because it is not an indexer where uploaders add their
torrents; it crawls the DHT and listens to all infohashes being
exchanged by the nodes.

Because of this, btdigg.org provides no description and no torrent
files, only magnets. Moreover, it reports no seeder or leecher counts
(although the number of peers in each swarm is available).

Note that there is no icon.

Signed-off-by: Matthieu Rakotojaona <matthieu.rakotojaona at gmail.com>
---
 modules/bitedick/__init__.py       |   3 ++
 modules/bitedick/backend.py        |  38 ++++++++++++++
 modules/bitedick/browser.py        |  43 ++++++++++++++++
 modules/bitedick/pages/__init__.py |   0
 modules/bitedick/pages/index.py    |   9 ++++
 modules/bitedick/pages/torrents.py | 102 +++++++++++++++++++++++++++++++++++++
 modules/bitedick/test.py           |  43 ++++++++++++++++
 7 files changed, 238 insertions(+)
 create mode 100644 modules/bitedick/__init__.py
 create mode 100644 modules/bitedick/backend.py
 create mode 100644 modules/bitedick/browser.py
 create mode 100644 modules/bitedick/pages/__init__.py
 create mode 100644 modules/bitedick/pages/index.py
 create mode 100644 modules/bitedick/pages/torrents.py
 create mode 100644 modules/bitedick/test.py

diff --git a/modules/bitedick/__init__.py b/modules/bitedick/__init__.py
new file mode 100644
index 0000000..b3e1205
--- /dev/null
+++ b/modules/bitedick/__init__.py
@@ -0,0 +1,3 @@
+from .backend import BTDiggBackend
+
+__all__ = ['BTDiggBackend']
diff --git a/modules/bitedick/backend.py b/modules/bitedick/backend.py
new file mode 100644
index 0000000..c45d088
--- /dev/null
+++ b/modules/bitedick/backend.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+from weboob.capabilities.torrent import ICapTorrent
+from weboob.tools.backend import BaseBackend
+
+from .browser import BTDiggBrowser
+
+
+__all__ = ['BTDiggBackend']
+
+class BTDiggBackend(BaseBackend, ICapTorrent):
+    """Torrent backend for btdigg.org; all work is delegated to the browser."""
+
+    NAME = 'btdigg'
+    MAINTAINER = u'Matthieu Rakotojaona'
+    EMAIL = 'matthieu.rakotojaona at gmail.com'
+    VERSION = '0.i'
+    DESCRIPTION = 'The BitTorrent DHT search engine.'
+    LICENSE = 'CC0'
+    BROWSER = BTDiggBrowser
+
+    def create_default_browser(self):
+        """Return the browser instance built by ``create_browser()``."""
+        return self.create_browser()
+
+    def get_torrent(self, id):
+        """Return what the browser's ``get_torrent`` gives for ``id``."""
+        return self.browser.get_torrent(id)
+
+    def get_torrent_file(self, id):
+        """Forward to the browser's ``get_torrent_file`` for ``id``."""
+        return self.browser.get_torrent_file(id)
+
+    def iter_torrents(self, pattern):
+        """Search for ``pattern`` and return the browser's result iterator."""
+        # Pass the pattern through untouched: the browser already URL-encodes
+        # it with quote_plus(), so a '+' inserted here would become '%2B'.
+        return self.browser.iter_torrents(pattern)
diff --git a/modules/bitedick/browser.py b/modules/bitedick/browser.py
new file mode 100644
index 0000000..c48d6dc
--- /dev/null
+++ b/modules/bitedick/browser.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+
+from weboob.tools.browser import BaseBrowser
+
+from .pages.index import IndexPage
+from .pages.torrents import TorrentsPage, TorrentPage
+
+
+__all__ = ['BTDiggBrowser']
+
+
+class BTDiggBrowser(BaseBrowser):
+    """Browser for btdigg.org; PAGES keys are URL regexps, hence the escapes."""
+    DOMAIN = 'btdigg.org'
+    PROTOCOL = 'https'
+    ENCODING = 'utf-8'
+    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
+    PAGES = {r'https://btdigg\.org/': IndexPage,
+             r'https://btdigg\.org/search\?.*q=[^?]*': TorrentsPage,
+             r'https://btdigg\.org/search\?.*info_hash=[^?]*': TorrentPage}
+
+    def home(self):
+        return self.location('https://btdigg.org')
+
+    def iter_torrents(self, pattern):
+        """Load the search page for ``pattern`` and return its torrent iterator."""
+        self.location('https://btdigg.org/search?q=%s' % urllib.quote_plus(pattern.encode('utf-8')))
+        assert self.is_on_page(TorrentsPage)
+        return self.page.iter_torrents()
+
+    def get_torrent(self, id):
+        """Load the page of infohash ``id`` and return the torrent parsed from it."""
+        self.location('https://btdigg.org/search?info_hash=%s' % id)
+        assert self.is_on_page(TorrentPage)
+        return self.page.get_torrent(id)
+
+    def get_torrent_file(self, id):
+        """Load the page of infohash ``id`` and ask it for the torrent file."""
+        self.location('https://btdigg.org/search?info_hash=%s' % id)
+        assert self.is_on_page(TorrentPage)
+        return self.page.get_torrent_file(id)
diff --git a/modules/bitedick/pages/__init__.py b/modules/bitedick/pages/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/modules/bitedick/pages/index.py b/modules/bitedick/pages/index.py
new file mode 100644
index 0000000..6f3833f
--- /dev/null
+++ b/modules/bitedick/pages/index.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+from weboob.tools.browser import BasePage
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(BasePage):
+    """Page served at the site root; nothing is extracted from it."""
diff --git a/modules/bitedick/pages/torrents.py b/modules/bitedick/pages/torrents.py
new file mode 100644
index 0000000..0839e1a
--- /dev/null
+++ b/modules/bitedick/pages/torrents.py
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+
+from datetime import datetime, timedelta
+from urlparse import urlparse, parse_qs
+
+from weboob.tools.browser import BasePage,BrokenPageError
+from weboob.capabilities.torrent import Torrent, MagnetOnly
+from weboob.capabilities.base import NotAvailable
+
+__all__ = ['TorrentsPage', 'TorrentPage']
+
+def fullsize(n, u):
+    """Return ``n`` expressed in unit ``u`` as a float number of bytes.
+
+    ``u`` must be one of B, KB, MB, GB or TB; anything else is a KeyError.
+    """
+    units = ('B', 'KB', 'MB', 'GB', 'TB')
+    factors = dict((unit, 1024 ** power) for power, unit in enumerate(units))
+    return float(n * factors[u])
+
+class TorrentsPage(BasePage):
+    """Search results page; results come as pairs of consecutive tables."""
+
+    def iter_torrents(self):
+        """Yield a Torrent for every (title table, details table) pair."""
+        try:
+            tables = self.document.getroot().cssselect('table.torrent_name_tbl')
+        except BrokenPageError:
+            return
+        # Pair each title table with the details table that follows it; zip()
+        # ignores an unpaired trailing table instead of raising IndexError.
+        for head, details in zip(tables[::2], tables[1::2]):
+            name = unicode(head.cssselect('td.torrent_name a')[0].text)
+
+            cells = details.cssselect('td')
+            magnet = unicode(cells[0].cssselect('a')[0].attrib['href'])
+
+            # The infohash is the last component of xt=urn:btih:<hash>.
+            btih = parse_qs(urlparse(magnet).query)['xt'][0]
+            ih = btih.split(':')[-1]
+
+            value, unit = cells[2].cssselect('span.attr_val')[0].text.split()
+
+            # The age text splits into three words; the middle one is passed
+            # as a timedelta keyword, so it must be one timedelta accepts.
+            valueago, valueunit, _ = cells[5].cssselect('span.attr_val')[0].text.split()
+            delta = timedelta(**{valueunit: float(valueago)})
+            date = datetime.now() - delta
+
+            # Fields this page does not provide are set to NotAvailable.
+            torrent = Torrent(ih, name)
+            torrent.url = unicode('https://btdigg.org/search?info_hash=%s' % ih)
+            torrent.size = fullsize(float(value), unit)
+            torrent.magnet = magnet
+            torrent.seeders = NotAvailable
+            torrent.leechers = NotAvailable
+            torrent.description = NotAvailable
+            torrent.files = NotAvailable
+            torrent.date = date
+            yield torrent
+
+class TorrentPage(BasePage):
+    """Detail page of one torrent, parsed from the rows of its info table."""
+
+    def get_torrent(self, id):
+        """Build a Torrent from the page; ``id`` itself is not used.
+
+        Raises BrokenPageError when the download link is not a magnet URI,
+        since both the torrent id and its magnet are derived from that link.
+        """
+        trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')
+
+        magnet = unicode(trs[2].cssselect('td a')[0].attrib['href'])
+        if not magnet.startswith('magnet:'):
+            raise BrokenPageError('No magnet link found on torrent page')
+        # The infohash is the last component of xt=urn:btih:<hash>.
+        btih = parse_qs(urlparse(magnet).query)['xt'][0]
+        ih = btih.split(':')[-1]
+
+        name = unicode(trs[3].cssselect('td')[1].text)
+        value, unit = trs[5].cssselect('td')[1].text.split()
+
+        # The middle word of the age text is used as a timedelta keyword.
+        valueago, valueunit, _ = trs[6].cssselect('td')[1].text.split()
+        date = datetime.now() - timedelta(**{valueunit: float(valueago)})
+
+        torrent = Torrent(ih, name)
+        torrent.url = unicode(self.url)
+        torrent.size = fullsize(float(value), unit)
+        torrent.magnet = magnet
+        torrent.seeders = NotAvailable
+        torrent.leechers = NotAvailable
+        torrent.description = NotAvailable
+        # Rows from index 15 onwards are read as one file name each.
+        torrent.files = [unicode(tr.cssselect('td')[1].text) for tr in trs[15:]]
+        torrent.filename = NotAvailable
+        torrent.date = date
+        return torrent
+
+    def get_torrent_file(self, id):
+        """Always raise MagnetOnly carrying the magnet URI of this page."""
+        raise MagnetOnly(self.get_torrent(id).magnet)
diff --git a/modules/bitedick/test.py b/modules/bitedick/test.py
new file mode 100644
index 0000000..4065776
--- /dev/null
+++ b/modules/bitedick/test.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+from weboob.tools.test import BackendTest
+from weboob.capabilities.torrent import MagnetOnly
+
+from random import choice
+
+__all__ = ['BTDiggTest']
+
+class BTDiggTest(BackendTest):
+    """Tests of the 'btdigg' backend: search, magnet-only file, known torrent."""
+    BACKEND = 'btdigg'
+
+    def test_iter_torrents(self):
+        # try something popular so the search returns a full set of results
+        l = list(self.backend.iter_torrents('ubuntu linux'))
+        self.assertEqual(10, len(l))
+        for torrent in l:
+            assert torrent.name
+            assert torrent.url
+            assert torrent.size
+            assert torrent.magnet
+            assert torrent.date
+            self.assertEqual(40, len(torrent.id))
+
+    def test_get_random_torrentfile(self):
+        torrent = choice(list(self.backend.iter_torrents('ubuntu linux')))
+        full_torrent = self.backend.get_torrent(torrent.id)
+        try:
+            self.backend.get_torrent_file(torrent.id)
+        except MagnetOnly as e:
+            assert e.magnet.startswith("magnet:")
+            assert e.magnet == full_torrent.magnet
+        else:
+            self.fail('get_torrent_file() must raise MagnetOnly')
+
+    def test_get_special_torrent(self):
+        torrent = self.backend.get_torrent("c2e018a16bf28520687e400580be08934d00373a")
+        assert torrent.name == u'Ubuntu Linux Toolbox - 1000+ Commands for Ubuntu and Debian Power Users~tqw~_darksiderg'
+        assert len(torrent.files) == 3
+        assert torrent.size == float(3376414.72)
+        assert torrent.url == "https://btdigg.org/search?info_hash=c2e018a16bf28520687e400580be08934d00373a"
+        self.assertEqual((2011, 2), (torrent.date.year, torrent.date.month))
-- 
1.8.4.2




More information about the weboob mailing list