[weboob] [PATCH 1/1] First implementation of « presseurop » module (European news aggregator and translator).

Florent weboob at flo.fourcot.fr
Sat Feb 18 16:23:17 CET 2012


Signed-off-by: Florent <weboob at flo.fourcot.fr>
---
  modules/presseurop/__init__.py       |   24 +++++++++++++++
  modules/presseurop/backend.py        |   53 ++++++++++++++++++++++++++++++++++
  modules/presseurop/browser.py        |   45 ++++++++++++++++++++++++++++
  modules/presseurop/pages/article.py  |   39 +++++++++++++++++++++++++
  modules/presseurop/test.py           |   32 ++++++++++++++++++++
  modules/presseurop/tools.py          |   41 ++++++++++++++++++++++++++
  6 files changed, 234 insertions(+), 0 deletions(-)
  create mode 100644 modules/presseurop/__init__.py
  create mode 100644 modules/presseurop/backend.py
  create mode 100644 modules/presseurop/browser.py
  create mode 100644 modules/presseurop/pages/__init__.py
  create mode 100644 modules/presseurop/pages/article.py
  create mode 100644 modules/presseurop/test.py
  create mode 100644 modules/presseurop/tools.py

diff --git a/modules/presseurop/__init__.py b/modules/presseurop/__init__.py
new file mode 100644
index 0000000..225006b
--- /dev/null
+++ b/modules/presseurop/__init__.py
@@ -0,0 +1,24 @@
+"NewspaperPresseuropBackend init"
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012  Florent Fourcot
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .backend import NewspaperPresseuropBackend
+
+
+__all__ = ['NewspaperPresseuropBackend']
diff --git a/modules/presseurop/backend.py b/modules/presseurop/backend.py
new file mode 100644
index 0000000..816f808
--- /dev/null
+++ b/modules/presseurop/backend.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012  Florent Fourcot
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+"backend for http://www.presseurop.eu"
+
+from weboob.capabilities.messages import ICapMessages, Thread
+from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBackend
+from weboob.tools.backend import BackendConfig
+from weboob.tools.value import Value
+from .browser import NewspaperPresseuropBrowser
+from .tools import rssid
+from weboob.tools.newsfeed import Newsfeed
+
+
+class NewspaperPresseuropBackend(GenericNewspaperBackend, ICapMessages):
+    MAINTAINER = 'Florent Fourcot'
+    EMAIL = 'weboob at flo.fourcot.fr'
+    VERSION = '0.b'
+    LICENSE = 'AGPLv3+'
+    STORAGE = {'seen': {}}
+    NAME = 'presseurop'
+    DESCRIPTION = u'Presseurop website'
+    BROWSER = NewspaperPresseuropBrowser
+    RSSID = rssid
+    CONFIG = BackendConfig(Value('lang', label='Lang of articles',
+                           choices={'fr': 'fr', 'de': 'de', 'en': 'en', 'cs': 'cs', 'es' : 'es', 'it' : 'it', 'nl' : 'nl', 'pl' : 'pl', 'pt' : 'pt', 'ro' : 'ro'}, default='fr'))
+
+    def __init__(self, *args, **kwargs):
+        GenericNewspaperBackend.__init__(self, *args, **kwargs)
+        self.RSS_FEED = 'http://www.presseurop.eu/%s/rss.xml' % (self.config['lang'].get())
+
+    def iter_threads(self):
+        for article in Newsfeed(self.RSS_FEED, self.RSSID).iter_entries():
+            thread = Thread(article.link)
+            thread.title =  article.title
+            thread.date = article.datetime
+            yield(thread)
+
diff --git a/modules/presseurop/browser.py b/modules/presseurop/browser.py
new file mode 100644
index 0000000..52e94ea
--- /dev/null
+++ b/modules/presseurop/browser.py
@@ -0,0 +1,45 @@
+"browser for presseurop website"
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012  Florent Fourcot
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .pages.article import ArticlePage
+from weboob.tools.browser import BaseBrowser
+
+
+class NewspaperPresseuropBrowser(BaseBrowser):
+    "NewspaperPresseuropBrowser class"
+    PAGES = {
+             "http://www.presseurop.eu/.*": ArticlePage,
+            }
+
+    def is_logged(self):
+        return False
+
+    def login(self):
+        pass
+
+    def fillobj(self, obj, fields):
+        pass
+
+    def get_content(self, _id):
+        "return page article content"
+        print _id
+        self.location(_id)
+        return self.page.get_article(_id)
+
diff --git a/modules/presseurop/pages/__init__.py b/modules/presseurop/pages/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/modules/presseurop/pages/article.py b/modules/presseurop/pages/article.py
new file mode 100644
index 0000000..09c7080
--- /dev/null
+++ b/modules/presseurop/pages/article.py
@@ -0,0 +1,39 @@
+"ArticlePage object for Presseurope"
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Florent Fourcot
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage
+class ArticlePage(GenericNewsPage):
+    "ArticlePage object for presseurop"
+
+    def on_loaded(self):
+        self.main_div = self.document.getroot()
+        self.element_title_selector = "title"
+        self.element_author_selector    = "div.content-author>a"
+        self.element_body_selector      = "div.block"
+
+    def get_body(self):
+        element_body = self.get_element_body()
+
+        return self.parser.tostring(element_body)
+
+    def get_title(self):
+        title = GenericNewsPage.get_title(self)
+        title = title.split('|')[0]
+        return title
diff --git a/modules/presseurop/test.py b/modules/presseurop/test.py
new file mode 100644
index 0000000..f2c397c
--- /dev/null
+++ b/modules/presseurop/test.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Florent Fourcot
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+
+
+__all__ = ['PresseuropTest']
+
+
+class PresseuropTest(BackendTest):
+    BACKEND = 'presseurop'
+
+    def test_new_messages(self):
+        for message in self.backend.iter_unread_messages():
+            pass
diff --git a/modules/presseurop/tools.py b/modules/presseurop/tools.py
new file mode 100644
index 0000000..745c12d
--- /dev/null
+++ b/modules/presseurop/tools.py
@@ -0,0 +1,41 @@
+"tools for presseurop backend"
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012  Florent Fourcot
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import re
+
+def url2id(url):
+    "return an id from an url"
+    regexp = re.compile(".*/([0-9]+)-.*")
+    id = regexp.match(url).group(1)
+    return id
+
+def rssid(self, entry):
+    return url2id(entry.link)
-- 
1.7.2.5




More information about the weboob mailing list