[weboob] [PATCH 1/1] new module openedx

Simon Lipp laiquo at hwold.net
Sat Feb 13 11:11:53 CET 2016


I would appreciate some advice on how to properly do what I did in an ugly
way in browser.py:62.

(Basically: how to add a specific header (X-Requested-With) for only two
specific URLs, browser.threads and browser.messages.)

On 13/02/2016 11:08, Simon Lipp wrote:
> Signed-off-by: Simon Lipp <laiquo at hwold.net>
> ---
>   modules/openedx/__init__.py |  24 +++++++
>   modules/openedx/browser.py  |  77 +++++++++++++++++++++
>   modules/openedx/module.py   | 164 ++++++++++++++++++++++++++++++++++++++++++++
>   modules/openedx/test.py     |  37 ++++++++++
>   4 files changed, 302 insertions(+)
>   create mode 100644 modules/openedx/__init__.py
>   create mode 100644 modules/openedx/browser.py
>   create mode 100644 modules/openedx/module.py
>   create mode 100644 modules/openedx/test.py
>
> diff --git a/modules/openedx/__init__.py b/modules/openedx/__init__.py
> new file mode 100644
> index 0000000..a254697
> --- /dev/null
> +++ b/modules/openedx/__init__.py
> @@ -0,0 +1,24 @@
> +# -*- coding: utf-8 -*-
> +
> +# Copyright(C) 2016      Simon Lipp
> +#
> +# This file is part of weboob.
> +#
> +# weboob is free software: you can redistribute it and/or modify
> +# it under the terms of the GNU Affero General Public License as published by
> +# the Free Software Foundation, either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# weboob is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU Affero General Public License for more details.
> +#
> +# You should have received a copy of the GNU Affero General Public License
> +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
> +
> +
> +from .module import OpenEDXModule
> +
> +
> +__all__ = ['OpenEDXModule']
> diff --git a/modules/openedx/browser.py b/modules/openedx/browser.py
> new file mode 100644
> index 0000000..2fb5bd2
> --- /dev/null
> +++ b/modules/openedx/browser.py
> @@ -0,0 +1,77 @@
> +# -*- coding: utf-8 -*-
> +
> +# Copyright(C) 2016      Simon Lipp
> +#
> +# This file is part of weboob.
> +#
> +# weboob is free software: you can redistribute it and/or modify
> +# it under the terms of the GNU Affero General Public License as published by
> +# the Free Software Foundation, either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# weboob is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU Affero General Public License for more details.
> +#
> +# You should have received a copy of the GNU Affero General Public License
> +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
> +
> +from requests import Request
> +
> +from weboob.browser import LoginBrowser, URL, need_login
> +from weboob.browser.pages import RawPage, JsonPage, HTMLPage
> +from weboob.browser.exceptions import ClientError
> +from weboob.exceptions import BrowserIncorrectPassword
> +
> +class LoginPage(HTMLPage):
> +    def login(self, username, password):
> +        params = {
> +            "email": username,
> +            "password": password,
> +            "remember": "false"
> +        }
> +        req = Request("post", "/user_api/v1/account/login_session/", data = params)
> +        req.headers.setdefault("Referer", self.url)
> +        try:
> +            self.browser.open(req)
> +        except ClientError as e:
> +            if e.response.status_code == 403:
> +                raise BrowserIncorrectPassword()
> +            else:
> +                raise
> +
> +        self.logged = True
> +
> +class OpenEDXBrowser(LoginBrowser):
> +    login = URL('/login', LoginPage)
> +    login_result = URL("/user_api/v1/account/login_session/", RawPage)
> +    threads = URL(r'/courses/(?P<course>.+)/discussion/forum\?ajax=1&page=(?P<page>\d+)&sort_key=date&sort_order=desc', JsonPage)
> +    messages = URL(r'/courses/(?P<course>.+)/discussion/forum/(?P<topic>.+)/threads/(?P<id>.+)\?ajax=1&resp_skip=(?P<skip>\d+)&resp_limit=100', JsonPage)
> +    thread = URL(r'/courses/(?P<course>.+)/discussion/forum/(?P<topic>.+)/threads/(?P<id>.+)', HTMLPage)
> +
> +    def __init__(self, url, course, *args, **kwargs):
> +        self.BASEURL = url
> +        self.course = course
> +        LoginBrowser.__init__(self, *args, **kwargs)
> +
> +    def prepare_request(self, req):
> +        token = self.session.cookies.get("csrftoken")
> +        if token:
> +            req.headers.setdefault("X-CSRFToken", token)
> +        if 'ajax=1' in req.url: ## TODO: how to *cleanly* bind an header to a page ?
> +            req.headers.setdefault("X-Requested-With", "XMLHttpRequest")
> +        return LoginBrowser.prepare_request(self, req)
> +
> +    def do_login(self):
> +        self.login.stay_or_go()
> +        self.page.login(self.username, self.password)
> +
> +    @need_login
> +    def get_threads(self, page=1):
> +        return self.threads.open(course = self.course, page = page)
> +
> +    @need_login
> +    def get_thread(self, topic, id, skip):
> +        return self.messages.open(course = self.course,
> +                topic = topic, id = id, skip = skip)
> diff --git a/modules/openedx/module.py b/modules/openedx/module.py
> new file mode 100644
> index 0000000..79ec8a4
> --- /dev/null
> +++ b/modules/openedx/module.py
> @@ -0,0 +1,164 @@
> +# -*- coding: utf-8 -*-
> +
> +# Copyright(C) 2016      Simon Lipp
> +#
> +# This file is part of weboob.
> +#
> +# weboob is free software: you can redistribute it and/or modify
> +# it under the terms of the GNU Affero General Public License as published by
> +# the Free Software Foundation, either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# weboob is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU Affero General Public License for more details.
> +#
> +# You should have received a copy of the GNU Affero General Public License
> +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
> +
> +import dateutil.parser
> +
> +from subprocess import Popen, PIPE
> +
> +from weboob.tools.backend import Module, BackendConfig
> +from weboob.tools.value import Value, ValueBool, ValueBackendPassword
> +from weboob.capabilities.messages import CapMessages, Thread, Message
> +from weboob.capabilities.base import StringField
> +
> +from .browser import OpenEDXBrowser
> +
> +__all__ = ['OpenEDXModule']
> +
> +class OpenEDXModule(Module, CapMessages):
> +    NAME = 'openedx'
> +    DESCRIPTION = u'Discussions on OpenEDX-powered coursewares'
> +    MAINTAINER = u'Simon Lipp'
> +    EMAIL = 'laiquo at hwold.net'
> +    LICENSE = 'AGPLv3+'
> +    VERSION = '1.2'
> +
> +    CONFIG = BackendConfig(Value('username',                label='Username', default=''),
> +                           ValueBackendPassword('password', label='Password', default=''),
> +                           Value('url',                     label='Site URL', default='https://courses.edx.org/'),
> +                           Value('course',                  label='Course ID', default='edX/DemoX.1/2014'))
> +
> +    BROWSER = OpenEDXBrowser
> +
> +    STORAGE = {'seen': {}}
> +
> +    def __init__(self, *args, **kwargs):
> +        Module.__init__(self, *args, **kwargs)
> +
> +        def pandoc_formatter(text):
> +            return Popen(["pandoc", "-f", "markdown", "-t", "html", "--mathml", "-"],
> +                    stdin=PIPE, stdout=PIPE).communicate(text.encode('utf-8'))[0].decode('utf-8')
> +
> +        try:
> +            from markdown import Markdown
> +        except ImportError:
> +            Markdown = None
> +
> +        self.default_flags = Message.IS_HTML
> +        try:
> +            Popen(["pandoc", "-v"], stdout=PIPE, stderr=PIPE).communicate()
> +            self.formatter = pandoc_formatter
> +        except OSError:
> +            if Markdown:
> +                self.formatter = Markdown().convert
> +            else:
> +                self.formatter = (lambda text: text)
> +                self.default_flags = 0
> +
> +    def create_default_browser(self):
> +        return self.create_browser(self.config['url'].get(), self.config['course'].get(),
> +                self.config['username'].get(), self.config['password'].get())
> +
> +    def _build_thread(self, data):
> +        thread = Thread("%s.%s" % (data["commentable_id"], data["id"]))
> +        thread.title = data["title"]
> +        thread.date = dateutil.parser.parse(data["created_at"])
> +        thread.url = self.browser.thread.build(course=self.browser.course, topic=data["commentable_id"], id=data["id"])
> +        thread.root = self._build_message(data, thread)
> +        thread._messages_count = data["comments_count"] + 1
> +        return thread
> +
> +    def _build_message(self, data, thread, parent = None):
> +        flags = self.default_flags
> +        if data["id"] not in self.storage.get("seen", thread.id, default=[]):
> +            flags |= Message.IS_UNREAD
> +
> +        message = Message(thread = thread,
> +                id = data["id"],
> +                title = None,
> +                sender = data.get("username"),
> +                receivers = None,
> +                date = dateutil.parser.parse(data["created_at"]),
> +                content = self.formatter(data["body"]),
> +                flags = flags,
> +                parent = parent,
> +                url = thread.url)
> +        self._append_children(data, message, thread)
> +        return message
> +
> +    def _append_children(self, data, message, thread):
> +        if "endorsed_responses" in data or "children" in data or "non_endorsed_responses" in data:
> +            message.children = []
> +            for child in data.get("endorsed_responses", []) + data.get("children", []) + data.get('non_endorsed_responses', []):
> +                message.children.append(self._build_message(child, thread, message))
> +
> +    def fill_message(self, message, fields):
> +        # The only unfilled messages are the root messages of threads returned
> +        # by iter_threads(). Only `children` in unfilled.
> +
> +        if 'children' in fields and message.thread.root.id == message.id:
> +            message.children = self.get_thread(thread.id).root.children
> +
> +        return message
> +
> +    #### CapMessages ##############################################
> +
> +    def get_thread(self, id):
> +        topic, id = id.rsplit(".", 1)
> +        thread = None
> +        skip = 0
> +
> +        while True:
> +            data = self.browser.get_thread(topic, id, skip).doc["content"]
> +            if thread is None:
> +                thread = self._build_thread(data)
> +            else:
> +                self._append_children(data, thread.root, thread)
> +
> +            if data["resp_skip"] + data["resp_limit"] >= data["resp_total"]:
> +                return thread
> +            else:
> +                skip += 100
> +
> +    def iter_threads(self):
> +        page = 1
> +        while True:
> +            tlist = self.browser.get_threads(page).doc
> +            for data in tlist["discussion_data"]:
> +                yield self._build_thread(data)
> +
> +            if tlist["page"] < tlist["num_pages"]:
> +                page += 1
> +            else:
> +                break
> +
> +    def iter_unread_messages(self):
> +        for thread in self.iter_threads():
> +            if thread._messages_count > len(self.storage.get('seen', thread.id, default=[])):
> +                thread = self.get_thread(thread.id)
> +                for m in thread.iter_all_messages():
> +                    if m.flags & m.IS_UNREAD:
> +                        yield m
> +
> +    def set_message_read(self, message):
> +        thread_seen = self.storage.get('seen', message.thread.id, default=[])
> +        thread_seen.append(message.id)
> +        self.storage.set('seen', message.thread.id, thread_seen)
> +        self.storage.save()
> +
> +    OBJECTS = {Message: fill_message}
> diff --git a/modules/openedx/test.py b/modules/openedx/test.py
> new file mode 100644
> index 0000000..15e8790
> --- /dev/null
> +++ b/modules/openedx/test.py
> @@ -0,0 +1,37 @@
> +# -*- coding: utf-8 -*-
> +
> +# Copyright(C) 2016      Simon Lipp
> +#
> +# This file is part of weboob.
> +#
> +# weboob is free software: you can redistribute it and/or modify
> +# it under the terms of the GNU Affero General Public License as published by
> +# the Free Software Foundation, either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# weboob is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU Affero General Public License for more details.
> +#
> +# You should have received a copy of the GNU Affero General Public License
> +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
> +
> +
> +from weboob.tools.test import BackendTest
> +
> +
> +class OpenEDXTest(BackendTest):
> +    MODULE = 'openedx'
> +
> +    def test_openedx(self):
> +        thread = next(self.backend.iter_threads())
> +        thread = self.backend.get_thread(thread.id)
> +        self.assertTrue(thread.id)
> +        self.assertTrue(thread.title)
> +        self.assertTrue(thread.url)
> +        self.assertTrue(thread.root.id)
> +        self.assertTrue(thread.root.content)
> +        self.assertTrue(thread.root.children)
> +        self.assertTrue(thread.root.url)
> +        self.assertTrue(thread.root.date)



More information about the weboob mailing list