[weboob] [PATCH 1/1] cragr: CA Centre website has changed (18_04_2012) - update accordingly

Richard Genoud richard.genoud at gmail.com
Thu Apr 19 16:25:05 CEST 2012


The account page of the website now looks like this:
<div id="accountListSection">
<div class="headline" style="background-color:#ffffff; border-bottom:1px solid black; text-align:center;">
  <br />
  <a href="/accounting/showAccountDetail?accountIndex=0">
    <b>
      <span style=" color:#222222;"> CCHQ </span>
    </b>
  </a>
  <br /> MONSIEUR XXXX XXXX <br />
  <span style="color:#7d7d7d;"> 12345678910 </span>
  <br />
  <img alt="separator" src="/images/img/evol/img_accounts _separator.gif/297/99/GIF" class="&#xA;inlineImg&#xA; " />
  <br />
  <b>
    <big> 999 999,99 €</big>
  </b>
  <br />
  <br />
  <div class="clr">
  </div>
</div>

And the history page now looks like this:
<div class="dv" style="background-color:#E2E9E9;">
    <span style="color:#046366;">
      COMPTE N° 12345678910 </span>
    <span style="color:#13484a;">
      au 19/04/12 </span>
    <div class="clr">
    </div>
</div>

Signed-off-by: Richard Genoud <richard.genoud at gmail.com>
---
 modules/cragr/pages/accounts_list.py |   18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/modules/cragr/pages/accounts_list.py b/modules/cragr/pages/accounts_list.py
index b9b3dc6..b5794d1 100644
--- a/modules/cragr/pages/accounts_list.py
+++ b/modules/cragr/pages/accounts_list.py
@@ -46,12 +46,15 @@ class AccountsList(CragrBasePage):
         l = []
 
         for div in self.document.getiterator('div'):
-            if div.attrib.get('class', '') == 'dv' and div.getchildren()[0].tag in ('a', 'br'):
+            if div.attrib.get('class', '') in ('dv', 'headline') and div.getchildren()[0].tag in ('a', 'br'):
                 account = Account()
                 account._link_id = None
                 if div.getchildren()[0].tag == 'a':
                     # This is at least present on CA Nord-Est
                     # Note: we do not know yet how history-less accounts are displayed by this layout
+                    if len(div.getchildren()[0].get('href')) < 2 :
+                        # CA centre has a href="/" link, not interesting there
+                        continue
                     account.label = ' '.join(div.find('a').text.split()[:-1])
                     account._link_id = div.find('a').get('href', '')
                     account.id = div.find('a').text.split()[-1]
@@ -59,16 +62,21 @@ class AccountsList(CragrBasePage):
                 else:
                     # This is at least present on CA Toulouse
                     first_link = div.find('a')
+                    account.id = div.findall('br')[1].tail.strip()
                     if first_link is not None:
                         account.label   = first_link.text.strip()
                         account._link_id = first_link.get('href', '')
-                        s = div.find('div').find('b').text
+                        s_node = div.find('div').find('b')
+                        if s_node is None:
+                            # This is present on CA Centre
+                            s_node = div.findall('b')[0].find('big')
+                            account.id = div.find('span').text.strip()
+                        s = s_node.text
                     else:
                         # there is no link to any history page for accounts like "PEA" or "TITRES"
                         account.label   = div.findall('br')[0].tail.strip()
                         account._link_id = None
                         s = div.xpath('following-sibling::div//b')[0].text
-                    account.id = div.findall('br')[1].tail.strip()
                 account.balance = clean_amount(s)
                 if account.label:
                     l.append(account)
@@ -92,7 +100,7 @@ class AccountsList(CragrBasePage):
         title_spans = self.document.xpath('/html/body//div[@class="dv"]/span')
         for title_span in title_spans:
             title_text = title_span.text_content().strip().replace("\n", '')
-            if (re.match('.*Compte.*n.*[0-9]+.*au.*', title_text)):
+            if (re.match('.*Compte.*n.*[0-9]+', title_text, flags=re.IGNORECASE)):
                 return True
         return False
 
@@ -136,7 +144,7 @@ class AccountsList(CragrBasePage):
             or False if the link is not present.
         """
         # tested on CA centre france
-        a = self.document.xpath('/html/body//div[@class="navlink"]//a[contains(text(), "Voir les 25 suivants")]')
+        a = self.document.xpath('/html/body//div[@class="headline"]//a[contains(text(), "Voir les 25 suivants")]')
         if not a:
             return False
         else:
-- 
1.7.10



More information about the weboob mailing list