[weboob] [PATCH 1/1] Fix date translation

Simon Lipp laiquo at hwold.net
Tue Jan 3 16:47:52 CET 2017


This does not work as intended:

	re.sub(u'\bdéc\b', 'december', '31 déc, 21h02')

This is because u'\b' is passed to the regexp engine as '\x08', which is
NOT the word-boundary assertion: it only matches the ASCII backspace
(non-printable) character. The correct way to pass a \b to the regexp
engine is either u'\\b' or ur'\b'.

Signed-off-by: Simon Lipp <laiquo at hwold.net>
---
 weboob/tools/date.py | 118 +++++++++++++++++++++++++--------------------------
 1 file changed, 59 insertions(+), 59 deletions(-)

diff --git a/weboob/tools/date.py b/weboob/tools/date.py
index 4f075d40d..6213911b1 100644
--- a/weboob/tools/date.py
+++ b/weboob/tools/date.py
@@ -230,66 +230,66 @@ class ChaoticDateGuesser(LinearDateGuesser):
             raise ValueError("%s is inferior to min_date %s" % (parsed_date, self.min_date))
 
 
-DATE_TRANSLATE_FR = [(re.compile(u'janvier', re.I),   u'january'),
-                     (re.compile(u'février', re.I),   u'february'),
-                     (re.compile(u'mars', re.I),      u'march'),
-                     (re.compile(u'avril', re.I),     u'april'),
-                     (re.compile(u'mai', re.I),       u'may'),
-                     (re.compile(u'juin', re.I),      u'june'),
-                     (re.compile(u'juillet', re.I),   u'july'),
-                     (re.compile(u'août?', re.I),      u'august'),
-                     (re.compile(u'septembre', re.I), u'september'),
-                     (re.compile(u'octobre', re.I),   u'october'),
-                     (re.compile(u'novembre', re.I),  u'november'),
-                     (re.compile(u'décembre', re.I),  u'december'),
-                     (re.compile(u'jan\.', re.I),   u'january'),
-                     (re.compile(u'janv\.', re.I),   u'january'),
-                     (re.compile(u'\bjan\b', re.I),   u'january'),
-                     (re.compile(u'fév\.', re.I),   u'february'),
-                     (re.compile(u'févr\.', re.I),   u'february'),
-                     (re.compile(u'\bfév\b', re.I),   u'february'),
-                     (re.compile(u'avr\.', re.I),     u'april'),
-                     (re.compile(u'\bavr\b', re.I),     u'april'),
-                     (re.compile(u'juil\.', re.I),   u'july'),
-                     (re.compile(u'juill\.', re.I),   u'july'),
-                     (re.compile(u'\bjuil\b', re.I),   u'july'),
-                     (re.compile(u'sep\.', re.I), u'september'),
-                     (re.compile(u'sept\.', re.I), u'september'),
-                     (re.compile(u'\bsep\b', re.I), u'september'),
-                     (re.compile(u'oct\.', re.I),   u'october'),
-                     (re.compile(u'\boct\b', re.I),   u'october'),
-                     (re.compile(u'nov\.', re.I),  u'november'),
-                     (re.compile(u'\bnov\b', re.I),  u'november'),
-                     (re.compile(u'déc\.', re.I),  u'december'),
-                     (re.compile(u'\bdéc\b', re.I),  u'december'),
-                     (re.compile(u'lundi', re.I),     u'monday'),
-                     (re.compile(u'mardi', re.I),     u'tuesday'),
-                     (re.compile(u'mercredi', re.I),  u'wednesday'),
-                     (re.compile(u'jeudi', re.I),     u'thursday'),
-                     (re.compile(u'vendredi', re.I),  u'friday'),
-                     (re.compile(u'samedi', re.I),    u'saturday'),
-                     (re.compile(u'dimanche', re.I),  u'sunday')]
-
-
-DATE_TRANSLATE_IT = [(re.compile(u'gennaio', re.I),   u'january'),
+DATE_TRANSLATE_FR = [(re.compile(ur'janvier', re.I),   u'january'),
+                     (re.compile(ur'février', re.I),   u'february'),
+                     (re.compile(ur'mars', re.I),      u'march'),
+                     (re.compile(ur'avril', re.I),     u'april'),
+                     (re.compile(ur'mai', re.I),       u'may'),
+                     (re.compile(ur'juin', re.I),      u'june'),
+                     (re.compile(ur'juillet', re.I),   u'july'),
+                     (re.compile(ur'août?', re.I),      u'august'),
+                     (re.compile(ur'septembre', re.I), u'september'),
+                     (re.compile(ur'octobre', re.I),   u'october'),
+                     (re.compile(ur'novembre', re.I),  u'november'),
+                     (re.compile(ur'décembre', re.I),  u'december'),
+                     (re.compile(ur'jan\.', re.I),   u'january'),
+                     (re.compile(ur'janv\.', re.I),   u'january'),
+                     (re.compile(ur'\bjan\b', re.I),   u'january'),
+                     (re.compile(ur'fév\.', re.I),   u'february'),
+                     (re.compile(ur'févr\.', re.I),   u'february'),
+                     (re.compile(ur'\bfév\b', re.I),   u'february'),
+                     (re.compile(ur'avr\.', re.I),     u'april'),
+                     (re.compile(ur'\bavr\b', re.I),     u'april'),
+                     (re.compile(ur'juil\.', re.I),   u'july'),
+                     (re.compile(ur'juill\.', re.I),   u'july'),
+                     (re.compile(ur'\bjuil\b', re.I),   u'july'),
+                     (re.compile(ur'sep\.', re.I), u'september'),
+                     (re.compile(ur'sept\.', re.I), u'september'),
+                     (re.compile(ur'\bsep\b', re.I), u'september'),
+                     (re.compile(ur'oct\.', re.I),   u'october'),
+                     (re.compile(ur'\boct\b', re.I),   u'october'),
+                     (re.compile(ur'nov\.', re.I),  u'november'),
+                     (re.compile(ur'\bnov\b', re.I),  u'november'),
+                     (re.compile(ur'déc\.', re.I),  u'december'),
+                     (re.compile(ur'\bdéc\b', re.I),  u'december'),
+                     (re.compile(ur'lundi', re.I),     u'monday'),
+                     (re.compile(ur'mardi', re.I),     u'tuesday'),
+                     (re.compile(ur'mercredi', re.I),  u'wednesday'),
+                     (re.compile(ur'jeudi', re.I),     u'thursday'),
+                     (re.compile(ur'vendredi', re.I),  u'friday'),
+                     (re.compile(ur'samedi', re.I),    u'saturday'),
+                     (re.compile(ur'dimanche', re.I),  u'sunday')]
+
+
+DATE_TRANSLATE_IT = [(re.compile(ur'gennaio', re.I),   u'january'),
                      (re.compile(u'febbraio', re.I),   u'february'),
-                     (re.compile(u'marzo', re.I),      u'march'),
-                     (re.compile(u'aprile', re.I),     u'april'),
-                     (re.compile(u'maggio', re.I),       u'may'),
-                     (re.compile(u'giugno', re.I),      u'june'),
-                     (re.compile(u'luglio', re.I),   u'july'),
-                     (re.compile(u'agosto', re.I),      u'august'),
-                     (re.compile(u'settembre', re.I), u'september'),
-                     (re.compile(u'ottobre', re.I),   u'october'),
-                     (re.compile(u'novembre', re.I),  u'november'),
-                     (re.compile(u'dicembre', re.I),  u'december'),
-                     (re.compile(u'luned[iì]', re.I),     u'monday'),
-                     (re.compile(u'marted[iì]', re.I),     u'tuesday'),
-                     (re.compile(u'mercoled[iì]', re.I),  u'wednesday'),
-                     (re.compile(u'gioved[iì]', re.I),     u'thursday'),
-                     (re.compile(u'venerd[iì]', re.I),  u'friday'),
-                     (re.compile(u'sabato', re.I),    u'saturday'),
-                     (re.compile(u'domenica', re.I),  u'sunday')]
+                     (re.compile(ur'marzo', re.I),      u'march'),
+                     (re.compile(ur'aprile', re.I),     u'april'),
+                     (re.compile(ur'maggio', re.I),       u'may'),
+                     (re.compile(ur'giugno', re.I),      u'june'),
+                     (re.compile(ur'luglio', re.I),   u'july'),
+                     (re.compile(ur'agosto', re.I),      u'august'),
+                     (re.compile(ur'settembre', re.I), u'september'),
+                     (re.compile(ur'ottobre', re.I),   u'october'),
+                     (re.compile(ur'novembre', re.I),  u'november'),
+                     (re.compile(ur'dicembre', re.I),  u'december'),
+                     (re.compile(ur'luned[iì]', re.I),     u'monday'),
+                     (re.compile(ur'marted[iì]', re.I),     u'tuesday'),
+                     (re.compile(ur'mercoled[iì]', re.I),  u'wednesday'),
+                     (re.compile(ur'gioved[iì]', re.I),     u'thursday'),
+                     (re.compile(ur'venerd[iì]', re.I),  u'friday'),
+                     (re.compile(ur'sabato', re.I),    u'saturday'),
+                     (re.compile(ur'domenica', re.I),  u'sunday')]
 
 
 def parse_french_date(date, **kwargs):
-- 
2.11.0




More information about the weboob mailing list