from core import DataMigrator, EnumMapper
from _mysql_exceptions import IntegrityError
from MySQLdb.constants.ER import DUP_ENTRY
+import re
class ItemsCopyMigrator(DataMigrator):
    """Perform a preliminary raw copy of the item table"""

    def process(self):
        """Run EnumMapper.process() on this migrator.

        The call receives 'Phono', 'Cote_Phono', 'media_items' and self.map,
        in that order; their meaning is defined by EnumMapper.process().
        """
        # NOTE(review): EnumMapper.process is invoked unbound with a
        # DataMigrator instance, and self.map is not defined in this class --
        # confirm both are valid for the base classes in use.
        call_args = ('Phono', 'Cote_Phono', 'media_items', self.map)
        EnumMapper.process(self, *call_args)
+class ItemsDateConverter(DataMigrator):
+ """Convert items recording dates"""
+
+ implements(IDataMigrator)
+
+ def get_name(self):
+ return "items:date"
+
+ def process(self):
+
+ names = {
+ 1: ur'jan(vier|v\.?|\.?)',
+ 2: ur'f[eé]v(rier|r\.?|\.?)',
+ 3: ur'mar(s|\.?)',
+ 4: ur'avr(il|\.?)',
+ 5: ur'mai',
+ 6: ur'juin',
+ 7: ur'juil(let|\.?)',
+ 8: ur'a[ôo][ûu]t',
+ 9: ur'sep(tembre|t\.?|\.?)',
+ 10: ur'oct(obre|\.?)',
+ 11: ur'd[ée]c(embre|\.?)',
+ 12: ur'nov(embre|\.?)'
+ }
+
+ namepattern = u'|'.join([names[i] for i in names])
+
+ patterns = [
+ # month only
+ ur'^ *(?P<name>' + namepattern + ') *$',
+
+ # day and month, no year
+ ur'^ *(?P<day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *$',
+
+ # from and until month
+ ur'^ *(?P<name>' + namepattern + ') *[,-] *(?P<until_name>' + namepattern + ') *$',
+
+ # from year only
+ ur'^ *(été|) *(?P<year>[0-9]{2,4})[ ?]*$',
+
+ # from and until year
+ ur'^ *(?P<year>[0-9]{4}) *[/,-] *(?P<until_year>[0-9]{2,4}) *$',
+
+ # month and year
+ ur'^ *(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{2,4}) *$',
+ ur'^ *(?P<name>' + namepattern + ') *\.? *(?P<year>[0-9]{2,4}) *-'
+ ' *(?P<until_name>' + namepattern + ') *\.? *(?P<until_year>[0-9]{2,4}) *$',
+ ur'^ *(début|mi|fin|) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[ ?]*$',
+
+ # from month, until month, and year
+ ur'^ *(?P<month>[0-9]{1,2}) *[aà] *(?P<until_month>[0-9]{1,2}) *- *(?P<year>[0-9]{4}) *$',
+ ur'^ *(?P<name>' + namepattern + ') *[,/-] *(?P<until_name>' + namepattern + ') *[./]? *(?P<year>[0-9]{2,4}) *$',
+
+ # from month+year, until month+year
+ ur'^ *(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{2,4}) *; *(?P<until_month>[0-9]{1,2}) *[_./-] *(?P<until_year>[0-9]{2,4}) *$',
+ ur'^ *(?P<month>[0-9]{1,2}) */ *(?P<year>[0-9]{2,4}) *- *(?P<until_month>[0-9]{1,2}) */ *(?P<until_year>[0-9]{2,4}) *$',
+
+ # day, month and year
+ ur'^ *(?P<day>[0-9]{1,2}) *[_./-] *0?(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{1,4}) *$',
+ ur'^ *(?P<year>[0-9]{4}) *[_./-] *(?P<month>[0-9]{2}) *[_./-] *(?P<day>[0-9]{2}) *$',
+ ur'^ *(?P<day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[? ]*$',
+
+ # from+until day, single month and year
+ ur'^ *(?P<day>[0-9]{1,2})[ &+,-]+(?P<until_day>[0-9]{1,2}) *[ _./-] *(?P<month>[0-9]{1,2}) *[ _./-] *(?P<year>[0-9]{2,4}) *$',
+ ur'^ *(?P<day>[0-9]{1,2}) *(-|/|et|au) *(?P<until_day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[? ]*$',
+
+ ]
+
+ self.src_cursor.execute("SELECT COUNT(*) FROM Phono")
+ self.stats = {
+ 'total' : self.src_cursor.fetchone()[0],
+ 'matched' : 0,
+ 'empty' : 0,
+ 'unsignificant' : 0,
+ 'unparsed' : 0
+ }
+
+ self.src_cursor.execute("SELECT COUNT(*) FROM Phono WHERE Dates_Enregistr REGEXP '^ *$'")
+ self.stats['empty'] = self.src_cursor.fetchone()[0]
+
+ self.src_cursor.execute("SELECT Dates_Enregistr FROM Phono WHERE Dates_Enregistr NOT REGEXP '^ *$'")
+ while True:
+ row = self.src_cursor.fetchone()
+ if not row:
+ break
+
+ recognized = False
+ if re.match('^ *(nn?|=|id|idem|\?+|[-_]?1|[0 ]*) *$', row[0], re.IGNORECASE):
+ self.stats['unsignificant'] += 1
+ recognized = True
+ else:
+ for p in patterns:
+ if re.match(p, row[0], re.IGNORECASE):
+ self.stats['matched'] += 1
+ recognized = True
+ break
+
+ if not recognized:
+ #print '|%s|' % row[0]
+ self.stats['unparsed'] += 1
+
+
+
+
+
+