--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# CREM Database migrator
+
+# Copyright (C) 2009 Samalyse SARL
+# Author: Olivier Guilyardi <olivier samalyse com>
+
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from telemeta.core import *
+from api import IDataMigrator
+from core import DataMigrator
+import re
+from datetime import date, datetime
+
+class CollectionsYearConverter(DataMigrator):
+ """Perform a preliminary conversion of the collections recording years"""
+
+ implements(IDataMigrator)
+
+ def get_name(self):
+ return "dates:collections"
+
+ def process(self):
+
+ self.stats = {
+ 'total': 0,
+ 'unsignificant': 0,
+ 'unparsed': 0,
+ 'migrated': 0,
+ 'empty': 0,
+ 'incoherent': 0
+ }
+
+ self.src_cursor.execute("SELECT Cote, Annee_Enr FROM Support")
+
+ while True:
+ row = self.src_cursor.fetchone()
+ if not row:
+ break
+
+ old_code = row[0]
+ year_str = row[1]
+
+ from_year = 0
+ to_year = 0
+
+ self.stats['total'] += 1
+ if year_str == '':
+ self.stats['empty'] += 1
+ elif re.match('^[+-]?[Nn12?]$', year_str):
+ self.stats['unsignificant'] += 1
+ else:
+ match = re.match(r'^<?([0-9]{2,4})[;/,>-]\'?([0-9]{2,4})[\';/,>?-]?$', year_str)
+ if not match:
+ match = re.match(r'^([0-9]{2,4})[;/,>?-]?$', year_str)
+
+ if match:
+ from_year = self.parse_year(int(match.group(1)))
+ if match.lastindex > 1:
+ to_year = self.parse_year(int(match.group(2)))
+ else:
+ to_year = from_year
+
+ if from_year or to_year and (not to_year or to_year >= from_year):
+ self.stats['migrated'] += 1
+ else:
+ self.stats['incoherent'] += 1
+
+ else:
+ self.stats['unparsed'] += 1
+
+ self.target_cursor.execute("UPDATE media_collections SET recorded_from_year = %s, "
+ "recorded_to_year = %s WHERE old_code = %s", (from_year, to_year, old_code))
+
+class ItemsDateConverter(DataMigrator):
+ """Convert items recording dates"""
+
+ implements(IDataMigrator)
+
+ def get_name(self):
+ return "dates:items"
+
+ def parse_month(self, str, patterns):
+ for i in patterns:
+ if re.match('^' + patterns[i] + '$', str, re.IGNORECASE):
+ return i
+ return 0
+
+ def last_day_of_month(self, month, year):
+ if month == 2:
+ if year % 4 == 0:
+ return 29
+ else:
+ return 28
+ elif month < 8:
+ if month % 2 == 1:
+ return 31
+ else:
+ return 30
+ else:
+ if month % 2 == 0:
+ return 31
+ else:
+ return 30
+
+ def parse_date_data(self, data, default_year, month_names):
+
+ if data.has_key('year') and int(data['year']):
+ year = self.parse_year(int(data['year']))
+ if data.has_key('until_year'):
+ until_year = self.parse_year(int(data['until_year']))
+ else:
+ until_year = year
+ else:
+ year = default_year
+ until_year = year
+
+ if data.has_key('name'):
+ data['month'] = self.parse_month(data['name'], month_names)
+
+ if data.has_key('until_name'):
+ data['until_month'] = self.parse_month(data['until_name'], month_names)
+
+ if data.has_key('month') and int(data['month']):
+ month = int(data['month'])
+ if data.has_key('until_month'):
+ until_month = int(data['until_month'])
+ else:
+ until_month = month
+ else:
+ month = 1
+ until_month = 12
+
+ if data.has_key('day') and int(data['day']):
+ day = int(data['day'])
+ if data.has_key('until_day'):
+ until_day = int(data['until_day'])
+ else:
+ until_day = day
+ else:
+ day = 1
+ until_day = self.last_day_of_month(until_month, until_year)
+
+ try:
+ from_date = date(year, month, day)
+ try:
+ until_date = date(until_year, until_month, until_day)
+ if until_date >= from_date:
+ return (from_date, until_date)
+ else:
+ return (from_date, from_date)
+ except ValueError:
+ return (from_date, from_date)
+ except ValueError:
+ return None
+
+ def process(self):
+
+ names = {
+ 1: ur'jan(vier|v\.?|\.?)',
+ 2: ur'f[eé]v(rier|r\.?|\.?)',
+ 3: ur'mar(s|\.?)',
+ 4: ur'avr(il|\.?)',
+ 5: ur'mai',
+ 6: ur'juin',
+ 7: ur'juil(let|\.?)',
+ 8: ur'a[ôo][ûu]t',
+ 9: ur'sep(tembre|t\.?|\.?)',
+ 10: ur'oct(obre|\.?)',
+ 11: ur'd[ée]c(embre|\.?)',
+ 12: ur'nov(embre|\.?)'
+ }
+
+ namepattern = u'|'.join([names[i] for i in names])
+
+ patterns = [
+ # month only
+ ur'^ *(?P<name>' + namepattern + ') *$',
+
+ # day and month, no year
+ ur'^ *(?P<day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *$',
+
+ # from and until month
+ ur'^ *(?P<name>' + namepattern + ') *[,-] *(?P<until_name>' + namepattern + ') *$',
+
+ # from year only
+ ur'^ *(été|) *(?P<year>[0-9]{2,4})[ ?]*$',
+
+ # from and until year
+ ur'^ *(?P<year>[0-9]{4}) *[/,-] *(?P<until_year>[0-9]{2,4}) *$',
+
+ # month and year
+ ur'^ *(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{2,4}) *$',
+ ur'^ *(?P<name>' + namepattern + ') *\.? *(?P<year>[0-9]{2,4}) *-'
+ ' *(?P<until_name>' + namepattern + ') *\.? *(?P<until_year>[0-9]{2,4}) *$',
+ ur'^ *(début|mi|fin|) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[ ?]*$',
+
+ # from month, until month, and year
+ ur'^ *(?P<month>[0-9]{1,2}) *[aà] *(?P<until_month>[0-9]{1,2}) *- *(?P<year>[0-9]{4}) *$',
+ ur'^ *(?P<name>' + namepattern + ') *[,/-] *(?P<until_name>' + namepattern + ') *[./]? *(?P<year>[0-9]{2,4}) *$',
+
+ # from month+year, until month+year
+ ur'^ *(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{2,4}) *; *(?P<until_month>[0-9]{1,2}) *[_./-] *(?P<until_year>[0-9]{2,4}) *$',
+ ur'^ *(?P<month>[0-9]{1,2}) */ *(?P<year>[0-9]{2,4}) *- *(?P<until_month>[0-9]{1,2}) */ *(?P<until_year>[0-9]{2,4}) *$',
+
+ # day, month and year
+ ur'^ *(?P<day>[0-9]{1,2}) *[_./-] *0?(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{1,4}) *$',
+ ur'^ *(?P<year>[0-9]{4}) *[_./-] *(?P<month>[0-9]{2}) *[_./-] *(?P<day>[0-9]{2}) *$',
+ ur'^ *(?P<day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[? ]*$',
+
+ # from+until day, single month and year
+ ur'^ *(?P<day>[0-9]{1,2})[ &+,-]+(?P<until_day>[0-9]{1,2}) *[ _./-] *(?P<month>[0-9]{1,2}) *[ _./-] *(?P<year>[0-9]{2,4}) *$',
+ ur'^ *(?P<day>[0-9]{1,2}) *(-|/|et|au) *(?P<until_day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[? ]*$',
+
+ ]
+
+ self.src_cursor.execute("SELECT COUNT(*) FROM Phono")
+ self.stats = {
+ 'total' : self.src_cursor.fetchone()[0],
+ 'matched' : 0,
+ 'converted' : 0,
+ 'unsignificant' : 0,
+ 'unparsed' : 0,
+ 'incoherent' : 0,
+ 'nosuchitem' : 0
+ }
+
+ self.target_cursor.execute("UPDATE media_items SET recorded_from_date = 0, recorded_to_date = 0")
+
+ self.src_cursor.execute("SELECT Cote_Phono, Annee_Enreg, Dates_Enregistr FROM Phono")
+
+ i = 0
+ while True:
+ row = self.src_cursor.fetchone()
+ if not row:
+ break
+
+ old_code = row[0]
+ year_str = row[1]
+ default_year = self.parse_year(int(year_str))
+ date_str = row[2]
+
+ data = {}
+ if re.match(r'^ *(nn?|=|id|idem|\?+|[-_]?1|[0 /]*) *$', date_str, re.IGNORECASE):
+ self.stats['unsignificant'] += 1
+ else:
+ for p in patterns:
+ match = re.match(p, date_str, re.IGNORECASE)
+ if match:
+ self.stats['matched'] += 1
+ data = match.groupdict()
+ break
+
+ if data or default_year:
+ dates = self.parse_date_data(data, default_year, names)
+ if dates:
+ self.target_cursor.execute('UPDATE media_items SET recorded_from_date = %s, recorded_to_date = %s '
+ 'WHERE old_code = %s',
+ (dates[0].strftime('%Y-%m-%d'), dates[1].strftime('%Y-%m-%d'), old_code))
+ if self.target_cursor.rowcount:
+ self.stats['converted'] += 1
+ else:
+ self.stats['nosuchitem'] += 1
+ else:
+ print "incoherent date (%s): %s -- %s" % (old_code, str(data), str(row))
+ self.stats['incoherent'] += 1
+
+ else:
+ #print '|%s|' % row[0]
+ self.stats['unparsed'] += 1
+
+ i += 1
+ if i % 1000 == 0:
+ self.step()
+
+class DateRangeSynchronizer(DataMigrator):
+ "Ensures collections recording years contain items recording dates"
+
+ implements(IDataMigrator)
+
+ def get_name(self):
+ return "dates:sync"
+
+ def process(self):
+ self.stats = {'synced' : 0}
+
+ self.target_cursor.execute("SELECT id, recorded_from_year, recorded_to_year FROM media_collections")
+
+ items_cursor = self.target_db.cursor()
+ update_cursor = self.target_db.cursor()
+ i = 0
+ while True:
+ row = self.target_cursor.fetchone()
+ if not row:
+ break
+
+ collection_id = row[0]
+ from_year = row[1]
+ to_year = row[2]
+
+ synced = False
+
+ items_cursor.execute("SELECT MIN(YEAR(recorded_from_date)) FROM media_items "
+ "WHERE collection_id = %s AND recorded_from_date <> 0", (collection_id,))
+ itemdata = items_cursor.fetchone()
+ if itemdata:
+ item_min_year = itemdata[0]
+ if item_min_year and (not from_year or item_min_year < from_year):
+ update_cursor.execute("UPDATE media_collections SET recorded_from_year = %s WHERE id = %s", (item_min_year,collection_id))
+ synced = True
+
+ items_cursor.execute("SELECT MAX(YEAR(recorded_to_date)) FROM media_items "
+ "WHERE collection_id = %s AND recorded_to_date <> 0", (collection_id,))
+ itemdata = items_cursor.fetchone()
+ if itemdata:
+ item_max_year = itemdata[0]
+ if item_max_year and (not to_year or item_max_year > to_year):
+ update_cursor.execute("UPDATE media_collections SET recorded_to_year = %s WHERE id = %s", (item_max_year,collection_id))
+ synced = True
+
+ if synced:
+ self.stats['synced'] += 1
+
+ if i % 400 == 0:
+ self.step()
+ i += 1
+
+
+
+
+
+
from core import DataMigrator, EnumMapper
from _mysql_exceptions import IntegrityError
from MySQLdb.constants.ER import DUP_ENTRY
-import re
class ItemsCopyMigrator(DataMigrator):
"""Perform a preliminary raw copy of the item table"""
def process(self):
EnumMapper.process(self, 'Phono', 'Cote_Phono', 'media_items', self.map)
-class ItemsDateConverter(DataMigrator):
- """Convert items recording dates"""
-
- implements(IDataMigrator)
-
- def get_name(self):
- return "items:date"
-
- def process(self):
-
- names = {
- 1: ur'jan(vier|v\.?|\.?)',
- 2: ur'f[eé]v(rier|r\.?|\.?)',
- 3: ur'mar(s|\.?)',
- 4: ur'avr(il|\.?)',
- 5: ur'mai',
- 6: ur'juin',
- 7: ur'juil(let|\.?)',
- 8: ur'a[ôo][ûu]t',
- 9: ur'sep(tembre|t\.?|\.?)',
- 10: ur'oct(obre|\.?)',
- 11: ur'd[ée]c(embre|\.?)',
- 12: ur'nov(embre|\.?)'
- }
-
- namepattern = u'|'.join([names[i] for i in names])
-
- patterns = [
- # month only
- ur'^ *(?P<name>' + namepattern + ') *$',
-
- # day and month, no year
- ur'^ *(?P<day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *$',
-
- # from and until month
- ur'^ *(?P<name>' + namepattern + ') *[,-] *(?P<until_name>' + namepattern + ') *$',
-
- # from year only
- ur'^ *(été|) *(?P<year>[0-9]{2,4})[ ?]*$',
-
- # from and until year
- ur'^ *(?P<year>[0-9]{4}) *[/,-] *(?P<until_year>[0-9]{2,4}) *$',
-
- # month and year
- ur'^ *(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{2,4}) *$',
- ur'^ *(?P<name>' + namepattern + ') *\.? *(?P<year>[0-9]{2,4}) *-'
- ' *(?P<until_name>' + namepattern + ') *\.? *(?P<until_year>[0-9]{2,4}) *$',
- ur'^ *(début|mi|fin|) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[ ?]*$',
-
- # from month, until month, and year
- ur'^ *(?P<month>[0-9]{1,2}) *[aà] *(?P<until_month>[0-9]{1,2}) *- *(?P<year>[0-9]{4}) *$',
- ur'^ *(?P<name>' + namepattern + ') *[,/-] *(?P<until_name>' + namepattern + ') *[./]? *(?P<year>[0-9]{2,4}) *$',
-
- # from month+year, until month+year
- ur'^ *(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{2,4}) *; *(?P<until_month>[0-9]{1,2}) *[_./-] *(?P<until_year>[0-9]{2,4}) *$',
- ur'^ *(?P<month>[0-9]{1,2}) */ *(?P<year>[0-9]{2,4}) *- *(?P<until_month>[0-9]{1,2}) */ *(?P<until_year>[0-9]{2,4}) *$',
-
- # day, month and year
- ur'^ *(?P<day>[0-9]{1,2}) *[_./-] *0?(?P<month>[0-9]{1,2}) *[_./-] *(?P<year>[0-9]{1,4}) *$',
- ur'^ *(?P<year>[0-9]{4}) *[_./-] *(?P<month>[0-9]{2}) *[_./-] *(?P<day>[0-9]{2}) *$',
- ur'^ *(?P<day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[? ]*$',
-
- # from+until day, single month and year
- ur'^ *(?P<day>[0-9]{1,2})[ &+,-]+(?P<until_day>[0-9]{1,2}) *[ _./-] *(?P<month>[0-9]{1,2}) *[ _./-] *(?P<year>[0-9]{2,4}) *$',
- ur'^ *(?P<day>[0-9]{1,2}) *(-|/|et|au) *(?P<until_day>[0-9]{1,2}) *(?P<name>' + namepattern + ') *(?P<year>[0-9]{2,4})[? ]*$',
-
- ]
-
- self.src_cursor.execute("SELECT COUNT(*) FROM Phono")
- self.stats = {
- 'total' : self.src_cursor.fetchone()[0],
- 'matched' : 0,
- 'empty' : 0,
- 'unsignificant' : 0,
- 'unparsed' : 0
- }
-
- self.src_cursor.execute("SELECT COUNT(*) FROM Phono WHERE Dates_Enregistr REGEXP '^ *$'")
- self.stats['empty'] = self.src_cursor.fetchone()[0]
-
- self.src_cursor.execute("SELECT Dates_Enregistr FROM Phono WHERE Dates_Enregistr NOT REGEXP '^ *$'")
-
- print "WARNING: this migrator is a work in progress"
- while True:
- row = self.src_cursor.fetchone()
- if not row:
- break
-
- recognized = False
- if re.match('^ *(nn?|=|id|idem|\?+|[-_]?1|[0 ]*) *$', row[0], re.IGNORECASE):
- self.stats['unsignificant'] += 1
- recognized = True
- else:
- for p in patterns:
- if re.match(p, row[0], re.IGNORECASE):
- self.stats['matched'] += 1
- recognized = True
- break
-
- if not recognized:
- #print '|%s|' % row[0]
- self.stats['unparsed'] += 1
-
-
-
-
-
-