# Extracted from telemeta-data commit b40f57a9c5350077c80b71db306c4b81a98ba884
# ("add collections recording year migrator", olivier, Tue, 9 Jun 2009
# 15:31:34 +0000; git-svn-id: http://svn.parisson.org/svn/crem@91), which adds
# the code below to trunk/import/migration/tasks/collections.py.
#
# The patch shows only fragments of the neighbouring classes
# (CollectionsCopyMigrator, CollectionsPublishersMapper) as hunk context; they
# are not reproduced here.
# NOTE(review): `implements` and `IDataMigrator` are not imported in the
# visible part of the patch -- presumably they are imported above the first
# hunk of the module; verify against the full file.

from core import DataMigrator
from _mysql_exceptions import IntegrityError
from MySQLdb.constants.ER import DUP_ENTRY
import re
from datetime import date


class CollectionsYearConverter(DataMigrator):
    """Convert collection recording years.

    Reads the free-text ``Annee_Enr`` column of the source ``Support`` table
    and writes numeric ``recorded_from_year`` / ``recorded_to_year`` values
    into ``media_collections``, matching rows on ``old_code`` (source
    ``Cote``).  Per-category counters are kept in ``self.stats``.
    """

    implements(IDataMigrator)

    # Values that carry no usable year: an optional sign followed by exactly
    # one of N, n, 1, 2 or ?.
    _UNSIGNIFICANT_RE = re.compile(r'^[+-]?[Nn12?]$')

    # Year patterns, tried in order; the first one that matches wins.
    # NOTE(review): the first pattern begins with an unbalanced "-]" and has a
    # single capture group, although the code below checks for a second group
    # (an end year).  It looks truncated in the copy this was taken from --
    # confirm against the repository before relying on range parsing.
    _YEAR_RES = (
        re.compile(r"^-]'?([0-9]{2,4})[';/,>?-]?$"),
        re.compile(r'^([0-9]{2,4})[;/,>?-]?$'),
    )

    def get_name(self):
        """Return the name under which this migration task is registered."""
        return "collections:date"

    def parse_year(self, year):
        """Normalise a parsed integer year.

        Values below 100 are taken as two-digit years of the 20th century
        (``65`` -> ``1965``).  Values between 1900 and the current year
        (inclusive) are returned unchanged.  Anything else yields ``0``,
        meaning "no valid year".
        """
        if year < 100:
            return year + 1900
        if 1900 <= year <= date.today().year:
            return year
        return 0

    def _convert(self, year_str):
        """Classify one raw year value.

        Returns a ``(stat_key, from_year, to_year)`` tuple where ``stat_key``
        is the ``self.stats`` counter to increment and the two years are the
        values to store (``0`` when unknown).
        """
        # A NULL column value comes back as None; treat it like an empty
        # string instead of letting the regex call raise TypeError.
        if year_str is None or year_str == '':
            return 'empty', 0, 0

        if self._UNSIGNIFICANT_RE.match(year_str):
            return 'unsignificant', 0, 0

        match = None
        for pattern in self._YEAR_RES:
            match = pattern.match(year_str)
            if match:
                break
        if not match:
            return 'unparsed', 0, 0

        from_year = self.parse_year(int(match.group(1)))
        if match.lastindex > 1:
            to_year = self.parse_year(int(match.group(2)))
        else:
            # Single year: the recording period starts and ends the same year.
            to_year = from_year

        # At least one bound must be valid, and a valid end year must not
        # precede the start year.  The parentheses matter: without them `and`
        # binds tighter than `or`, so any non-zero from_year was accepted even
        # when to_year < from_year.
        if (from_year or to_year) and (not to_year or to_year >= from_year):
            return 'migrated', from_year, to_year
        return 'incoherent', from_year, to_year

    def process(self):
        """Convert every source row and update the target table.

        Every row triggers an UPDATE, including rows whose year could not be
        determined (both columns are then set to 0).
        """
        self.stats = {
            'total': 0,
            'unsignificant': 0,
            'unparsed': 0,
            'migrated': 0,
            'empty': 0,
            'incoherent': 0
        }

        self.src_cursor.execute("SELECT Cote, Annee_Enr FROM Support")

        while True:
            row = self.src_cursor.fetchone()
            if not row:
                break

            old_code = row[0]
            stat_key, from_year, to_year = self._convert(row[1])

            self.stats['total'] += 1
            self.stats[stat_key] += 1

            self.target_cursor.execute(
                "UPDATE media_collections SET recorded_from_year = %s, "
                "recorded_to_year = %s WHERE old_code = %s",
                (from_year, to_year, old_code))