From bd4a14a6fc5eb3b9df95ae6184a1d9aee0b44cdf Mon Sep 17 00:00:00 2001 From: olivier Date: Thu, 23 Apr 2009 17:44:47 +0000 Subject: [PATCH] migration: add collections enumerations mapping migrator git-svn-id: http://svn.parisson.org/svn/crem@85 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- trunk/import/migration/migrate.py | 1 + trunk/import/migration/tasks/collections.py | 113 ++++++++++++-------- 2 files changed, 72 insertions(+), 42 deletions(-) diff --git a/trunk/import/migration/migrate.py b/trunk/import/migration/migrate.py index 2540f21..61cee2e 100644 --- a/trunk/import/migration/migrate.py +++ b/trunk/import/migration/migrate.py @@ -65,6 +65,7 @@ class MigrationManager(Component): start = time.time() task.setup(self.cfg, self.src_db, self.target_db) task.process() + self.target_db.commit() sys.stdout.write("\t(%.2fs)\n" % (time.time() - start)) done.append(task) if only_task: diff --git a/trunk/import/migration/tasks/collections.py b/trunk/import/migration/tasks/collections.py index 2aa5bd3..2bc45e9 100644 --- a/trunk/import/migration/tasks/collections.py +++ b/trunk/import/migration/tasks/collections.py @@ -37,12 +37,12 @@ from core import DataMigrator from _mysql_exceptions import IntegrityError from MySQLdb.constants.ER import DUP_ENTRY -class CollectionsMigrator(DataMigrator): +class CollectionsCopyMigrator(DataMigrator): """Perform a preliminary raw copy of the collection table""" implements(IDataMigrator) - flat_map = [ + map = [ ('Ref', 'reference'), ('Cote', 'old_code'), ('Cote', 'code'), @@ -66,23 +66,10 @@ class CollectionsMigrator(DataMigrator): ('A informer_07_03_', 'a_informer_07_03') ] - enums_map = [ - ('Format', 'physical_format'), - ('Reedition', 'publishing_status'), - #('Editeur', 'publisher'), - #('Collect_Série', 'publisher_collection'), - ('Mode_Acqui', 'acquisition_mode'), - ('Redacteur_Fiche', 'metadata_author'), - ('Saisie_Fiche', 'metadata_writer'), - ('Droit_Utiliser', 'legal_rights'), - ('Terrain_ou_Autr', 'recording_context'), - ('Numerisation', 'ad_conversion') - ] - def get_name(self): return "collections:copy" - def build_flat_assignments(self, map): + def build_assignments(self, map): assign = [] for f1, f2 in map: f2 = '`%s`' % f2 @@ -91,34 +78,11 @@ class CollectionsMigrator(DataMigrator): return assign - def build_enum_assignments(self, src_table, map): - assign = [] - for src_field, target_base in map: - target_field = '`%s_id`' % target_base - if target_base[-1] == 's': - enum_table = target_base - else: - enum_table = target_base + 's' - - subquery = "(SELECT id FROM `%s`.`%s` AS e WHERE %s.`%s` = e.value)" % ( - self.target_db_name, enum_table, src_table, src_field) - - assign.append((target_field, subquery)) - - return assign - def process(self): - target_fields = [] - src_fields = [] - - flat = self.build_flat_assignments(self.flat_map) - target_fields += [str(a[0]) for a in flat] - src_fields += [str(a[1]) for a in flat] - - enum = self.build_enum_assignments('s', self.enums_map) - target_fields += [str(a[0]) for a in enum] - src_fields += [str(a[1]) for a in enum] + assign = self.build_assignments(self.map) + target_fields = [str(a[0]) for a in assign] + src_fields = [str(a[1]) for a in assign] self.src_cursor.execute("SELECT COUNT(*) FROM %s.Support" % self.src_db_name) count = self.src_cursor.fetchone()[0] @@ -143,4 +107,69 @@ class CollectionsMigrator(DataMigrator): else: raise e +class CollectionsEnumMigrator(DataMigrator): + """Map simple enumerations into the collections table""" + + implements(IDataMigrator) + map = [ + ('Format', 'physical_format'), + ('Reedition', 'publishing_status'), + #('Editeur', 'publisher'), + #('Collect_Série', 'publisher_collection'), + ('Mode_Acqui', 'acquisition_mode'), + ('Redacteur_Fiche', 'metadata_author'), + ('Saisie_Fiche', 'metadata_writer'), + ('Droit_Utiliser', 'legal_rights'), + ('Terrain_ou_Autr', 'recording_context'), + ('Numerisation', 'ad_conversion') + ] + + def get_name(self): + return "collections:enums" + + def process(self): + buffer_size = 200 + offset = 0 + + src_fields = [] + target_fields = [] + enum_tables = [] + for src_field, target_base in self.map: + src_fields.append(src_field) + target_fields.append('`%s_id`' % target_base) + if target_base[-1] == 's': + enum_tables.append(target_base) + else: + enum_tables.append(target_base + 's') + + while not offset or self.src_cursor.rowcount: + self.src_cursor.execute("SELECT Cote, %s FROM Support LIMIT %d, %d" % ( + ", ".join(src_fields), offset, buffer_size)) + while True: + row = self.src_cursor.fetchone() + if not row: + break + code = row[0] + for i in range(0, len(row) - 1): + value = row[i + 1] + if value and len(value) > 0: + self.target_cursor.execute("SELECT id FROM " + enum_tables[i] + + " WHERE value = %s", (value,)) + idrow = self.target_cursor.fetchone() + if idrow: + self.target_cursor.execute("UPDATE media_collections SET " + + target_fields[i] + " = %s " + "WHERE code = %s", (idrow[0], code)) + if self.target_cursor.rowcount > 1: + raise Exception("Updated more than one row, this shouldn't happen..") + else: + print "Can't find value '%s' in %s" % (value, enum_tables[i]) + + offset += self.src_cursor.rowcount + self.step() + + + + + -- 2.39.5