git.parisson.com Git - telemeta-data.git/commitdiff
migration: add collections enumerations mapping migrator
author olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Thu, 23 Apr 2009 17:44:47 +0000 (17:44 +0000)
committer olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Thu, 23 Apr 2009 17:44:47 +0000 (17:44 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@85 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/migrate.py
trunk/import/migration/tasks/collections.py

index 2540f211bcde819eaa2066f3508f498b08da9bec..61cee2e0a41f87e4081c66b235b6ffdc2e41a5ff 100644 (file)
@@ -65,6 +65,7 @@ class MigrationManager(Component):
                 start = time.time()
                 task.setup(self.cfg, self.src_db, self.target_db)
                 task.process()
+                self.target_db.commit()
                 sys.stdout.write("\t(%.2fs)\n" % (time.time() - start))
                 done.append(task)
                 if only_task:
index 2aa5bd35e37187d2dd730bce8c2bed76239ed7e0..2bc45e94c20810935ee232a986f4707e0537cc45 100644 (file)
@@ -37,12 +37,12 @@ from core import DataMigrator
 from _mysql_exceptions import IntegrityError
 from MySQLdb.constants.ER import DUP_ENTRY
 
-class CollectionsMigrator(DataMigrator):
+class CollectionsCopyMigrator(DataMigrator):
     """Perform a preliminary raw copy of the collection table"""
 
     implements(IDataMigrator)
 
-    flat_map = [
+    map = [
         ('Ref',                'reference'),
         ('Cote',               'old_code'),
         ('Cote',               'code'),
@@ -66,23 +66,10 @@ class CollectionsMigrator(DataMigrator):
         ('A informer_07_03_',  'a_informer_07_03')
     ]        
 
-    enums_map = [
-        ('Format',           'physical_format'),
-        ('Reedition',        'publishing_status'),
-        #('Editeur',          'publisher'),
-        #('Collect_Série',    'publisher_collection'),
-        ('Mode_Acqui',        'acquisition_mode'),
-        ('Redacteur_Fiche',  'metadata_author'),
-        ('Saisie_Fiche',     'metadata_writer'),
-        ('Droit_Utiliser',   'legal_rights'),
-        ('Terrain_ou_Autr',  'recording_context'),
-        ('Numerisation',     'ad_conversion')
-    ]
-   
     def get_name(self):
         return "collections:copy"
 
-    def build_flat_assignments(self, map):
+    def build_assignments(self, map):
         assign = []
         for f1, f2 in map:
             f2 = '`%s`' % f2
@@ -91,34 +78,11 @@ class CollectionsMigrator(DataMigrator):
 
         return assign
 
-    def build_enum_assignments(self, src_table, map):
-        assign = []
-        for src_field, target_base in map:
-            target_field = '`%s_id`' % target_base
-            if target_base[-1] == 's':
-                enum_table = target_base
-            else:
-                enum_table = target_base + 's'
-
-            subquery = "(SELECT id FROM `%s`.`%s` AS e WHERE %s.`%s` = e.value)" % (
-                       self.target_db_name, enum_table, src_table, src_field)
-            
-            assign.append((target_field, subquery))
-
-        return assign
-
     def process(self):
         
-        target_fields   = []
-        src_fields      = []
-
-        flat            = self.build_flat_assignments(self.flat_map)
-        target_fields  += [str(a[0]) for a in flat]
-        src_fields     += [str(a[1]) for a in flat]
-
-        enum            = self.build_enum_assignments('s', self.enums_map)
-        target_fields  += [str(a[0]) for a in enum]
-        src_fields     += [str(a[1]) for a in enum]
+        assign          = self.build_assignments(self.map)
+        target_fields   = [str(a[0]) for a in assign]
+        src_fields      = [str(a[1]) for a in assign]
 
         self.src_cursor.execute("SELECT COUNT(*) FROM %s.Support" %  self.src_db_name)
         count = self.src_cursor.fetchone()[0]
@@ -143,4 +107,69 @@ class CollectionsMigrator(DataMigrator):
                 else:
                     raise e
 
+class CollectionsEnumMigrator(DataMigrator):
+    """Map simple enumerations into the collections table"""
+
+    implements(IDataMigrator)
 
+    map = [
+        ('Format',           'physical_format'),
+        ('Reedition',        'publishing_status'),
+        #('Editeur',          'publisher'),
+        #('Collect_Série',    'publisher_collection'),
+        ('Mode_Acqui',        'acquisition_mode'),
+        ('Redacteur_Fiche',  'metadata_author'),
+        ('Saisie_Fiche',     'metadata_writer'),
+        ('Droit_Utiliser',   'legal_rights'),
+        ('Terrain_ou_Autr',  'recording_context'),
+        ('Numerisation',     'ad_conversion')
+    ]
+
+    def get_name(self):
+        return "collections:enums"
+
+    def process(self):
+        """Page through Support and store each enum label's id on the matching collection row."""
+        buffer_size = 200
+        offset      = 0
+        src_fields      = []
+        target_fields   = []
+        enum_tables     = []
+        for src_field, target_base in self.map:
+            src_fields.append(src_field)
+            target_fields.append('`%s_id`' % target_base)
+            if target_base[-1] == 's':
+                enum_tables.append(target_base)
+            else:
+                enum_tables.append(target_base + 's')
+
+        while True:
+            # ORDER BY makes LIMIT paging repeatable; ties on Cote may still reorder -- TODO confirm Cote is unique
+            self.src_cursor.execute("SELECT Cote, %s FROM Support ORDER BY Cote LIMIT %d, %d" % (
+                                    ", ".join(src_fields), offset, buffer_size))
+            rows = self.src_cursor.fetchall()
+            for row in rows:
+                code = row[0]
+                for i, value in enumerate(row[1:]):
+                    if value:
+                        self.target_cursor.execute("SELECT id FROM " + enum_tables[i] +
+                                                   " WHERE value = %s", (value,))
+                        idrow = self.target_cursor.fetchone()
+                        if idrow:
+                            self.target_cursor.execute("UPDATE media_collections SET "
+                                                       + target_fields[i] + " = %s "
+                                                       "WHERE code = %s", (idrow[0], code))
+                            if self.target_cursor.rowcount > 1:
+                                raise Exception("Updated more than one row, this shouldn't happen..")
+                        else:
+                            print "Can't find value '%s' in %s" % (value, enum_tables[i])
+
+            offset += len(rows)
+            self.step()
+            if not rows:  # empty page: source exhausted (an empty table used to loop forever here)
+                break
+
+                            
+
+
+