]> git.parisson.com Git - telemeta-data.git/commitdiff
migration: complete collections:publisher mapper
authorolivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Mon, 4 May 2009 13:20:58 +0000 (13:20 +0000)
committerolivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Mon, 4 May 2009 13:20:58 +0000 (13:20 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@89 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/migrate.py
trunk/import/migration/tasks/collections.py

index 61cee2e0a41f87e4081c66b235b6ffdc2e41a5ff..047299fe2c2223995b9dff95c48e39ec09f81508 100644 (file)
@@ -130,7 +130,7 @@ if __name__ == '__main__':
         print "Usage: %s <config_file> [task_name]" % sys.argv[0]
         print "Tasks:"
         for task in manager.list_tasks():
-            print "  %-20s%s" % (task.get_name(), task.__doc__)
+            print "  %-24s%s" % (task.get_name(), task.__doc__)
 
         sys.exit(1)
 
index 5cae1a3d3014dc35a7f44aa08fff188d5e783ec4..a0aaf099c3d34203f0c76f8d5368487570b388ee 100644 (file)
@@ -314,38 +314,74 @@ class CollectionsPublishersMapper(DataMigrator):
         return "collections:publisher"
 
     def process(self):
-        pass
-
-"""
-        self.target_cursor.execute("SELECT COUNT(*) FROM media_collections")
-        self.stats = {
-            'total':      self.target_cursor.fetchone()[0],
-            'with_publisher':   0,
-            'with_collection':  0
-        }
 
+        publishers = {}
         self.target_cursor.execute("UPDATE media_collections SET publisher_id = NULL")
+        self.target_cursor.execute("UPDATE media_collections SET publisher_collection_id = NULL")
 
-        query = "UPDATE media_collections AS c \n" \
-                "SET publisher_id = \n" \
-                "   (SELECT p.id FROM publishers AS p INNER JOIN %s.Support AS s \n" \
-                "    ON p.value = s.Editeur WHERE s.Cote = c.old_code) \n" 
 
-        self.target_cursor.execute(query % (self.src_db_name, ))
-        self.stats['with_publisher'] += self.target_cursor.rowcount
-        offset += limit
-                        
-        self.target_cursor.execute("UPDATE media_collections SET publisher_collection_id = NULL")
+        self.target_cursor.execute("SELECT p.id, p.value, c.id, c.value FROM publishers "
+                                   "AS p LEFT JOIN publisher_collections AS c ON p.id = c.publisher_id");
+        while True:
+            row = self.target_cursor.fetchone()
+            if not row:
+                break
+
+            id, name, coll_id, coll_name = row
+            if not publishers.has_key(name):
+                publishers[name] = {}
+
+            publishers[name]['id'] = id
+
+            if not publishers[name].has_key('sub'):
+                publishers[name]['sub'] = {}
+
+            if coll_id:
+                publishers[name]['sub'][coll_name] = coll_id
+
+        self.src_cursor.execute("SELECT Cote, TRIM(Editeur), TRIM(Collect_Serie) FROM Support");
+
+        i = 0
+        while True:
+            row = self.src_cursor.fetchone()
+            if not row:
+                break
+
+            old_code, name, coll_name = row
+
+            id = None
+            coll_id = None
+
+            if name and len(name):
+                try:
+                    id = publishers[name]['id']
+                    try:
+                        if coll_name and len(coll_name):
+                            coll_id = publishers[name]['sub'][coll_name]
+                    except KeyError:
+                        print "Unknown publisher/collection: '%s' / '%s'" % name, coll_name
+                    
+                except KeyError:
+                    print "Unknown publisher: '%s'" % name
+
+            elif coll_name and len(coll_name):
+                raise Exception("Row %s has publisher collection '%s' but no publisher" % (old_code, coll_name))
+
+            self.target_cursor.execute("UPDATE media_collections SET publisher_id = %s, publisher_collection_id = %s "
+                                       "WHERE old_code = %s", (id, coll_id, old_code));
+
+            if i % 500 == 0:
+                self.step()
+
+            i += 1
+
+
+        self.stats = {}
+        self.target_cursor.execute("SELECT COUNT(*) FROM media_collections")
+        self.stats['total'] = self.target_cursor.fetchone()[0]
+        self.target_cursor.execute("SELECT COUNT(*) FROM media_collections WHERE publisher_id IS NOT NULL")
+        self.stats['with_publisher'] = self.target_cursor.fetchone()[0]
+        self.target_cursor.execute("SELECT COUNT(*) FROM media_collections WHERE publisher_collection_id IS NOT NULL")
+        self.stats['with_collection'] = self.target_cursor.fetchone()[0]
+
 
-        query = "UPDATE media_collections AS c \n" \
-                "SET publisher_collection_id = \n" \
-                "   (SELECT p.id FROM publisher_collections AS p INNER JOIN %s.Support AS s \n" \
-                "    ON p.value = s.Collect_Serie WHERE s.Cote = c.old_code AND c.publisher_id = p.publisher_id)" \
-                "LIMIT %d, %d" 
-
-        offset = 0
-        while offset < self.stats['total']:
-            self.target_cursor.execute(query % (self.src_db_name, offset, limit))
-            self.stats['with_collection'] += self.target_cursor.rowcount
-            offset += limit
-"""