From f695237e0a769796d58a63e4733e884482b989cc Mon Sep 17 00:00:00 2001 From: olivier Date: Mon, 4 May 2009 13:20:58 +0000 Subject: [PATCH] migration: complete collections:publisher mapper git-svn-id: http://svn.parisson.org/svn/crem@89 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- trunk/import/migration/migrate.py | 2 +- trunk/import/migration/tasks/collections.py | 96 ++++++++++++++------- 2 files changed, 67 insertions(+), 31 deletions(-) diff --git a/trunk/import/migration/migrate.py b/trunk/import/migration/migrate.py index 61cee2e..047299f 100644 --- a/trunk/import/migration/migrate.py +++ b/trunk/import/migration/migrate.py @@ -130,7 +130,7 @@ if __name__ == '__main__': print "Usage: %s [task_name]" % sys.argv[0] print "Tasks:" for task in manager.list_tasks(): - print " %-20s%s" % (task.get_name(), task.__doc__) + print " %-24s%s" % (task.get_name(), task.__doc__) sys.exit(1) diff --git a/trunk/import/migration/tasks/collections.py b/trunk/import/migration/tasks/collections.py index 5cae1a3..a0aaf09 100644 --- a/trunk/import/migration/tasks/collections.py +++ b/trunk/import/migration/tasks/collections.py @@ -314,38 +314,74 @@ class CollectionsPublishersMapper(DataMigrator): return "collections:publisher" def process(self): - pass - -""" - self.target_cursor.execute("SELECT COUNT(*) FROM media_collections") - self.stats = { - 'total': self.target_cursor.fetchone()[0], - 'with_publisher': 0, - 'with_collection': 0 - } + publishers = {} self.target_cursor.execute("UPDATE media_collections SET publisher_id = NULL") + self.target_cursor.execute("UPDATE media_collections SET publisher_collection_id = NULL") - query = "UPDATE media_collections AS c \n" \ - "SET publisher_id = \n" \ - " (SELECT p.id FROM publishers AS p INNER JOIN %s.Support AS s \n" \ - " ON p.value = s.Editeur WHERE s.Cote = c.old_code) \n" - self.target_cursor.execute(query % (self.src_db_name, )) - self.stats['with_publisher'] += self.target_cursor.rowcount - offset += limit - - self.target_cursor.execute("UPDATE media_collections SET publisher_collection_id = NULL") + self.target_cursor.execute("SELECT p.id, p.value, c.id, c.value FROM publishers " + "AS p LEFT JOIN publisher_collections AS c ON p.id = c.publisher_id"); + while True: + row = self.target_cursor.fetchone() + if not row: + break + + id, name, coll_id, coll_name = row + if not publishers.has_key(name): + publishers[name] = {} + + publishers[name]['id'] = id + + if not publishers[name].has_key('sub'): + publishers[name]['sub'] = {} + + if coll_id: + publishers[name]['sub'][coll_name] = coll_id + + self.src_cursor.execute("SELECT Cote, TRIM(Editeur), TRIM(Collect_Serie) FROM Support"); + + i = 0 + while True: + row = self.src_cursor.fetchone() + if not row: + break + + old_code, name, coll_name = row + + id = None + coll_id = None + + if name and len(name): + try: + id = publishers[name]['id'] + try: + if coll_name and len(coll_name): + coll_id = publishers[name]['sub'][coll_name] + except KeyError: + print "Unknown publisher/collection: '%s' / '%s'" % name, coll_name + + except KeyError: + print "Unknown publisher: '%s'" % name + + elif coll_name and len(coll_name): + raise Exception("Row %s has publisher collection '%s' but no publisher" % (old_code, coll_name)) + + self.target_cursor.execute("UPDATE media_collections SET publisher_id = %s, publisher_collection_id = %s " + "WHERE old_code = %s", (id, coll_id, old_code)); + + if i % 500 == 0: + self.step() + + i += 1 + + + self.stats = {} + self.target_cursor.execute("SELECT COUNT(*) FROM media_collections") + self.stats['total'] = self.target_cursor.fetchone()[0] + self.target_cursor.execute("SELECT COUNT(*) FROM media_collections WHERE publisher_id IS NOT NULL") + self.stats['with_publisher'] = self.target_cursor.fetchone()[0] + self.target_cursor.execute("SELECT COUNT(*) FROM media_collections WHERE publisher_collection_id IS NOT NULL") + self.stats['with_collection'] = self.target_cursor.fetchone()[0] + - query = "UPDATE media_collections AS c \n" \ - "SET publisher_collection_id = \n" \ - " (SELECT p.id FROM publisher_collections AS p INNER JOIN %s.Support AS s \n" \ - " ON p.value = s.Collect_Serie WHERE s.Cote = c.old_code AND c.publisher_id = p.publisher_id)" \ - "LIMIT %d, %d" - - offset = 0 - while offset < self.stats['total']: - self.target_cursor.execute(query % (self.src_db_name, offset, limit)) - self.stats['with_collection'] += self.target_cursor.rowcount - offset += limit -""" -- 2.39.5