class ItemsKeywordsMapper(DataMigrator):
    """Map many-to-many context keywords to items.

    Refills the ``context_keywords`` table from the source ``Mot_Clef`` and
    ``Fonction_Usage`` tables, then refills ``media_item_keywords`` by joining
    ``Fonction_Usage`` against ``media_items`` and ``context_keywords``.
    Counters describing the run are stored in ``self.stats``.
    """

    implements(IDataMigrator)

    def get_name(self):
        """Return the name identifying this migration task."""
        return "items:keywords"

    def _run_on_target(self, template):
        """Execute ``template`` on the target cursor.

        ``template`` must contain a single ``%s`` placeholder, which is
        replaced by the source database name so that source tables can be
        referenced through the target cursor.
        """
        self.target_cursor.execute(template % self.src_db_name)

    def process(self):
        """Rebuild the keyword list and the item/keyword relations.

        Fills ``self.stats`` with the keys ``relations``, ``converted``,
        ``nosuchitem``, ``nosuchkeyword`` and ``empty``, and prints the
        keywords that could not be matched, if any.
        """
        # Empty the keyword table and reload it from the source Mot_Clef
        # table, leaving out blank values.
        self.step()
        self.target_cursor.execute("DELETE FROM context_keywords")
        self._run_on_target(
            "INSERT INTO context_keywords (value) "
            "SELECT Mot_Clef FROM %s.Mot_Clef WHERE Mot_Clef <> ''")

        # Add the keywords that only appear in Fonction_Usage.
        self.step()
        self._run_on_target(
            "INSERT INTO context_keywords (value) "
            "SELECT DISTINCT(Mot_Clef) FROM %s.Fonction_Usage "
            "WHERE Mot_Clef <> '' AND Mot_Clef "
            "NOT IN (SELECT value FROM context_keywords)")

        # Count the source relations, then those with a blank keyword.
        self.step()
        self.src_cursor.execute(
            "SELECT * FROM Fonction_Usage GROUP BY Cote_Phono, Mot_Clef")
        self.stats = dict(relations=self.src_cursor.rowcount, converted=0,
                          nosuchitem=0, nosuchkeyword=0, empty=0)
        self.src_cursor.execute(
            "SELECT * FROM Fonction_Usage WHERE Mot_Clef = '' "
            "GROUP BY Cote_Phono, Mot_Clef")
        self.stats['empty'] = self.src_cursor.rowcount

        # Empty the relation table before refilling it.
        self.step()
        self.target_cursor.execute("DELETE from media_item_keywords")

        # Insert one row per distinct (item, keyword) pair that resolves on
        # both sides of the join.
        self.step()
        self._run_on_target(
            "INSERT INTO media_item_keywords (item_id, keyword_id) "
            "SELECT i.id, k.id FROM %s.Fonction_Usage AS f "
            "INNER JOIN media_items AS i ON f.Cote_Phono = i.old_code "
            "INNER JOIN context_keywords AS k ON f.Mot_Clef = k.value "
            "GROUP BY i.id, k.id")
        self.stats['converted'] = self.target_cursor.rowcount

        # Count the relations whose item code has no match in media_items.
        self.step()
        self._run_on_target(
            "SELECT * FROM %s.Fonction_Usage AS f LEFT JOIN media_items AS i "
            "ON f.Cote_Phono = i.old_code WHERE i.old_code IS NULL "
            "GROUP BY f.Cote_Phono, f.Mot_Clef")
        self.stats['nosuchitem'] = self.target_cursor.rowcount

        # Count the relations whose non-blank keyword has no match in
        # context_keywords.
        self.step()
        self._run_on_target(
            "SELECT * FROM %s.Fonction_Usage AS f "
            "LEFT JOIN context_keywords AS k ON f.Mot_Clef = k.value "
            "WHERE k.value IS NULL AND f.Mot_Clef <> '' "
            "GROUP BY f.Cote_Phono, f.Mot_Clef")
        self.stats['nosuchkeyword'] = self.target_cursor.rowcount
        if not self.target_cursor.rowcount:
            return

        # Report each unmatched keyword together with its number of uses.
        # A parenthesised single argument prints the same text under the
        # Python 2 print statement.
        print("Unknown keywords:")
        self._run_on_target(
            "SELECT f.Mot_Clef, COUNT(*) FROM %s.Fonction_Usage AS f "
            "LEFT JOIN context_keywords AS k ON f.Mot_Clef = k.value "
            "WHERE k.value IS NULL AND f.Mot_Clef <> '' GROUP BY f.Mot_Clef")
        row = self.target_cursor.fetchone()
        while row:
            print(" %s: count=%d" % row)
            row = self.target_cursor.fetchone()