# Recovered from git patch 463cd3aafaee2729e7ec2b5bfac533a7b1dd46d4
# Author: olivier -- Date: Mon, 15 Jun 2009 17:27:47 +0000
# Subject: migration: add items geo ethno mapper
# git-svn-id: http://svn.parisson.org/svn/crem@110 3bf09e05-f825-4182-b9bc-eedd7160adf0
# Target file: trunk/import/migration/tasks/items.py (the patch adds this class
# right after ItemsKeywordsMapper).
class ItemsLocationsMapper(DataMigrator):
    """Migrate items locations trying to map them to the Geo Ethno thesaurus"""

    implements(IDataMigrator)

    def get_name(self):
        """Return the identifier of this migration task."""
        return "items:locations"

    def is_descendant_of(self, location, ascendant, ascendant_type):
        """Tell whether `location` has `ascendant` among its (transitive) parents.

        Walks the `location_relations` table upwards from `location` and
        returns True as soon as a parent named `ascendant` whose type is
        `ascendant_type` is met, False when every reachable parent has been
        examined without a match.
        """
        pending = [location]
        visited = set(pending)
        while pending:
            current = pending.pop()
            self.target("SELECT l.name, l.type FROM location_relations AS r "
                        "INNER JOIN locations AS l ON r.parent_location_name = l.name "
                        "WHERE r.location_name = %s",
                        (current,))
            # Results are read from the shared self.target_cursor, so the whole
            # parent list must be fetched before the next query is issued;
            # otherwise remaining parents of this level would be lost.
            for parent_name, parent_type in self.target_cursor.fetchall():
                if parent_name == ascendant and parent_type == ascendant_type:
                    return True
                # The visited set protects against cycles in the relations
                # table and avoids re-querying shared ancestors.
                if parent_name not in visited:
                    visited.add(parent_name)
                    pending.append(parent_name)
        return False

    def find_location(self, name_or_alias, type):
        """Return the name of a location of the given type, or None.

        The location matches when either its own name or one of its aliases
        is LIKE `name_or_alias` (SQL LIKE semantics: `%` and `_` in the value
        act as wildcards). An empty value never matches.
        """
        if not name_or_alias:
            return None
        # LEFT JOIN: a location without any alias must still be found by name.
        self.target("SELECT l.name FROM locations AS l "
                    "LEFT JOIN location_aliases AS a ON l.name = a.location_name "
                    "WHERE l.type = %s AND (l.name LIKE %s OR a.alias LIKE %s)",
                    (type, name_or_alias, name_or_alias))
        row = self.target_cursor.fetchone()
        return row[0] if row else None

    def concat(self, locality, country, continent = None):
        """Join the non-empty pieces with '-', from most to least specific."""
        return u'-'.join([piece for piece in (locality, country, continent) if piece])

    def _map_location(self, continent, country, locality):
        """Map one source triple to a `(location, comment, stat_key)` tuple.

        `location` is the most specific thesaurus entry that could be
        resolved (or None), `comment` holds the textual pieces that could not
        be mapped, and `stat_key` is the `self.stats` counter to increment.
        """
        found_continent = self.find_location(continent, "continent")
        if not found_continent:
            comment = self.concat(locality, country, continent)
            # Nothing mapped: distinguish blank source rows from rows whose
            # text could not be matched against the thesaurus.
            if comment:
                return None, comment, 'nomap'
            return None, comment, 'empty'

        found_country = self.find_location(country, "country")
        if not (found_country
                and self.is_descendant_of(found_country, found_continent, 'continent')):
            return found_continent, self.concat(locality, country), 'continent'

        found_locality = self.find_location(locality, "other")
        if found_locality and self.is_descendant_of(found_locality, found_country, 'country'):
            return found_locality, '', 'fullmap'
        return found_country, locality, 'country'

    def process(self):
        """Reset then fill `location_name`/`location_comment` of all media items.

        Reads `Cote_Phono, Continent, Etat, Region_Village` from the source
        `Phono` table, maps each row through the thesaurus and updates the
        matching `media_items` row (by `old_code`). Per-outcome counters are
        left in `self.stats`.
        """
        self.target("UPDATE media_items SET location_name = NULL, location_comment = ''")

        self.src("SELECT Cote_Phono, Continent, Etat, Region_Village FROM Phono")
        self.stats = {
            'total'     : self.src_cursor.rowcount,
            'fullmap'   : 0,
            'country'   : 0,
            'continent' : 0,
            'empty'     : 0,
            'nomap'     : 0
        }

        self.target("SET foreign_key_checks = 0")
        try:
            i = 0
            while True:
                # Call the step() hook once every 1000 rows.
                if i % 1000 == 0:
                    self.step()
                i += 1
                row = self.src_cursor.fetchone()
                if not row:
                    break
                oldcode = row[0]
                # Source columns may be NULL; treat them as empty strings.
                continent, country, locality = [(value or '').strip() for value in row[1:4]]

                location, comment, stat_key = self._map_location(continent, country, locality)
                self.stats[stat_key] += 1

                if location or comment:
                    self.target("UPDATE media_items SET location_name = %s, location_comment = %s WHERE old_code = %s",
                                (location, comment, oldcode))
        finally:
            # Always restore the constraint checks, even if a row failed.
            self.target("SET foreign_key_checks = 1")