git.parisson.com Git - telemeta-data.git/commitdiff
migration: add items geo ethno mapper
author: olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Mon, 15 Jun 2009 17:27:47 +0000 (17:27 +0000)
committer: olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Mon, 15 Jun 2009 17:27:47 +0000 (17:27 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@110 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/tasks/items.py

index e690c6010ff2c9ed247f928bc2c15fc5f647f3ae..651f6259e31dd919a1aba6d6b942068ac3c9dfe2 100644 (file)
@@ -180,6 +180,108 @@ class ItemsKeywordsMapper(DataMigrator):
                     break
                 print "  %s: count=%d" % row
                 
+class ItemsLocationsMapper(DataMigrator):
+    """Migrate items locations trying to map them to the Geo Ethno thesaurus.
+
+    For every row of the source ``Phono`` table, the continent, country and
+    locality are looked up in the target ``locations`` / ``location_aliases``
+    tables. The most specific location that could be matched is written to
+    ``media_items.location_name``; the parts that could not be matched are
+    kept as free text in ``media_items.location_comment``.
+    """
+
+    implements(IDataMigrator)
+
+    def get_name(self):
+        """Return the name of this migrator: ``items:locations``."""
+        return "items:locations"
+
+    def is_descendant_of(self, location, ascendant, ascendant_type):
+        """Tell whether `ascendant` is a direct or indirect parent of `location`.
+
+        Parents are read from ``location_relations``; a parent matches when
+        both its name equals `ascendant` and its type equals `ascendant_type`.
+        Returns True on the first match found, False otherwise.
+        """
+        self.target("SELECT l.name, l.type FROM location_relations AS r "
+                    "INNER JOIN locations AS l ON r.parent_location_name = l.name "
+                    "WHERE r.location_name = %s",
+                     (location,))
+        # Read every parent before recursing. The recursive call issues a new
+        # query, and self.target() appears to run it on self.target_cursor
+        # (TODO confirm); reading this result set lazily after the nested call
+        # would then lose the remaining parents of `location`.
+        parents = self.target_cursor.fetchall()
+        for parent_name, parent_type in parents:
+            if parent_name == ascendant and parent_type == ascendant_type:
+                return True
+            if self.is_descendant_of(parent_name, ascendant, ascendant_type):
+                return True
+        return False
+
+    def find_location(self, name_or_alias, type):
+        """Return the name of a location of the given `type`, or None.
+
+        `name_or_alias` is compared with LIKE against both the location name
+        and its aliases. When several rows match, the first one returned by
+        the database is used.
+        """
+        # LEFT JOIN so that a location without any alias row can still be
+        # matched by its own name (an INNER JOIN would drop it entirely).
+        self.target("SELECT l.name FROM locations AS l "
+                    "LEFT JOIN location_aliases AS a ON l.name = a.location_name "
+                    "WHERE l.type = %s AND (l.name LIKE %s OR a.alias LIKE %s)",
+                    (type, name_or_alias, name_or_alias))
+        # Test the fetched row rather than cursor.rowcount: DB-API drivers may
+        # report -1 when the count is unknown, which is truthy.
+        row = self.target_cursor.fetchone()
+        if row:
+            return row[0]
+        return None
+
+    def concat(self, locality, country, continent = None):
+        """Join the non-empty arguments with '-', in the order given."""
+        pieces = []
+        if locality:
+            pieces.append(locality)
+        if country:
+            pieces.append(country)
+        if continent:
+            pieces.append(continent)
+
+        return u'-'.join(pieces)
+
+    def _map_location(self, continent, country, locality):
+        """Map one source row to a (location_name, location_comment) pair.
+
+        Tries continent, then country, then locality, each one having to be a
+        descendant of the previous match. Increments exactly one counter of
+        self.stats ('fullmap', 'country', 'continent', 'nomap' or 'empty').
+        """
+        location = self.find_location(continent, "continent")
+        if not location:
+            # Nothing could be mapped: keep everything as a comment.
+            comment = self.concat(locality, country, continent)
+            if comment:
+                self.stats['nomap'] += 1
+            else:
+                self.stats['empty'] += 1
+            return location, comment
+
+        matched_country = self.find_location(country, "country")
+        if not (matched_country and
+                self.is_descendant_of(matched_country, location, 'continent')):
+            # Only the continent is known to the thesaurus.
+            self.stats['continent'] += 1
+            return location, self.concat(locality, country)
+
+        matched_locality = self.find_location(locality, "other")
+        if matched_locality and self.is_descendant_of(matched_locality,
+                                                      matched_country, 'country'):
+            self.stats['fullmap'] += 1
+            return matched_locality, ''
+
+        # Country matched, locality did not: keep the locality as a comment.
+        self.stats['country'] += 1
+        return matched_country, locality
+
+    def process(self):
+        """Reset and then fill location_name / location_comment of media_items.
+
+        Reads Cote_Phono, Continent, Etat and Region_Village from the source
+        ``Phono`` table and updates the target ``media_items`` row whose
+        ``old_code`` equals Cote_Phono. Counters are collected in self.stats.
+        """
+        self.target("UPDATE media_items SET location_name = NULL, location_comment = ''")
+
+        self.src("SELECT Cote_Phono, Continent, Etat, Region_Village FROM Phono")
+        self.stats = {
+            'total'     : self.src_cursor.rowcount,
+            'fullmap'   : 0,
+            'country'   : 0,
+            'continent' : 0,
+            'empty'     : 0,
+            'nomap'     : 0
+        }
+
+        self.target("SET foreign_key_checks = 0")
+        try:
+            i = 0
+            while True:
+                # Report progress once every 1000 source rows.
+                if i % 1000 == 0:
+                    self.step()
+                i += 1
+                row = self.src_cursor.fetchone()
+                if not row:
+                    break
+                oldcode, continent, country, locality = row
+                # Source columns may be NULL; treat that as an empty string.
+                continent = (continent or '').strip()
+                country = (country or '').strip()
+                locality = (locality or '').strip()
+
+                location, comment = self._map_location(continent, country, locality)
+
+                if location or comment:
+                    self.target("UPDATE media_items SET location_name = %s, location_comment = %s WHERE old_code = %s",
+                                (location, comment, oldcode))
+        finally:
+            # Re-enable the checks even when the migration aborts half-way.
+            self.target("SET foreign_key_checks = 1")
+                    
+
+
             
-            
+