git.parisson.com Git - telemeta-data.git/commitdiff
fix geoethno ancestry building
author olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Tue, 2 Feb 2010 10:22:06 +0000 (10:22 +0000)
committer olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Tue, 2 Feb 2010 10:22:06 +0000 (10:22 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@145 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/tasks/geoethno.py

index 294445161d8c2b1d30b1821eb4b55747cea7e415..4f6440a617b6ece11e5974670a16faaf2d429a3b 100644 (file)
@@ -111,10 +111,11 @@ class GeoEthnoImporter(DataMigrator):
 
         if (parentName):
             parent_id = self.getone("SELECT id FROM locations WHERE name = %s", (parentName,))
-            self.stats['relations'] += self.replace("INSERT INTO location_relations "+
-                                                   "(location_id, ancestor_location_id, is_direct) "+
-                                                   "VALUE (%s, %s, %s)", 
-                                                   (id, parent_id, 1))
+            if id != parent_id:
+                self.stats['relations'] += self.replace("INSERT INTO location_relations "+
+                                                       "(location_id, ancestor_location_id, is_direct) "+
+                                                       "VALUE (%s, %s, %s)", 
+                                                       (id, parent_id, 1))
 
         for hname in historic_names:
             self.stats['historical names'] += self.replace("INSERT INTO locations "+
@@ -122,10 +123,11 @@ class GeoEthnoImporter(DataMigrator):
                                                            "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type_id, id, 1))
             hid = self.getone("SELECT id FROM locations WHERE name = %s", (hname,))
             if (len(parentName)):
-                self.stats['relations'] += self.replace("INSERT INTO location_relations "+
-                                                        "(location_id, ancestor_location_id, is_direct) "+
-                                                        "VALUE (%s, %s, %s)", 
-                                                        (hid, parent_id, 1))
+                if hid != parent_id:
+                    self.stats['relations'] += self.replace("INSERT INTO location_relations "+
+                                                            "(location_id, ancestor_location_id, is_direct) "+
+                                                            "VALUE (%s, %s, %s)", 
+                                                            (hid, parent_id, 1))
                     
 
     def add_aliases(self, name, items):
@@ -225,25 +227,33 @@ class GeoEthnoAncestryBuilder(DataMigrator):
         self.target_cursor.execute(query, args)
         return self.target_cursor.fetchone()[0]
 
-    def get_ancestors(self, cursor, id):
+    def get_ancestors(self, ancestors, cursor, id, min_distance=1):
         cursor.execute("SELECT ancestor_location_id FROM location_relations "
                        "WHERE location_id = %s AND is_direct = 1", (id,))
-        ancestors = []
+        direct = []
         while True:
             row = cursor.fetchone()
             if not row:
                 break
             id, = row
-            ancestors.append(id)
-            up = self.get_ancestors(cursor, id)
-            if up:
-                ancestors.extend(up)
+            direct.append(id)
+
+        if min_distance <= 1:
+            ancestors.extend(direct)
+
+        for id in direct:
+            try:
+                up = self.get_ancestors(ancestors, cursor, id, min_distance - 1)
+            except RuntimeError:
+                print "Caught RuntimeError - ancestors: " + str(ancestors[0:100])
+                raise
+
         return ancestors                
 
     def process(self):
         self.target("DELETE FROM location_relations WHERE is_direct = 0")
         ndirect = self.getone("SELECT count(*) FROM location_relations")
-        self.stats = {'direct' : ndirect, 'indirect': 0, 'total': ndirect}
+        self.stats = {'direct' : ndirect, 'indirect': 0, 'total': ndirect, 'redundant': 0}
         self.start(ndirect)
         rcursor1 = self.target_db.cursor()
         rcursor2 = self.target_db.cursor()
@@ -254,13 +264,20 @@ class GeoEthnoAncestryBuilder(DataMigrator):
                 break
 
             id, = row
-            ancestors = self.get_ancestors(rcursor2, id)
-            if len(ancestors) > 1:
-                for aid in ancestors[1:]:
+            ancestors = []
+            self.get_ancestors(ancestors, rcursor2, id, min_distance=2)
+            for aid in ancestors:
+                try:
                     self.target("INSERT INTO location_relations (location_id, ancestor_location_id) "
                                 "VALUE (%s, %s)", (id, aid))
                     self.stats['indirect'] += 1
                     self.stats['total']    += 1                                
+                except IntegrityError, e:
+                    (errno, errmsg) = e
+                    if errno == DUP_ENTRY:
+                        self.stats['redundant'] += 1
+                    else:
+                        raise e
 
             self.step()