From 954599b0a3003f50480818049dfbc1d655485741 Mon Sep 17 00:00:00 2001 From: olivier Date: Tue, 2 Feb 2010 10:22:06 +0000 Subject: [PATCH] fix geoethno ancestry building git-svn-id: http://svn.parisson.org/svn/crem@145 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- trunk/import/migration/tasks/geoethno.py | 53 ++++++++++++++++-------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/trunk/import/migration/tasks/geoethno.py b/trunk/import/migration/tasks/geoethno.py index 2944451..4f6440a 100644 --- a/trunk/import/migration/tasks/geoethno.py +++ b/trunk/import/migration/tasks/geoethno.py @@ -111,10 +111,11 @@ class GeoEthnoImporter(DataMigrator): if (parentName): parent_id = self.getone("SELECT id FROM locations WHERE name = %s", (parentName,)) - self.stats['relations'] += self.replace("INSERT INTO location_relations "+ - "(location_id, ancestor_location_id, is_direct) "+ - "VALUE (%s, %s, %s)", - (id, parent_id, 1)) + if id != parent_id: + self.stats['relations'] += self.replace("INSERT INTO location_relations "+ + "(location_id, ancestor_location_id, is_direct) "+ + "VALUE (%s, %s, %s)", + (id, parent_id, 1)) for hname in historic_names: self.stats['historical names'] += self.replace("INSERT INTO locations "+ @@ -122,10 +123,11 @@ class GeoEthnoImporter(DataMigrator): "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type_id, id, 1)) hid = self.getone("SELECT id FROM locations WHERE name = %s", (hname,)) if (len(parentName)): - self.stats['relations'] += self.replace("INSERT INTO location_relations "+ - "(location_id, ancestor_location_id, is_direct) "+ - "VALUE (%s, %s, %s)", - (hid, parent_id, 1)) + if hid != parent_id: + self.stats['relations'] += self.replace("INSERT INTO location_relations "+ + "(location_id, ancestor_location_id, is_direct) "+ + "VALUE (%s, %s, %s)", + (hid, parent_id, 1)) def add_aliases(self, name, items): @@ -225,25 +227,33 @@ class GeoEthnoAncestryBuilder(DataMigrator): self.target_cursor.execute(query, args) return self.target_cursor.fetchone()[0] - def get_ancestors(self, cursor, id): + def get_ancestors(self, ancestors, cursor, id, min_distance=1): cursor.execute("SELECT ancestor_location_id FROM location_relations " "WHERE location_id = %s AND is_direct = 1", (id,)) - ancestors = [] + direct = [] while True: row = cursor.fetchone() if not row: break id, = row - ancestors.append(id) - up = self.get_ancestors(cursor, id) - if up: - ancestors.extend(up) + direct.append(id) + + if min_distance <= 1: + ancestors.extend(direct) + + for id in direct: + try: + up = self.get_ancestors(ancestors, cursor, id, min_distance - 1) + except RuntimeError: + print "Caught RuntimeError - ancestors: " + str(ancestors[0:100]) + raise + return ancestors def process(self): self.target("DELETE FROM location_relations WHERE is_direct = 0") ndirect = self.getone("SELECT count(*) FROM location_relations") - self.stats = {'direct' : ndirect, 'indirect': 0, 'total': ndirect} + self.stats = {'direct' : ndirect, 'indirect': 0, 'total': ndirect, 'redundant': 0} self.start(ndirect) rcursor1 = self.target_db.cursor() rcursor2 = self.target_db.cursor() @@ -254,13 +264,20 @@ class GeoEthnoAncestryBuilder(DataMigrator): break id, = row - ancestors = self.get_ancestors(rcursor2, id) - if len(ancestors) > 1: - for aid in ancestors[1:]: + ancestors = [] + self.get_ancestors(ancestors, rcursor2, id, min_distance=2) + for aid in ancestors: + try: self.target("INSERT INTO location_relations (location_id, ancestor_location_id) " "VALUE (%s, %s)", (id, aid)) self.stats['indirect'] += 1 self.stats['total'] += 1 + except IntegrityError, e: + (errno, errmsg) = e + if errno == DUP_ENTRY: + self.stats['redundant'] += 1 + else: + raise e self.step() -- 2.39.5