From: olivier Date: Tue, 3 Mar 2009 09:11:16 +0000 (+0000) Subject: migration/geoethno: add historic names and location types processing X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=019c7f62f2c00f09fca86ea12a096364fa432573;p=telemeta-data.git migration/geoethno: add historic names and location types processing git-svn-id: http://svn.parisson.org/svn/crem@63 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- diff --git a/trunk/import/migration/geoethno.py b/trunk/import/migration/geoethno.py index f966340..3ec572d 100644 --- a/trunk/import/migration/geoethno.py +++ b/trunk/import/migration/geoethno.py @@ -13,7 +13,7 @@ class GeoEthnoImporter(object): self.db = target_db self.cursor = self.db.cursor() self.dom = dom.parse(input_file) - + self.known_types = [] def get_children_by_tag_name(self, node, tagName): children = [] @@ -31,7 +31,7 @@ class GeoEthnoImporter(object): flat.append(n.firstChild.nodeValue.strip()) return flat - def insert_location(self, name, type, parentName): + def insert_location(self, name, type, parentName, historic_names): if type == 'CONTINENT': short_type = 'continent' elif type == 'BASEADM': @@ -39,6 +39,8 @@ class GeoEthnoImporter(object): else: short_type = 'other' + self.register_type(type) + self.cursor.execute("REPLACE INTO locations "+ "(name, type, complete_type_id, current_name, is_authoritative) "+ "VALUES (%s, %s, %s, %s, %s)", (name, short_type, type, name, 1)) @@ -52,7 +54,14 @@ class GeoEthnoImporter(object): self.nrelations += 1 - if self.nlocations % 100 == 0: + for hname in historic_names: + self.cursor.execute("REPLACE INTO locations "+ + "(name, type, complete_type_id, current_name, is_authoritative) "+ + "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type, name, 1)) + self.nhistoric_names += 1 + + + if self.nlocations % 1000 == 0: sys.stdout.write('.') sys.stdout.flush() @@ -63,8 +72,13 @@ class GeoEthnoImporter(object): "VALUES (%s, %s, %s)", (name, alias, 1)) self.naliases += 1 - def add_historic_names(self, name, items): - pass + def register_type(self, id): + try: + self.known_types.index(id) + except ValueError: + self.cursor.execute("REPLACE INTO location_types (id, name) "+ + "VALUES (%s, %s)", (id,"")) + self.known_types.append(id) def is_empty(self, node): for n in node.childNodes: @@ -102,9 +116,8 @@ class GeoEthnoImporter(object): aliasNodes = self.get_children_by_tag_name(n, 'ALIAS') historicNameNodes = self.get_children_by_tag_name(n, 'DESCR-HISTORIQUE') - self.insert_location(name, type, parentName) + self.insert_location(name, type, parentName, self.flatten_node_list(historicNameNodes)) self.add_aliases(name, self.flatten_node_list(aliasNodes)) - self.add_historic_names(name, self.flatten_node_list(historicNameNodes)) self.process_children(n, name) self.path.pop() @@ -113,6 +126,7 @@ class GeoEthnoImporter(object): self.path = [] self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '') sys.stdout.write('\nGeoethno import result:\n') + sys.stdout.write(' types: %d\n' % len(self.known_types)) sys.stdout.write(' locations: %d\n' % self.nlocations) sys.stdout.write(' relations: %d\n' % self.nrelations) sys.stdout.write(' aliases: %d\n' % self.naliases) @@ -120,7 +134,7 @@ class GeoEthnoImporter(object): def warn(self, msg): - print u"\nError: %s: %s\n" % ("/".join(self.path), msg) + print u"\nWarning: %s: %s\n" % ("/".join(self.path), msg) class Error(Exception):