git.parisson.com Git - telemeta-data.git/commitdiff
migration/geoethno: add historic names and location types processing
author olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Tue, 3 Mar 2009 09:11:16 +0000 (09:11 +0000)
committer olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Tue, 3 Mar 2009 09:11:16 +0000 (09:11 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@63 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/geoethno.py

index f966340a91f1a8d62cfd7dd43c7c5451679e5312..3ec572ddfd57857ac2cdbffc64d0773acd346a87 100644 (file)
@@ -13,7 +13,7 @@ class GeoEthnoImporter(object):
         self.db = target_db
         self.cursor = self.db.cursor()
         self.dom = dom.parse(input_file)
-        
+        self.known_types = []
 
     def get_children_by_tag_name(self, node, tagName):
         children = []
@@ -31,7 +31,7 @@ class GeoEthnoImporter(object):
                 flat.append(n.firstChild.nodeValue.strip())
         return flat
 
-    def insert_location(self, name, type, parentName):
+    def insert_location(self, name, type, parentName, historic_names):
         if type == 'CONTINENT':
             short_type = 'continent'
         elif type == 'BASEADM':
@@ -39,6 +39,8 @@ class GeoEthnoImporter(object):
         else:
             short_type = 'other'
 
+        self.register_type(type)
+
         self.cursor.execute("REPLACE INTO locations "+
                             "(name, type, complete_type_id, current_name, is_authoritative) "+
                             "VALUES (%s, %s, %s, %s, %s)", (name, short_type, type, name, 1))
@@ -52,7 +54,14 @@ class GeoEthnoImporter(object):
 
             self.nrelations += 1
 
-        if self.nlocations % 100 == 0:
+        for hname in historic_names:
+            self.cursor.execute("REPLACE INTO locations "+
+                                "(name, type, complete_type_id, current_name, is_authoritative) "+
+                                "VALUES (%s, %s, %s, %s, %s)", (hname, short_type, type, name, 1))
+            self.nhistoric_names += 1
+                    
+
+        if self.nlocations % 1000 == 0:
             sys.stdout.write('.')
             sys.stdout.flush()
         
@@ -63,8 +72,13 @@ class GeoEthnoImporter(object):
                                 "VALUES (%s, %s, %s)", (name, alias, 1))
             self.naliases += 1
 
-    def add_historic_names(self, name, items):
-        pass
+    def register_type(self, id):
+        try:
+            self.known_types.index(id)
+        except ValueError:
+            self.cursor.execute("REPLACE INTO location_types (id, name) "+
+                                "VALUES (%s, %s)", (id,""))
+            self.known_types.append(id)                                
 
     def is_empty(self, node):
         for n in node.childNodes:
@@ -102,9 +116,8 @@ class GeoEthnoImporter(object):
                 
                 aliasNodes = self.get_children_by_tag_name(n, 'ALIAS')
                 historicNameNodes = self.get_children_by_tag_name(n, 'DESCR-HISTORIQUE')
-                self.insert_location(name, type, parentName)
+                self.insert_location(name, type, parentName, self.flatten_node_list(historicNameNodes))
                 self.add_aliases(name, self.flatten_node_list(aliasNodes))
-                self.add_historic_names(name, self.flatten_node_list(historicNameNodes))
                 self.process_children(n, name)
 
         self.path.pop()                
@@ -113,6 +126,7 @@ class GeoEthnoImporter(object):
         self.path = []
         self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '')
         sys.stdout.write('\nGeoethno import result:\n')
+        sys.stdout.write('  types: %d\n' % len(self.known_types))
         sys.stdout.write('  locations: %d\n' % self.nlocations)
         sys.stdout.write('  relations: %d\n' % self.nrelations)
         sys.stdout.write('  aliases: %d\n' % self.naliases)
@@ -120,7 +134,7 @@ class GeoEthnoImporter(object):
 
 
     def warn(self, msg):
-        print u"\nError: %s: %s\n" % ("/".join(self.path), msg)
+        print u"\nWarning: %s: %s\n" % ("/".join(self.path), msg)
         
 
 class Error(Exception):