git.parisson.com Git - telemeta-data.git/commitdiff
fix scientific instrument mapping ; improve geoethno mapping
author olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Tue, 16 Feb 2010 01:09:12 +0000 (01:09 +0000)
committer olivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Tue, 16 Feb 2010 01:09:12 +0000 (01:09 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@159 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/tasks/geoethno.py
trunk/import/migration/tasks/instruments.py
trunk/import/migration/tasks/items.py

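diff --git a/trunk/import/migration/tasks/geoethno.py b/trunk/import/migration/tasks/geoethno.py
index 5b26a61b0fd74e37c842802251781296f73d755f..e9b08bbfd0c507601d57e44678c917db97d74207 100644 (file)
--- a/trunk/import/migration/tasks/geoethno.py
+++ b/trunk/import/migration/tasks/geoethno.py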
@@ -215,6 +215,30 @@ class GeoEthnoImporter(DataMigrator):
         self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '')
         self.end()
 
+class GeoEthnoMappingEnhancer(DataMigrator):
+    """Add some locations and relations to ease CREM mapping"""
+
+    implements(IDataMigrator)
+
+    def get_name(self):
+        return "geoethno:enhance"
+
+    def getone(self, query, args=None):
+        self.target_cursor.execute(query, args)
+        return self.target_cursor.fetchone()[0]
+
+    def add_relation(self, location, parent):
+        location_id = self.getone("SELECT id FROM locations WHERE name = %s", (location,))
+        parent_id = self.getone("SELECT id FROM locations WHERE name = %s", (parent,))
+        self.target("INSERT INTO location_relations "
+                    "(location_id, ancestor_location_id, is_direct) "
+                    "VALUE (%s, %s, %s)", (location_id, parent_id, 1))
+
+    def process(self):
+        self.start()
+        self.add_relation(u'Fédération de Russie', 'Asie')
+        self.end()
+
 class GeoEthnoAncestryBuilder(DataMigrator):
     """Build indirect location ancestry relations"""
 
@@ -250,9 +274,23 @@ class GeoEthnoAncestryBuilder(DataMigrator):
 
         return ancestors                
 
+    def get_past_locations(self, location_id, cursor):
+        cursor.execute("SELECT id FROM locations WHERE current_location_id = %s AND current_location_id <> id",
+                       (location_id,))
+                               
+        list = []
+        while True:
+            row = cursor.fetchone()
+            if not row:
+                break
+            id, = row
+            list.append(id)
+
+        return list
+
     def process(self):
         self.target("DELETE FROM location_relations WHERE is_direct = 0")
-        ndirect = self.getone("SELECT count(*) FROM location_relations")
+        ndirect = self.getone("SELECT COUNT(*) FROM location_relations")
         self.stats = {'direct' : ndirect, 'indirect': 0, 'total': ndirect, 'redundant': 0}
         self.start(ndirect)
         rcursor1 = self.target_db.cursor()
@@ -265,31 +303,33 @@ class GeoEthnoAncestryBuilder(DataMigrator):
 
             id, = row
             ancestors = []
-            self.get_ancestors(ancestors, rcursor2, id, min_distance=2)
+            self.get_ancestors(ancestors, rcursor2, id, min_distance=1)
+            cpl = [id]
+            cpl.extend(self.get_past_locations(id, rcursor2))
             for aid in ancestors:
-                try:
-                    self.target("INSERT INTO location_relations (location_id, ancestor_location_id) "
-                                "VALUE (%s, %s)", (id, aid))
-                    self.stats['indirect'] += 1
-                    self.stats['total']    += 1                                
-                except IntegrityError, e:
-                    (errno, errmsg) = e
-                    if errno == DUP_ENTRY:
-                        self.stats['redundant'] += 1
-                    else:
-                        raise e
+                acpl = [aid]
+                acpl.extend(self.get_past_locations(aid, rcursor2))
+                for _aid in acpl:
+                    for id in cpl:
+                        try:
+                            self.target("INSERT INTO location_relations (location_id, ancestor_location_id) "
+                                        "VALUE (%s, %s)", (id, _aid))
+                            self.stats['indirect'] += 1
+                            self.stats['total']    += 1                                
+                        except IntegrityError, e:
+                            (errno, errmsg) = e
+                            if errno == DUP_ENTRY:
+                                self.stats['redundant'] += 1
+                            else:
+                                raise e
 
             self.step()                                
             
         self.end()
 
-
 class Error(Exception):
     def __init__(self, importer, msg):
         print u"\nError: %s: %s" % ("/".join(importer.path), msg)
 
 
 
-
-        
-
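diff --git a/trunk/import/migration/tasks/instruments.py b/trunk/import/migration/tasks/instruments.py
index 3d8812e8883aff3435a86fbd8b31032f04168d32..8fbd0efa1c0176a21060460f891e5c2a282a1e54 100644 (file)
--- a/trunk/import/migration/tasks/instruments.py
+++ b/trunk/import/migration/tasks/instruments.py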
@@ -289,11 +289,13 @@ class ItemPerformancesMigrator(DataMigrator):
         return 'instruments:performances'
 
     def process(self):
+        self.target("DELETE FROM media_item_performances")
         self.src("SELECT Cote_Phono, Instr_Scientif, Instr_Vernacul, Interprete, Total_Instrum FROM Formation")
         self.stats = {
             'total'         : self.src_cursor.rowcount,
             'migrated'      : 0,
-            'nosuchitem'    : 0
+            'nosuchitem'    : 0,
+            'nosuchinstrument': 0
         }
         self.start(self.stats['total'])
         while True:
@@ -304,15 +306,22 @@ class ItemPerformancesMigrator(DataMigrator):
             self.target("SELECT id FROM media_items WHERE old_code = %s", (oldcode,))
             if self.target_cursor.rowcount:
                 item_id, = self.target_cursor.fetchone()
-                cut = row[0].split('-', 1)
                 instrument_id = None
-                if len(cut) == 2:
-                    instrument_prefix = cut[0].strip()
-                    instrument_name   = cut[1].strip()
-                    self.target("SELECT id FROM _instruments_map WHERE prefix = %s AND name = %s",
-                                (instrument_prefix, instrument_name))
-                    if self.target_cursor.rowcount:
-                        instrument_id, = self.target_cursor.fetchone()
+                if instrument:
+                    cut = instrument.split('-', 1)
+                    if len(cut) == 2:
+                        instrument_prefix = cut[0].strip()
+                        instrument_name   = cut[1].strip()
+                        self.target("SELECT id FROM _instruments_map WHERE prefix = %s AND name = %s",
+                                    (instrument_prefix, instrument_name))
+                        if self.target_cursor.rowcount:
+                            instrument_id, = self.target_cursor.fetchone()
+                        else:
+                            self.warn("Unrecognized instrument: %s" % instrument)
+                            self.stats['nosuchinstrument'] += 1
+                    else:
+                        self.warn("Unrecognized instrument: %s" % instrument)
+                        self.stats['nosuchinstrument'] += 1
 
                 cut = alias.split('-', 1)
                 if len(cut) == 2:
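diff --git a/trunk/import/migration/tasks/items.py b/trunk/import/migration/tasks/items.py
index 1b20870c686872370ec97ef3c183f9170d2b17e4..29aeb07c85d27d4404cb55c84058fa54112f76d6 100644 (file)
--- a/trunk/import/migration/tasks/items.py
+++ b/trunk/import/migration/tasks/items.py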
@@ -204,7 +204,7 @@ class ItemsLocationsMapper(DataMigrator):
         return bool(self.target_cursor.fetchone()[0])                    
 
     def find_location(self, name_or_alias, type = None):
-        select = "SELECT id, type FROM locations AS l INNER JOIN location_aliases AS a ON l.id = a.location_id "
+        select = "SELECT id, type FROM locations AS l LEFT JOIN location_aliases AS a ON l.id = a.location_id "
         if type:
             self.target(select + "WHERE l.type = %s AND (l.name LIKE %s OR a.alias LIKE %s)",
                         (type, name_or_alias, name_or_alias))