From c40a11be3d23ab8c66d0be72229ef57ec2e47d3d Mon Sep 17 00:00:00 2001 From: olivier Date: Tue, 16 Feb 2010 01:09:12 +0000 Subject: [PATCH] fix scientific instrument mapping ; improve geoethno mapping git-svn-id: http://svn.parisson.org/svn/crem@159 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- trunk/import/migration/tasks/geoethno.py | 74 ++++++++++++++++----- trunk/import/migration/tasks/instruments.py | 27 +++++--- trunk/import/migration/tasks/items.py | 2 +- 3 files changed, 76 insertions(+), 27 deletions(-) diff --git a/trunk/import/migration/tasks/geoethno.py b/trunk/import/migration/tasks/geoethno.py index 5b26a61..e9b08bb 100644 --- a/trunk/import/migration/tasks/geoethno.py +++ b/trunk/import/migration/tasks/geoethno.py @@ -215,6 +215,30 @@ class GeoEthnoImporter(DataMigrator): self.process_children(self.dom.getElementsByTagName('GEOETHNO')[0], '') self.end() +class GeoEthnoMappingEnhancer(DataMigrator): + """Add some locations and relations to ease CREM mapping""" + + implements(IDataMigrator) + + def get_name(self): + return "geoethno:enhance" + + def getone(self, query, args=None): + self.target_cursor.execute(query, args) + return self.target_cursor.fetchone()[0] + + def add_relation(self, location, parent): + location_id = self.getone("SELECT id FROM locations WHERE name = %s", (location,)) + parent_id = self.getone("SELECT id FROM locations WHERE name = %s", (parent,)) + self.target("INSERT INTO location_relations " + "(location_id, ancestor_location_id, is_direct) " + "VALUE (%s, %s, %s)", (location_id, parent_id, 1)) + + def process(self): + self.start() + self.add_relation(u'Fédération de Russie', 'Asie') + self.end() + class GeoEthnoAncestryBuilder(DataMigrator): """Build indirect location ancestry relations""" @@ -250,9 +274,23 @@ class GeoEthnoAncestryBuilder(DataMigrator): return ancestors + def get_past_locations(self, location_id, cursor): + cursor.execute("SELECT id FROM locations WHERE current_location_id = %s AND current_location_id <> id", + (location_id,)) + + list = [] + while True: + row = cursor.fetchone() + if not row: + break + id, = row + list.append(id) + + return list + def process(self): self.target("DELETE FROM location_relations WHERE is_direct = 0") - ndirect = self.getone("SELECT count(*) FROM location_relations") + ndirect = self.getone("SELECT COUNT(*) FROM location_relations") self.stats = {'direct' : ndirect, 'indirect': 0, 'total': ndirect, 'redundant': 0} self.start(ndirect) rcursor1 = self.target_db.cursor() @@ -265,31 +303,33 @@ class GeoEthnoAncestryBuilder(DataMigrator): id, = row ancestors = [] - self.get_ancestors(ancestors, rcursor2, id, min_distance=2) + self.get_ancestors(ancestors, rcursor2, id, min_distance=1) + cpl = [id] + cpl.extend(self.get_past_locations(id, rcursor2)) for aid in ancestors: - try: - self.target("INSERT INTO location_relations (location_id, ancestor_location_id) " - "VALUE (%s, %s)", (id, aid)) - self.stats['indirect'] += 1 - self.stats['total'] += 1 - except IntegrityError, e: - (errno, errmsg) = e - if errno == DUP_ENTRY: - self.stats['redundant'] += 1 - else: - raise e + acpl = [aid] + acpl.extend(self.get_past_locations(aid, rcursor2)) + for _aid in acpl: + for id in cpl: + try: + self.target("INSERT INTO location_relations (location_id, ancestor_location_id) " + "VALUE (%s, %s)", (id, _aid)) + self.stats['indirect'] += 1 + self.stats['total'] += 1 + except IntegrityError, e: + (errno, errmsg) = e + if errno == DUP_ENTRY: + self.stats['redundant'] += 1 + else: + raise e self.step() self.end() - class Error(Exception): def __init__(self, importer, msg): print u"\nError: %s: %s" % ("/".join(importer.path), msg) - - - diff --git a/trunk/import/migration/tasks/instruments.py b/trunk/import/migration/tasks/instruments.py index 3d8812e..8fbd0ef 100644 --- a/trunk/import/migration/tasks/instruments.py +++ b/trunk/import/migration/tasks/instruments.py @@ -289,11 +289,13 @@ class ItemPerformancesMigrator(DataMigrator): return 'instruments:performances' def process(self): + self.target("DELETE FROM media_item_performances") self.src("SELECT Cote_Phono, Instr_Scientif, Instr_Vernacul, Interprete, Total_Instrum FROM Formation") self.stats = { 'total' : self.src_cursor.rowcount, 'migrated' : 0, - 'nosuchitem' : 0 + 'nosuchitem' : 0, + 'nosuchinstrument': 0 } self.start(self.stats['total']) while True: @@ -304,15 +306,22 @@ class ItemPerformancesMigrator(DataMigrator): self.target("SELECT id FROM media_items WHERE old_code = %s", (oldcode,)) if self.target_cursor.rowcount: item_id, = self.target_cursor.fetchone() - cut = row[0].split('-', 1) instrument_id = None - if len(cut) == 2: - instrument_prefix = cut[0].strip() - instrument_name = cut[1].strip() - self.target("SELECT id FROM _instruments_map WHERE prefix = %s AND name = %s", - (instrument_prefix, instrument_name)) - if self.target_cursor.rowcount: - instrument_id, = self.target_cursor.fetchone() + if instrument: + cut = instrument.split('-', 1) + if len(cut) == 2: + instrument_prefix = cut[0].strip() + instrument_name = cut[1].strip() + self.target("SELECT id FROM _instruments_map WHERE prefix = %s AND name = %s", + (instrument_prefix, instrument_name)) + if self.target_cursor.rowcount: + instrument_id, = self.target_cursor.fetchone() + else: + self.warn("Unrecognized instrument: %s" % instrument) + self.stats['nosuchinstrument'] += 1 + else: + self.warn("Unrecognized instrument: %s" % instrument) + self.stats['nosuchinstrument'] += 1 cut = alias.split('-', 1) if len(cut) == 2: diff --git a/trunk/import/migration/tasks/items.py b/trunk/import/migration/tasks/items.py index 1b20870..29aeb07 100644 --- a/trunk/import/migration/tasks/items.py +++ b/trunk/import/migration/tasks/items.py @@ -204,7 +204,7 @@ class ItemsLocationsMapper(DataMigrator): return bool(self.target_cursor.fetchone()[0]) def find_location(self, name_or_alias, type = None): - select = "SELECT id, type FROM locations AS l INNER JOIN location_aliases AS a ON l.id = a.location_id " + select = "SELECT id, type FROM locations AS l LEFT JOIN location_aliases AS a ON l.id = a.location_id " if type: self.target(select + "WHERE l.type = %s AND (l.name LIKE %s OR a.alias LIKE %s)", (type, name_or_alias, name_or_alias)) -- 2.39.5