From dcf59b8c1ce0fd42dfdb8bd8da778e2a4c82e948 Mon Sep 17 00:00:00 2001 From: olivier Date: Mon, 15 Jun 2009 14:21:03 +0000 Subject: [PATCH] migration: add instruments, aliases and performances migrators git-svn-id: http://svn.parisson.org/svn/crem@109 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- trunk/import/migration/tasks/__init__.py | 1 + trunk/import/migration/tasks/core.py | 6 + trunk/import/migration/tasks/instruments.py | 212 ++++++++++++++++++++ 3 files changed, 219 insertions(+) diff --git a/trunk/import/migration/tasks/__init__.py b/trunk/import/migration/tasks/__init__.py index b28e9b8..ae13d13 100644 --- a/trunk/import/migration/tasks/__init__.py +++ b/trunk/import/migration/tasks/__init__.py @@ -39,3 +39,4 @@ import publishers import collections import items import dates +import instruments diff --git a/trunk/import/migration/tasks/core.py b/trunk/import/migration/tasks/core.py index de01870..de61f48 100644 --- a/trunk/import/migration/tasks/core.py +++ b/trunk/import/migration/tasks/core.py @@ -50,6 +50,12 @@ class DataMigrationTask(Component): sys.stdout.write('.') sys.stdout.flush() + def src(self, query, args = None): + self.src_cursor.execute(query, args) + + def target(self, query, args = None): + self.target_cursor.execute(query, args) + class DataMigrator(DataMigrationTask): def build_assignments(self, map): diff --git a/trunk/import/migration/tasks/instruments.py b/trunk/import/migration/tasks/instruments.py index 0a921ab..e1642b3 100644 --- a/trunk/import/migration/tasks/instruments.py +++ b/trunk/import/migration/tasks/instruments.py @@ -34,6 +34,8 @@ from telemeta.core import * from api import IDataMigrator from core import DataMigrator +from _mysql_exceptions import IntegrityError +from MySQLdb.constants.ER import DUP_ENTRY class InstrumentsThesaurusMigrator(DataMigrator): """Migrate the instruments thesaurus""" @@ -122,4 +124,214 @@ class InstrumentsThesaurusMigrator(DataMigrator): def get_name(self): return 'instruments:thesaurus' + def process(self): + self.target("CREATE TEMPORARY TABLE _instruments_map " + "(id INT NOT NULL, prefix VARCHAR(16), name VARCHAR(128), " + " UNIQUE(prefix, name))") + + self.target("DELETE FROM instruments") + self.target("DELETE FROM instrument_relations") + + families_map = {} + for prefix, name in self.families: + self.target("INSERT INTO instruments SET name = %s", (name,)) + families_map[prefix] = self.target_cursor.lastrowid + self.target("INSERT INTO _instruments_map VALUES(%s, %s, %s)", + (self.target_cursor.lastrowid, prefix, name)) + + for prefix in families_map: + if len(prefix) > 1: + parent_prefix = prefix[:-1] + if families_map.has_key(parent_prefix): + self.target("INSERT INTO instrument_relations SET instrument_id = %s, parent_instrument_id = %s", + (families_map[prefix], families_map[parent_prefix])) + + #self.src("SELECT Scientifique FROM Scientifique ORDER BY Scientifique") + self.src("SELECT Scientifique AS i from Scientifique UNION " + "SELECT Instr_Scientif AS i FROM Formation") + + self.stats = { + 'total' : self.src_cursor.rowcount, + 'migrated' : 0, + 'badprefix' : 0, + 'unparsed' : 0, + 'duplicate' : 0 + } + + unknown_families = [] + while True: + row = self.src_cursor.fetchone() + if not row: + break + + code = row[0] + cut = code.split('-', 1) + if len(cut) == 2: + prefix = cut[0].strip() + name = cut[1].strip() + if families_map.has_key(prefix): + self.target("SELECT * FROM _instruments_map WHERE prefix = %s AND name = %s", + (prefix, name)) + if not self.target_cursor.rowcount: + self.target("INSERT INTO instruments SET name = %s", (name,)) + instrument_id = self.target_cursor.lastrowid + self.target("INSERT INTO instrument_relations SET instrument_id = %s, parent_instrument_id = %s", + (instrument_id, families_map[prefix])) + self.target("INSERT INTO _instruments_map VALUES(%s, %s, %s)", + (instrument_id, prefix, name)) + self.stats['migrated'] += 1 + else: + self.stats['duplicate'] += 1 + else: + if not prefix in unknown_families: + print "Unknown family prefix: " + prefix + unknown_families.append(prefix) + self.stats['badprefix'] += 1 + else: + print "Can't parse instrument code: %s" % code + self.stats['unparsed'] += 1 + +class InstrumentAliasesMigrator(DataMigrator): + """Migrate the instrument aliases""" + + implements(IDataMigrator) + def get_name(self): + return 'instruments:aliases' + + def process(self): + self.target("CREATE TEMPORARY TABLE _instrument_aliases_map " + "(id INT NOT NULL, prefix VARCHAR(16), name VARCHAR(128), " + " UNIQUE(prefix, name))") + + self.target("DELETE FROM instrument_aliases") + self.target("DELETE FROM instrument_alias_relations") + + self.src("SELECT Vernaculaire AS i FROM Vernaculaire UNION " + "SELECT Instr_Vernacul AS i FROM Formation") + + self.stats = { + 'total' : self.src_cursor.rowcount, + 'migrated' : 0, + 'relations' : 0, + 'badinstruments' : 0, + 'duplicate' : 0 + } + #self.src("SELECT Vernaculaire FROM Vernaculaire") + instr_cursor = self.src_db.cursor() + + i = 0 + unknown_instruments = [] + while True: + if i % 200 == 0: + self.step() + i += 1 + row = self.src_cursor.fetchone() + if not row: + break + alias, = row + cut = alias.split('-', 1) + if len(cut) == 2: + alias_prefix = cut[0].strip() + alias_name = cut[1].strip() + else: + alias_prefix = '' + alias_name = alias.strip() + + if alias_name: + self.target("SELECT id FROM _instrument_aliases_map WHERE prefix = %s AND name = %s", + (alias_prefix, alias_name)) + if self.target_cursor.rowcount: + alias_id, = self.target_cursor.fetchone() + self.stats['duplicate'] += 1 + if not self.target_cursor.rowcount: + self.target("INSERT INTO instrument_aliases SET name = %s", alias_name) + alias_id = self.target_cursor.lastrowid + self.target("INSERT INTO _instrument_aliases_map SET id = %s, prefix = %s, name = %s", + (alias_id, alias_prefix, alias_name)) + self.stats['migrated'] += 1 + instr_cursor.execute("SELECT Instr_Scientif FROM Formation WHERE Instr_Vernacul = %s", (alias,)) + while True: + row = instr_cursor.fetchone() + if not row: + break + cut = row[0].split('-', 1) + if len(cut) == 2: + instrument_prefix = cut[0].strip() + instrument_name = cut[1].strip() + self.target("SELECT id FROM _instruments_map WHERE prefix = %s AND name = %s", + (instrument_prefix, instrument_name)) + if self.target_cursor.rowcount: + instrument_id, = self.target_cursor.fetchone() + self.target("SELECT * FROM instrument_alias_relations WHERE alias_id = %s AND instrument_id = %s", + (alias_id, instrument_id)) + if not self.target_cursor.rowcount: + self.target("INSERT INTO instrument_alias_relations SET alias_id = %s, instrument_id = %s", + (alias_id, instrument_id)) + self.stats['relations'] += 1 + elif not row[0] in unknown_instruments: + print "No such instrument: %s" % row[0] + unknown_instruments.append(row[0]) + self.stats['badinstruments'] = len(unknown_instruments) + +class ItemPerformancesMigrator(DataMigrator): + """Migrate items performances""" + + implements(IDataMigrator) + + def get_name(self): + return 'instruments:performances' + + def process(self): + self.src("SELECT Cote_Phono, Instr_Scientif, Instr_Vernacul, Interprete, Total_Instrum FROM Formation") + self.stats = { + 'total' : self.src_cursor.rowcount, + 'migrated' : 0, + 'nosuchitem' : 0 + } + while True: + row = self.src_cursor.fetchone() + if not row: + break + oldcode, instrument, alias, musicians, instruments_num = row + self.target("SELECT id FROM media_items WHERE old_code = %s", (oldcode,)) + if self.target_cursor.rowcount: + item_id, = self.target_cursor.fetchone() + cut = row[0].split('-', 1) + instrument_id = None + if len(cut) == 2: + instrument_prefix = cut[0].strip() + instrument_name = cut[1].strip() + self.target("SELECT id FROM _instruments_map WHERE prefix = %s AND name = %s", + (instrument_prefix, instrument_name)) + if self.target_cursor.rowcount: + instrument_id, = self.target_cursor.fetchone() + + cut = alias.split('-', 1) + if len(cut) == 2: + alias_prefix = cut[0].strip() + alias_name = cut[1].strip() + else: + alias_prefix = '' + alias_name = alias.strip() + + alias_id = None + if len(alias_name): + self.target("SELECT id FROM _instrument_aliases_map WHERE prefix = %s AND name = %s", + (alias_prefix, alias_name)) + if self.target_cursor.rowcount: + alias_id, = self.target_cursor.fetchone() + musicians = musicians.strip() + instruments_num = instruments_num.strip() + + if instrument_id or alias_id or musicians or instruments_num: + self.target("INSERT INTO media_item_performances SET media_item_id = %s, " + "instrument_id = %s, alias_id = %s, instruments_num = %s, musicians = %s ", + (item_id, instrument_id, alias_id, instruments_num, musicians)) + self.stats['migrated'] += 1 + + else: + print "No such item: %s" % oldcode + self.stats['nosuchitem'] += 1 + + -- 2.39.5