]> git.parisson.com Git - telemeta-data.git/commitdiff
migration: add instruments, aliases and performances migrators
authorolivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Mon, 15 Jun 2009 14:21:03 +0000 (14:21 +0000)
committerolivier <olivier@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Mon, 15 Jun 2009 14:21:03 +0000 (14:21 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@109 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/migration/tasks/__init__.py
trunk/import/migration/tasks/core.py
trunk/import/migration/tasks/instruments.py

index b28e9b875137ce5b50cdf6a300b98b4576543bce..ae13d138346fc8544c94a07102a9fd669a2e15a1 100644 (file)
@@ -39,3 +39,4 @@ import publishers
 import collections
 import items
 import dates
+import instruments
index de018700ee81c3f4bedd859c074b0046149fcbc5..de61f482f3770add8cdb53af9f21b463d284063c 100644 (file)
@@ -50,6 +50,12 @@ class DataMigrationTask(Component):
         sys.stdout.write('.')
         sys.stdout.flush()
 
+    def src(self, query, args = None):
+        self.src_cursor.execute(query, args)
+
+    def target(self, query, args = None):
+        self.target_cursor.execute(query, args)
+
 class DataMigrator(DataMigrationTask):
 
     def build_assignments(self, map):
index 0a921ab5c705e40a4ef018d2e629ffc7665e2a08..e1642b364a76491774200cd7c5ef69419b34c662 100644 (file)
@@ -34,6 +34,8 @@
 from telemeta.core import *
 from api import IDataMigrator
 from core import DataMigrator
+from _mysql_exceptions import IntegrityError
+from MySQLdb.constants.ER import DUP_ENTRY
 
 class InstrumentsThesaurusMigrator(DataMigrator):
     """Migrate the instruments thesaurus"""
@@ -122,4 +124,214 @@ class InstrumentsThesaurusMigrator(DataMigrator):
     def get_name(self):
         return 'instruments:thesaurus'
 
+    def process(self):
+        self.target("CREATE TEMPORARY TABLE _instruments_map "
+                    "(id INT NOT NULL, prefix VARCHAR(16), name VARCHAR(128), "
+                    " UNIQUE(prefix, name))")
+
+        self.target("DELETE FROM instruments")
+        self.target("DELETE FROM instrument_relations")
+
+        families_map = {}
+        for prefix, name in self.families:
+            self.target("INSERT INTO instruments SET name = %s", (name,))
+            families_map[prefix] = self.target_cursor.lastrowid
+            self.target("INSERT INTO _instruments_map VALUES(%s, %s, %s)",
+                        (self.target_cursor.lastrowid, prefix, name))
+
+        for prefix in families_map:
+            if len(prefix) > 1:
+                parent_prefix = prefix[:-1]
+                if families_map.has_key(parent_prefix):
+                    self.target("INSERT INTO instrument_relations SET instrument_id = %s, parent_instrument_id = %s",
+                                (families_map[prefix], families_map[parent_prefix]))
+
+        #self.src("SELECT Scientifique FROM Scientifique ORDER BY Scientifique")
+        self.src("SELECT Scientifique AS i from Scientifique UNION "
+                 "SELECT Instr_Scientif AS i FROM Formation")
+
+        self.stats = {
+            'total'     : self.src_cursor.rowcount,
+            'migrated'  : 0,
+            'badprefix' : 0,
+            'unparsed'  : 0,
+            'duplicate' : 0
+        }
+
+        unknown_families = []
+        while True:
+            row = self.src_cursor.fetchone()
+            if not row:
+                break
+            
+            code = row[0]
+            cut = code.split('-', 1)
+            if len(cut) == 2:
+                prefix = cut[0].strip()
+                name   = cut[1].strip()
+                if families_map.has_key(prefix):
+                    self.target("SELECT * FROM _instruments_map WHERE prefix = %s AND name = %s",
+                                (prefix, name))
+                    if not self.target_cursor.rowcount:
+                        self.target("INSERT INTO instruments SET name = %s", (name,))
+                        instrument_id = self.target_cursor.lastrowid
+                        self.target("INSERT INTO instrument_relations SET instrument_id = %s, parent_instrument_id = %s",
+                            (instrument_id, families_map[prefix]))
+                        self.target("INSERT INTO _instruments_map VALUES(%s, %s, %s)",
+                                    (instrument_id, prefix, name))
+                        self.stats['migrated'] += 1
+                    else:
+                        self.stats['duplicate'] += 1
+                else:
+                    if not prefix in unknown_families:
+                        print "Unknown family prefix: " + prefix
+                        unknown_families.append(prefix)
+                    self.stats['badprefix'] += 1
+            else:
+                print "Can't parse instrument code: %s" % code
+                self.stats['unparsed'] += 1
+
+class InstrumentAliasesMigrator(DataMigrator):
+    """Migrate the instrument aliases"""
+
+    implements(IDataMigrator)
     
+    def get_name(self):
+        return 'instruments:aliases'
+
+    def process(self):
+        self.target("CREATE TEMPORARY TABLE _instrument_aliases_map "
+                    "(id INT NOT NULL, prefix VARCHAR(16), name VARCHAR(128), "
+                    " UNIQUE(prefix, name))")
+
+        self.target("DELETE FROM instrument_aliases")
+        self.target("DELETE FROM instrument_alias_relations")
+
+        self.src("SELECT Vernaculaire AS i FROM Vernaculaire UNION "
+                 "SELECT Instr_Vernacul AS i FROM Formation")
+
+        self.stats = {
+            'total'     : self.src_cursor.rowcount,
+            'migrated'  : 0,
+            'relations' : 0,
+            'badinstruments' : 0,
+            'duplicate' : 0
+        }
+        #self.src("SELECT Vernaculaire FROM Vernaculaire")
+        instr_cursor = self.src_db.cursor()
+
+        i = 0
+        unknown_instruments = []
+        while True:
+            if i % 200 == 0:
+                self.step()
+            i += 1
+            row = self.src_cursor.fetchone()
+            if not row:
+                break
+            alias, = row    
+            cut = alias.split('-', 1)
+            if len(cut) == 2:
+                alias_prefix = cut[0].strip()
+                alias_name   = cut[1].strip()
+            else:
+                alias_prefix = ''
+                alias_name = alias.strip()
+           
+            if alias_name:
+                self.target("SELECT id FROM _instrument_aliases_map WHERE prefix = %s AND name = %s",
+                            (alias_prefix, alias_name))
+                if self.target_cursor.rowcount:
+                    alias_id, = self.target_cursor.fetchone()
+                    self.stats['duplicate'] += 1
+                if not self.target_cursor.rowcount:
+                    self.target("INSERT INTO instrument_aliases SET name = %s", alias_name)
+                    alias_id = self.target_cursor.lastrowid
+                    self.target("INSERT INTO _instrument_aliases_map SET id = %s, prefix = %s, name = %s",
+                                (alias_id, alias_prefix, alias_name))
+                    self.stats['migrated'] += 1
+                instr_cursor.execute("SELECT Instr_Scientif FROM Formation WHERE Instr_Vernacul = %s", (alias,))
+                while True:
+                    row = instr_cursor.fetchone()
+                    if not row:
+                        break
+                    cut = row[0].split('-', 1)
+                    if len(cut) == 2:
+                        instrument_prefix = cut[0].strip()
+                        instrument_name   = cut[1].strip()
+                        self.target("SELECT id FROM _instruments_map WHERE prefix = %s AND name = %s", 
+                                    (instrument_prefix, instrument_name))
+                        if self.target_cursor.rowcount:
+                            instrument_id, = self.target_cursor.fetchone()
+                            self.target("SELECT * FROM instrument_alias_relations WHERE alias_id = %s AND instrument_id = %s",
+                                        (alias_id, instrument_id))
+                            if not self.target_cursor.rowcount:
+                                self.target("INSERT INTO instrument_alias_relations SET alias_id = %s, instrument_id = %s",
+                                            (alias_id, instrument_id))
+                                self.stats['relations'] += 1
+                        elif not row[0] in unknown_instruments:
+                            print "No such instrument: %s" % row[0]
+                            unknown_instruments.append(row[0])
+        self.stats['badinstruments'] = len(unknown_instruments)
+
+class ItemPerformancesMigrator(DataMigrator):
+    """Migrate items performances"""
+
+    implements(IDataMigrator)
+            
+    def get_name(self):
+        return 'instruments:performances'
+
+    def process(self):
+        self.src("SELECT Cote_Phono, Instr_Scientif, Instr_Vernacul, Interprete, Total_Instrum FROM Formation")
+        self.stats = {
+            'total'         : self.src_cursor.rowcount,
+            'migrated'      : 0,
+            'nosuchitem'    : 0
+        }
+        while True:
+            row = self.src_cursor.fetchone()
+            if not row:
+                break
+            oldcode, instrument, alias, musicians, instruments_num = row
+            self.target("SELECT id FROM media_items WHERE old_code = %s", (oldcode,))
+            if self.target_cursor.rowcount:
+                item_id, = self.target_cursor.fetchone()
+                cut = row[0].split('-', 1)
+                instrument_id = None
+                if len(cut) == 2:
+                    instrument_prefix = cut[0].strip()
+                    instrument_name   = cut[1].strip()
+                    self.target("SELECT id FROM _instruments_map WHERE prefix = %s AND name = %s",
+                                (instrument_prefix, instrument_name))
+                    if self.target_cursor.rowcount:
+                        instrument_id, = self.target_cursor.fetchone()
+
+                cut = alias.split('-', 1)
+                if len(cut) == 2:
+                    alias_prefix = cut[0].strip()
+                    alias_name   = cut[1].strip()
+                else:
+                    alias_prefix = ''
+                    alias_name = alias.strip()
+
+                alias_id = None
+                if len(alias_name):
+                    self.target("SELECT id FROM _instrument_aliases_map WHERE prefix = %s AND name = %s",
+                                (alias_prefix, alias_name))
+                    if self.target_cursor.rowcount:
+                        alias_id, = self.target_cursor.fetchone()
+                musicians = musicians.strip()
+                instruments_num = instruments_num.strip()
+
+                if instrument_id or alias_id or musicians or instruments_num:
+                    self.target("INSERT INTO media_item_performances SET media_item_id = %s, "
+                                "instrument_id = %s, alias_id = %s, instruments_num = %s, musicians = %s ",
+                                (item_id, instrument_id, alias_id, instruments_num, musicians))
+                    self.stats['migrated'] += 1
+                
+            else:
+                print "No such item: %s" % oldcode
+                self.stats['nosuchitem'] += 1
+
+