From: olivier
Date: Wed, 10 Jun 2009 18:04:54 +0000 (+0000)
Subject: migration: add items raw copy and enumerations mapper
X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=f2ae9e844aefaf997c41b2dd55e33f4ff1fd9f20;p=telemeta-data.git

migration: add items raw copy and enumerations mapper

git-svn-id: http://svn.parisson.org/svn/crem@95 3bf09e05-f825-4182-b9bc-eedd7160adf0
---

diff --git a/trunk/import/migration/tasks/__init__.py b/trunk/import/migration/tasks/__init__.py
index e531bb5..5598a9f 100644
--- a/trunk/import/migration/tasks/__init__.py
+++ b/trunk/import/migration/tasks/__init__.py
@@ -37,3 +37,4 @@ import geoethno
 import ethnic
 import publishers
 import collections
+import items
diff --git a/trunk/import/migration/tasks/collections.py b/trunk/import/migration/tasks/collections.py
index 999ccb5..e37bfb4 100644
--- a/trunk/import/migration/tasks/collections.py
+++ b/trunk/import/migration/tasks/collections.py
@@ -33,7 +33,7 @@ from telemeta.core import *
 
 from api import IDataMigrator
-from core import DataMigrator
+from core import DataMigrator, EnumMapper
 from _mysql_exceptions import IntegrityError
 from MySQLdb.constants.ER import DUP_ENTRY
 import re
@@ -71,15 +71,6 @@ class CollectionsCopyMigrator(DataMigrator):
     def get_name(self):
         return "collections:copy"
 
-    def build_assignments(self, map):
-        assign = []
-        for f1, f2 in map:
-            f2 = '`%s`' % f2
-            f1 = '`%s`' % f1
-            assign.append((f2, f1))
-
-        return assign
-
     def process(self):
         assign = self.build_assignments(self.map)
 
@@ -117,7 +108,7 @@ class CollectionsCopyMigrator(DataMigrator):
                 else:
                     raise e
 
-class CollectionsEnumMapper(DataMigrator):
+class CollectionsEnumMapper(EnumMapper):
     """Map simple enumerations into the collections table"""
 
     implements(IDataMigrator)
@@ -125,8 +116,6 @@ class CollectionsEnumMapper(DataMigrator):
     map = [
         ('Format', 'physical_format'),
         ('Reedition', 'publishing_status'),
-        #('Editeur', 'publisher'),
-        #('Collect_Série', 'publisher_collection'),
         ('Mode_Acqui', 'acquisition_mode'),
         ('Redacteur_Fiche', 'metadata_author'),
         ('Saisie_Fiche', 'metadata_writer'),
@@ -139,47 +128,7 @@ class CollectionsEnumMapper(DataMigrator):
         return "collections:enums"
 
     def process(self):
-        buffer_size = 200
-        offset = 0
-
-        src_fields = []
-        target_fields = []
-        enum_tables = []
-        for src_field, target_base in self.map:
-            src_fields.append(src_field)
-            target_fields.append('`%s_id`' % target_base)
-            if target_base[-1] == 's':
-                enum_tables.append(target_base)
-            else:
-                enum_tables.append(target_base + 's')
-
-        while not offset or self.src_cursor.rowcount:
-            self.src_cursor.execute("SELECT Cote, %s FROM Support LIMIT %d, %d" % (
-                ", ".join(src_fields), offset, buffer_size))
-            while True:
-                row = self.src_cursor.fetchone()
-                if not row:
-                    break
-                code = row[0]
-                for i in range(0, len(row) - 1):
-                    value = row[i + 1]
-                    if value and len(value) > 0:
-                        self.target_cursor.execute("SELECT id FROM " + enum_tables[i] +
-                                                   " WHERE value = %s", (value,))
-                        idrow = self.target_cursor.fetchone()
-                        if idrow:
-                            self.target_cursor.execute("UPDATE media_collections SET "
-                                                       + target_fields[i] + " = %s "
-                                                       "WHERE code = %s", (idrow[0], code))
-                            if self.target_cursor.rowcount > 1:
-                                raise Exception("Updated more than one row, this shouldn't happen..")
-                            elif not self.target_cursor.rowcount:
-                                print "Can't find migrated collection: %s" % code
-                        else:
-                            print "Can't find value '%s' in %s" % (value, enum_tables[i])
-
-            offset += self.src_cursor.rowcount
-            self.step()
+        EnumMapper.process(self, 'Support', 'Cote', 'media_collections', self.map)
 
 class CollectionsCodeConverter(DataMigrator):
     """Convert old to new-style collection codes"""
diff --git a/trunk/import/migration/tasks/core.py b/trunk/import/migration/tasks/core.py
index d9f1773..85fdad3 100644
--- a/trunk/import/migration/tasks/core.py
+++ b/trunk/import/migration/tasks/core.py
@@ -50,7 +50,15 @@ class DataMigrationTask(Component):
         sys.stdout.flush()
 
 class DataMigrator(DataMigrationTask):
-    pass
+
+    def build_assignments(self, map):
+        assign = []
+        for f1, f2 in map:
+            f2 = '`%s`' % f2
+            f1 = '`%s`' % f1
+            assign.append((f2, f1))
+
+        return assign
 
 class DataInitializer(DataMigrationTask):
     pass
@@ -104,3 +112,57 @@ class GroupedItemsManager(object):
         for i in self.groups:
             nitems += len(self.groups[i])
         return nitems
+
+class EnumMapper(DataMigrator):
+    """Map simple enumerations"""
+
+    def process(self, src_table, src_id_field, target_table, map):
+        buffer_size = 200
+        offset = 0
+
+        src_fields = []
+        target_fields = []
+        enum_tables = []
+        enum_value_fields = []
+        for src_field, target_def in map:
+            src_fields.append('`%s`' % src_field)
+            cut = target_def.split(':')
+            target_base = cut[0]
+            if len(cut) > 1:
+                enum_value_fields.append(cut[1])
+            else:
+                enum_value_fields.append('value')
+
+            target_fields.append('`%s_id`' % target_base)
+            if target_base[-1] == 's':
+                enum_tables.append(target_base)
+            else:
+                enum_tables.append(target_base + 's')
+
+        while not offset or self.src_cursor.rowcount:
+            self.src_cursor.execute("SELECT %s, %s FROM %s LIMIT %d, %d" % (
+                src_id_field, ", ".join(src_fields), src_table, offset, buffer_size))
+            while True:
+                row = self.src_cursor.fetchone()
+                if not row:
+                    break
+                code = row[0]
+                for i in range(0, len(row) - 1):
+                    value = row[i + 1]
+                    if value and len(value) > 0:
+                        self.target_cursor.execute("SELECT id FROM " + enum_tables[i] +
+                                                   " WHERE " + enum_value_fields[i] + " = %s", (value,))
+                        idrow = self.target_cursor.fetchone()
+                        if idrow:
+                            self.target_cursor.execute("UPDATE " + target_table + " SET "
+                                                       + target_fields[i] + " = %s "
+                                                       "WHERE old_code = %s", (idrow[0], code))
+                            if self.target_cursor.rowcount > 1:
+                                raise Exception("Updated more than one row, this shouldn't happen..")
+                            elif not self.target_cursor.rowcount:
+                                print "Can't find migrated collection: %s" % code
+                        else:
+                            print "Can't find value '%s' in %s" % (value, enum_tables[i])
+
+            offset += self.src_cursor.rowcount
+            self.step()
diff --git a/trunk/import/migration/tasks/items.py b/trunk/import/migration/tasks/items.py
new file mode 100644
index 0000000..d576f82
--- /dev/null
+++ b/trunk/import/migration/tasks/items.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+#
+# CREM Database migrator
+#
+# Copyright (C) 2009 Samalyse SARL
+# Author: Olivier Guilyardi
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from telemeta.core import *
+from api import IDataMigrator
+from core import DataMigrator, EnumMapper
+from _mysql_exceptions import IntegrityError
+from MySQLdb.constants.ER import DUP_ENTRY
+
+class ItemsCopyMigrator(DataMigrator):
+    """Perform a preliminary raw copy of the item table"""
+
+    implements(IDataMigrator)
+
+    map = [
+        ('Face_Plage', 'track'),
+        ('Cote_Phono', 'old_code'),
+        ('Duree', 'approx_duration'),
+        ('Titre_piece', 'title'),
+        ('Transcrip_Trad', 'alt_title'),
+        ('Auteur', 'author'),
+        ('Comm_FonctUsage', 'context_comment'),
+        ('Documentation', 'external_references'),
+        ('Moda Execut', 'moda_execut'),
+        ('Enregistre_par', 'collector'),
+        ('Aire_Geo_Cult', 'cultural_area'),
+        ('ChoixCollecteur', 'collector_selection'),
+        ('NroBand NroPiec', 'creator_reference')
+    ]
+
+    def get_name(self):
+        return "items:copy"
+
+    def process(self):
+        assign = self.build_assignments(self.map)
+        target_fields = [str(a[0]) for a in assign]
+        src_fields = [str(a[1]) for a in assign]
+
+        self.target_cursor.execute("DELETE FROM media_items")
+
+        self.src_cursor.execute("SELECT COUNT(*) FROM Phono")
+        count = self.src_cursor.fetchone()[0]
+        self.stats = { 'total': count, 'imported': 0, 'ignored': 0}
+
+        query = "INSERT INTO media_items (\n collection_id,\n %s\n)\n" \
+                "SELECT \n c.id,\n %s\n FROM %s.Phono AS p " \
+                "INNER JOIN media_collections AS c ON p.Cote_Support = c.old_code " % (
+                    ",\n ".join(target_fields),
+                    ",\n ".join(src_fields),
+                    self.src_db_name)
+
+        self.target_cursor.execute(query)
+
+        self.target_cursor.execute("SELECT COUNT(*) FROM media_items")
+        self.stats['imported'] = self.target_cursor.fetchone()[0]
+        self.stats['ignored'] = self.stats['total'] - self.stats['imported']
+
+        print "Couldn't import the following items, no such (or un-migrated) collection:"
+
+        query = "SELECT p.Cote_Phono, p.Cote_Support FROM %s.Phono AS p " \
+                "LEFT JOIN media_collections AS c ON p.Cote_Support = c.old_code " \
+                "WHERE c.old_code IS NULL" % self.src_db_name
+
+        self.target_cursor.execute(query);
+
+        while True:
+            row = self.target_cursor.fetchone()
+            if not row:
+                break
+            print " %s (collection: %s)" % (row[0], row[1])
+
+class ItemsEnumMapper(EnumMapper):
+    """Map simple enumerations into the items table"""
+
+    implements(IDataMigrator)
+
+    map = [
+        ('Ethnie_GrSocial', 'ethnic_group:name'),
+        ('Form_Genr_Style', 'vernacular_style'),
+        ('FormStyl generi', 'generic_style')
+    ]
+
+    def get_name(self):
+        return "items:enums"
+
+    def process(self):
+        EnumMapper.process(self, 'Phono', 'Cote_Phono', 'media_items', self.map)
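
Note: the refactoring above moves the enum-resolution loop out of CollectionsEnumMapper into the shared EnumMapper base class, parameterized by source table, source id field, target table and field map, where an optional ':column' suffix in a map entry selects the lookup column of the enumeration table (e.g. 'ethnic_group:name'). The following standalone sketch is illustrative only and is not part of the commit; it reproduces the lookup-and-update that EnumMapper.process effectively performs for that single ItemsEnumMapper entry. The MySQLdb connection setup and database names are hypothetical assumptions; the table and column names (Phono, Cote_Phono, ethnic_groups, name, media_items, old_code) come from the diff.

    # Sketch of EnumMapper.process for ('Ethnie_GrSocial', 'ethnic_group:name')
    import MySQLdb

    src_db = MySQLdb.connect(db="crem")         # hypothetical source database name
    target_db = MySQLdb.connect(db="telemeta")  # hypothetical target database name
    src_cursor = src_db.cursor()
    target_cursor = target_db.cursor()

    # fetch one buffer of source rows: item code plus the raw enum value
    src_cursor.execute("SELECT Cote_Phono, `Ethnie_GrSocial` FROM Phono LIMIT 0, 200")
    for code, value in src_cursor.fetchall():
        if value:
            # 'ethnic_group' is pluralized to the enum table 'ethnic_groups';
            # the ':name' suffix selects its lookup column instead of 'value'
            target_cursor.execute("SELECT id FROM ethnic_groups WHERE name = %s", (value,))
            idrow = target_cursor.fetchone()
            if idrow:
                target_cursor.execute(
                    "UPDATE media_items SET ethnic_group_id = %s WHERE old_code = %s",
                    (idrow[0], code))
    target_db.commit()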