From: olivier
Date: Wed, 10 Jun 2009 18:04:54 +0000 (+0000)
Subject: migration: add items raw copy and enumerations mapper
X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=f2ae9e844aefaf997c41b2dd55e33f4ff1fd9f20;p=telemeta-data.git

migration: add items raw copy and enumerations mapper

git-svn-id: http://svn.parisson.org/svn/crem@95 3bf09e05-f825-4182-b9bc-eedd7160adf0
---

diff --git a/trunk/import/migration/tasks/__init__.py b/trunk/import/migration/tasks/__init__.py
index e531bb5..5598a9f 100644
--- a/trunk/import/migration/tasks/__init__.py
+++ b/trunk/import/migration/tasks/__init__.py
@@ -37,3 +37,4 @@ import geoethno
 import ethnic
 import publishers
 import collections
+import items
diff --git a/trunk/import/migration/tasks/collections.py b/trunk/import/migration/tasks/collections.py
index 999ccb5..e37bfb4 100644
--- a/trunk/import/migration/tasks/collections.py
+++ b/trunk/import/migration/tasks/collections.py
@@ -33,7 +33,7 @@ from telemeta.core import *
 
 from api import IDataMigrator
-from core import DataMigrator
+from core import DataMigrator, EnumMapper
 from _mysql_exceptions import IntegrityError
 from MySQLdb.constants.ER import DUP_ENTRY
 import re
@@ -71,15 +71,6 @@ class CollectionsCopyMigrator(DataMigrator):
     def get_name(self):
         return "collections:copy"
 
-    def build_assignments(self, map):
-        assign = []
-        for f1, f2 in map:
-            f2 = '`%s`' % f2
-            f1 = '`%s`' % f1
-            assign.append((f2, f1))
-
-        return assign
-
     def process(self):
         assign = self.build_assignments(self.map)
 
@@ -117,7 +108,7 @@ class CollectionsCopyMigrator(DataMigrator):
                 else:
                     raise e
 
-class CollectionsEnumMapper(DataMigrator):
+class CollectionsEnumMapper(EnumMapper):
     """Map simple enumerations into the collections table"""
 
     implements(IDataMigrator)
@@ -125,8 +116,6 @@ class CollectionsEnumMapper(DataMigrator):
     map = [
         ('Format', 'physical_format'),
         ('Reedition', 'publishing_status'),
-        #('Editeur', 'publisher'),
-        #('Collect_Série', 'publisher_collection'),
         ('Mode_Acqui', 'acquisition_mode'),
         ('Redacteur_Fiche', 'metadata_author'),
         ('Saisie_Fiche', 'metadata_writer'),
@@ -139,47 +128,7 @@ class CollectionsEnumMapper(DataMigrator):
         return "collections:enums"
 
     def process(self):
-        buffer_size = 200
-        offset = 0
-
-        src_fields = []
-        target_fields = []
-        enum_tables = []
-        for src_field, target_base in self.map:
-            src_fields.append(src_field)
-            target_fields.append('`%s_id`' % target_base)
-            if target_base[-1] == 's':
-                enum_tables.append(target_base)
-            else:
-                enum_tables.append(target_base + 's')
-
-        while not offset or self.src_cursor.rowcount:
-            self.src_cursor.execute("SELECT Cote, %s FROM Support LIMIT %d, %d" % (
-                ", ".join(src_fields), offset, buffer_size))
-            while True:
-                row = self.src_cursor.fetchone()
-                if not row:
-                    break
-                code = row[0]
-                for i in range(0, len(row) - 1):
-                    value = row[i + 1]
-                    if value and len(value) > 0:
-                        self.target_cursor.execute("SELECT id FROM " + enum_tables[i] +
-                                                   " WHERE value = %s", (value,))
-                        idrow = self.target_cursor.fetchone()
-                        if idrow:
-                            self.target_cursor.execute("UPDATE media_collections SET "
-                                                       + target_fields[i] + " = %s "
-                                                       "WHERE code = %s", (idrow[0], code))
-                            if self.target_cursor.rowcount > 1:
-                                raise Exception("Updated more than one row, this shouldn't happen..")
-                            elif not self.target_cursor.rowcount:
-                                print "Can't find migrated collection: %s" % code
-                        else:
-                            print "Can't find value '%s' in %s" % (value, enum_tables[i])
-
-            offset += self.src_cursor.rowcount
-            self.step()
+        EnumMapper.process(self, 'Support', 'Cote', 'media_collections', self.map)
 
 class CollectionsCodeConverter(DataMigrator):
     """Convert old to new-style collection codes"""
diff --git a/trunk/import/migration/tasks/core.py b/trunk/import/migration/tasks/core.py
index d9f1773..85fdad3 100644
--- a/trunk/import/migration/tasks/core.py
+++ b/trunk/import/migration/tasks/core.py
@@ -50,7 +50,15 @@ class DataMigrationTask(Component):
         sys.stdout.flush()
 
 class DataMigrator(DataMigrationTask):
-    pass
+
+    def build_assignments(self, map):
+        assign = []
+        for f1, f2 in map:
+            f2 = '`%s`' % f2
+            f1 = '`%s`' % f1
+            assign.append((f2, f1))
+
+        return assign
 
 class DataInitializer(DataMigrationTask):
     pass
@@ -104,3 +112,57 @@ class GroupedItemsManager(object):
         for i in self.groups:
             nitems += len(self.groups[i])
         return nitems
+
+class EnumMapper(DataMigrator):
+    """Map simple enumerations"""
+
+    def process(self, src_table, src_id_field, target_table, map):
+        buffer_size = 200
+        offset = 0
+
+        src_fields = []
+        target_fields = []
+        enum_tables = []
+        enum_value_fields = []
+        for src_field, target_def in map:
+            src_fields.append('`%s`' % src_field)
+            cut = target_def.split(':')
+            target_base = cut[0]
+            if len(cut) > 1:
+                enum_value_fields.append(cut[1])
+            else:
+                enum_value_fields.append('value')
+
+            target_fields.append('`%s_id`' % target_base)
+            if target_base[-1] == 's':
+                enum_tables.append(target_base)
+            else:
+                enum_tables.append(target_base + 's')
+
+        while not offset or self.src_cursor.rowcount:
+            self.src_cursor.execute("SELECT %s, %s FROM %s LIMIT %d, %d" % (
+                src_id_field, ", ".join(src_fields), src_table, offset, buffer_size))
+            while True:
+                row = self.src_cursor.fetchone()
+                if not row:
+                    break
+                code = row[0]
+                for i in range(0, len(row) - 1):
+                    value = row[i + 1]
+                    if value and len(value) > 0:
+                        self.target_cursor.execute("SELECT id FROM " + enum_tables[i] +
+                                                   " WHERE " + enum_value_fields[i] + " = %s", (value,))
+                        idrow = self.target_cursor.fetchone()
+                        if idrow:
+                            self.target_cursor.execute("UPDATE " + target_table + " SET "
+                                                       + target_fields[i] + " = %s "
+                                                       "WHERE old_code = %s", (idrow[0], code))
+                            if self.target_cursor.rowcount > 1:
+                                raise Exception("Updated more than one row, this shouldn't happen..")
+                            elif not self.target_cursor.rowcount:
+                                print "Can't find migrated collection: %s" % code
+                        else:
+                            print "Can't find value '%s' in %s" % (value, enum_tables[i])
+
+            offset += self.src_cursor.rowcount
+            self.step()
diff --git a/trunk/import/migration/tasks/items.py b/trunk/import/migration/tasks/items.py
new file mode 100644
index 0000000..d576f82
--- /dev/null
+++ b/trunk/import/migration/tasks/items.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+#
+# CREM Database migrator
+#
+# Copyright (C) 2009 Samalyse SARL
+# Author: Olivier Guilyardi
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from telemeta.core import *
+from api import IDataMigrator
+from core import DataMigrator, EnumMapper
+from _mysql_exceptions import IntegrityError
+from MySQLdb.constants.ER import DUP_ENTRY
+
+class ItemsCopyMigrator(DataMigrator):
+    """Perform a preliminary raw copy of the item table"""
+
+    implements(IDataMigrator)
+
+    map = [
+        ('Face_Plage', 'track'),
+        ('Cote_Phono', 'old_code'),
+        ('Duree', 'approx_duration'),
+        ('Titre_piece', 'title'),
+        ('Transcrip_Trad', 'alt_title'),
+        ('Auteur', 'author'),
+        ('Comm_FonctUsage', 'context_comment'),
+        ('Documentation', 'external_references'),
+        ('Moda Execut', 'moda_execut'),
+        ('Enregistre_par', 'collector'),
+        ('Aire_Geo_Cult', 'cultural_area'),
+        ('ChoixCollecteur', 'collector_selection'),
+        ('NroBand NroPiec', 'creator_reference')
+    ]
+
+    def get_name(self):
+        return "items:copy"
+
+    def process(self):
+        assign = self.build_assignments(self.map)
+        target_fields = [str(a[0]) for a in assign]
+        src_fields = [str(a[1]) for a in assign]
+
+        self.target_cursor.execute("DELETE FROM media_items")
+
+        self.src_cursor.execute("SELECT COUNT(*) FROM Phono")
+        count = self.src_cursor.fetchone()[0]
+        self.stats = { 'total': count, 'imported': 0, 'ignored': 0}
+
+        query = "INSERT INTO media_items (\n collection_id,\n %s\n)\n" \
+                "SELECT \n c.id,\n %s\n FROM %s.Phono AS p " \
+                "INNER JOIN media_collections AS c ON p.Cote_Support = c.old_code " % (
+                    ",\n ".join(target_fields),
+                    ",\n ".join(src_fields),
+                    self.src_db_name)
+
+        self.target_cursor.execute(query)
+
+        self.target_cursor.execute("SELECT COUNT(*) FROM media_items")
+        self.stats['imported'] = self.target_cursor.fetchone()[0]
+        self.stats['ignored'] = self.stats['total'] - self.stats['imported']
+
+        print "Couldn't import the following items, no such (or un-migrated) collection:"
+
+        query = "SELECT p.Cote_Phono, p.Cote_Support FROM %s.Phono AS p " \
+                "LEFT JOIN media_collections AS c ON p.Cote_Support = c.old_code " \
+                "WHERE c.old_code IS NULL" % self.src_db_name
+
+        self.target_cursor.execute(query);
+
+        while True:
+            row = self.target_cursor.fetchone()
+            if not row:
+                break
+            print " %s (collection: %s)" % (row[0], row[1])
+
+class ItemsEnumMapper(EnumMapper):
+    """Map simple enumerations into the items table"""
+
+    implements(IDataMigrator)
+
+    map = [
+        ('Ethnie_GrSocial', 'ethnic_group:name'),
+        ('Form_Genr_Style', 'vernacular_style'),
+        ('FormStyl generi', 'generic_style')
+    ]
+
+    def get_name(self):
+        return "items:enums"
+
+    def process(self):
+        EnumMapper.process(self, 'Phono', 'Cote_Phono', 'media_items', self.map)
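
Note: the refactoring above moves the enum-resolution loop out of CollectionsEnumMapper into the shared EnumMapper base class, parameterized by source table, source id field, target table and field map, where an optional ':column' suffix in a map entry selects the lookup column of the enumeration table (e.g. 'ethnic_group:name'). The following standalone sketch is illustrative only and is not part of the commit; it reproduces the lookup-and-update that EnumMapper.process effectively performs for that single ItemsEnumMapper entry. The MySQLdb connection setup and database names are hypothetical assumptions; the table and column names (Phono, Cote_Phono, ethnic_groups, name, media_items, old_code) come from the diff.

    # Sketch of EnumMapper.process for ('Ethnie_GrSocial', 'ethnic_group:name')
    import MySQLdb

    src_db = MySQLdb.connect(db="crem")         # hypothetical source database name
    target_db = MySQLdb.connect(db="telemeta")  # hypothetical target database name
    src_cursor = src_db.cursor()
    target_cursor = target_db.cursor()

    # fetch one buffer of source rows: item code plus the raw enum value
    src_cursor.execute("SELECT Cote_Phono, `Ethnie_GrSocial` FROM Phono LIMIT 0, 200")
    for code, value in src_cursor.fetchall():
        if value:
            # 'ethnic_group' is pluralized to the enum table 'ethnic_groups';
            # the ':name' suffix selects its lookup column instead of 'value'
            target_cursor.execute("SELECT id FROM ethnic_groups WHERE name = %s", (value,))
            idrow = target_cursor.fetchone()
            if idrow:
                target_cursor.execute(
                    "UPDATE media_items SET ethnic_group_id = %s WHERE old_code = %s",
                    (idrow[0], code))
    target_db.commit()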