from core import DataMigrator
from _mysql_exceptions import IntegrityError
from MySQLdb.constants.ER import DUP_ENTRY
+import re
class CollectionsCopyMigrator(DataMigrator):
"""Perform a preliminary raw copy of the collection table"""
target_fields = [str(a[0]) for a in assign]
src_fields = [str(a[1]) for a in assign]
+ self.target_cursor.execute("DELETE FROM media_collections")
+
self.src_cursor.execute("SELECT COUNT(*) FROM %s.Support" % self.src_db_name)
count = self.src_cursor.fetchone()[0]
self.stats = { 'total': count, 'imported': 0, 'ignored': 0}
for offset in range(0, count):
+ if offset % 400 == 0:
+ self.step()
+
query = "INSERT INTO %s.media_collections (\n %s\n)\n" \
"SELECT \n %s\n FROM %s.Support AS s LIMIT %d, 1" % (
self.target_db_name,
offset += self.src_cursor.rowcount
self.step()
-
class CollectionsCodeConverter(DataMigrator):
    """Convert old to new-style collection codes.

    Every row of ``media_collections`` whose ``old_code`` matches one of
    ``patterns`` gets a new ``code`` of the form::

        CNRSMH_<E|I>_<year>_<serial>[_<physical>]

    * ``E`` is used when the old code starts with ``DI`` (handled as
      "published" below), ``I`` otherwise.
    * ``year`` is the second captured group; values below 100 get 2000
      added, values from 100 to 999 get 1000 added.
    * ``serial`` is the third captured group, zero-padded to 3 digits.
    * ``physical`` is only emitted for published codes: the fourth captured
      group zero-padded to 3 digits, or ``001`` when the pattern has none.

    Rows whose pattern does not capture both a year and a serial, and rows
    whose new code collides with an existing one, are deleted.
    """

    implements(IDataMigrator)

    # Known old code layouts. 'format' is a human-readable description;
    # 'regex' is used both in the SQL REGEXP filter and on the Python side.
    # Captured groups are, in order: prefix, year, serial, physical.
    patterns = [
        {'format': 'BM.aaa.nnn.mmm', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})$'},
        {'format': 'BM.aaaa.nnn.mmm/pp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/[0-9]{2}$'},
        {'format': 'BM.aaaa.nnn.mmm', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})$'},
        {'format': 'BM.aaaa.nnn.mmm/', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/$'},
        {'format': 'BM.aaaa.nnn.mmm/ppp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/[0-9]{3}$'},
        {'format': 'BM.aaaa.nnn.mm/pp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{2})/[0-9]{2}$'},
        {'format': 'BM.aaaa.nnn', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})$'},
        {'format': 'BM.aaa.nnn.mmm/pp', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})/[0-9]{2}$'},
        {'format': 'BM.aaa.nnn FANTOME', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3}) FANTOME$'},
        {'format': 'BM.aaa.nnn', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})$'},
        {'format': 'BM.aaa.nnnBISoo/pp', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})BIS([0-9]{2})/[0-9]{2}$'},
        {'format': 'BM.aaa.nnn.mmm.ppp', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})\.[0-9]{3}$'},
        {'format': 'BM.aaa.nnn.mmm/ppp', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})/[0-9]{3}$'},
        {'format': 'BM.aaa.nnn/pp', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})/[0-9]{2}$'},
        {'format': 'BM.aaa.nnn-BIS.ooo/pp', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})-BIS\.([0-9]{3})/[0-9]{2}$'},
        {'format': 'BM.aaaa.nnn.mmm/NN', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/NN$'},
        {'format': 'BM.aaa.nnn.mmm/pp-DEPOT', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})/[0-9]{2}-DEPOT$'},
        {'format': 'BM.aaa.nnn.mmm-o>p', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})-[0-9]>[0-9]$'},
        {'format': 'CY.aaaa.nnn', 'regex': r'^(CY)\.([0-9]{4})\.([0-9]{3})$'},
        {'format': 'DI.aaaa.nnn.mmm', 'regex': r'^(DI)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})$'},
        {'format': 'DI.aaaa.nnn.mmm/pp', 'regex': r'^(DI)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/[0-9]{2}$'},
        {'format': 'DI.aaa.nnn.mmm', 'regex': r'^(DI)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})$'},
        {'format': 'DI.aaa.nnn.mmm/pp', 'regex': r'^(DI)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})/[0-9]{2}$'},
        {'format': 'DI.aaa.nnn.mmm-o/p', 'regex': r'^(DI)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})-[0-9]/[0-9]$'},
        {'format': 'FANTOME 2*', 'regex': r'FANTOME 2\*$'},
    ]

    def get_name(self):
        """Return the name of this migrator."""
        return "collections:code"

    @staticmethod
    def _build_new_code(match):
        """Build the new-style code from a successful regex match.

        Returns the new code string, or None when the pattern did not
        capture at least a prefix, a year and a serial.
        """
        groups = match.groups()
        if len(groups) < 3:
            return None

        published = groups[0] == 'DI'
        year = int(groups[1])
        serial = int(groups[2])

        # Expand abbreviated years: below 100 -> 20xx, 100..999 -> 1xxx.
        if year < 100:
            year += 2000
        elif year < 1000:
            year += 1000

        tokens = [
            'CNRSMH_E' if published else 'CNRSMH_I',
            str(year),
            '%03d' % serial,
        ]
        if published:
            # Published codes always carry a physical number; default to 001.
            if len(groups) >= 4:
                tokens.append('%03d' % int(groups[3]))
            else:
                tokens.append('001')

        return '_'.join(tokens)

    def _remove(self, old_code):
        """Delete the row(s) carrying old_code and count the removal."""
        self.target_cursor.execute(
            "DELETE FROM media_collections WHERE old_code = %s", (old_code,))
        self.stats['removed'] += 1

    def _convert_row(self, old_code, new_code):
        """Store new_code for old_code, removing the row on a duplicate key.

        Any IntegrityError other than DUP_ENTRY is re-raised unchanged.
        """
        try:
            self.target_cursor.execute(
                "UPDATE media_collections SET code = %s WHERE old_code = %s",
                (new_code, old_code))
        except IntegrityError as e:
            # Read the error number from args rather than unpacking the
            # exception object, which fails if args is not a 2-tuple.
            errno = e.args[0] if e.args else None
            if errno != DUP_ENTRY:
                raise  # bare raise keeps the original traceback
            self._remove(old_code)
            print('Removed record, code conversion caused a duplicate entry: %s -> %s'
                  % (old_code, new_code))
        else:
            self.stats['converted'] += 1

    def _process_pattern(self, read_cursor, regex):
        """Convert every row whose old_code matches regex."""
        compiled = re.compile(regex)
        read_cursor.execute(
            "SELECT old_code FROM media_collections WHERE old_code REGEXP %s",
            (regex,))

        # NOTE(review): rows are updated/deleted through target_cursor while
        # read_cursor is still being consumed; this assumes read_cursor's
        # result set is buffered client-side - TODO confirm.
        for row in iter(read_cursor.fetchone, None):
            old_code = row[0]

            # SQL REGEXP matches anywhere in the string, so use search()
            # rather than match() to agree with it for unanchored patterns.
            match = compiled.search(old_code)
            if match is None:
                # The database accepted this row but Python's re does not
                # (the two regex dialects can disagree, e.g. on case
                # sensitivity). Leave it untouched; it is then reported
                # as unmatched.
                continue

            self.stats['matched'] += 1
            if self.stats['matched'] % 500 == 0:
                # step() is inherited; presumably reports progress.
                self.step()

            new_code = self._build_new_code(match)
            if new_code is None:
                self._remove(old_code)
                print('Removed record, old code is missing year or serial: %s' % old_code)
                continue

            self._convert_row(old_code, new_code)

    def process(self):
        """Run the conversion over all patterns and fill in self.stats.

        self.stats ends up with the keys 'total', 'matched', 'unmatched',
        'converted', 'removed' and 'remaining'.
        """
        self.target_cursor.execute("SELECT COUNT(*) FROM media_collections")

        self.stats = {
            'total': self.target_cursor.fetchone()[0],
            'matched': 0,
            'unmatched': 0,
            'converted': 0,
            'removed': 0,
            'remaining': 0
        }

        # A second cursor so the SELECT result is not clobbered by the
        # UPDATE/DELETE statements issued on target_cursor.
        read_cursor = self.target_db.cursor()
        try:
            for pattern in self.patterns:
                self._process_pattern(read_cursor, pattern['regex'])
        finally:
            read_cursor.close()

        self.stats['remaining'] = self.stats['total'] - self.stats['removed']
        self.stats['unmatched'] = self.stats['total'] - self.stats['matched']