From 111c0db86071d450d535885c3bfb24fa3e1a5632 Mon Sep 17 00:00:00 2001 From: olivier Date: Tue, 6 May 2008 17:07:52 +0000 Subject: [PATCH] using CNRSMH_I_YYYY_NNN for unpublished collections git-svn-id: http://svn.parisson.org/svn/crem@17 3bf09e05-f825-4182-b9bc-eedd7160adf0 --- docref/tools/analyse_collection_ids.py | 30 ++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/docref/tools/analyse_collection_ids.py b/docref/tools/analyse_collection_ids.py index e5dfeaf..d180f55 100644 --- a/docref/tools/analyse_collection_ids.py +++ b/docref/tools/analyse_collection_ids.py @@ -9,6 +9,7 @@ CREATE TABLE `coll` ( `format` varchar(255) default NULL, `regex` varchar(255) default NULL, `new_id` varchar(64) default NULL, + `dup` BOOLEAN NOT NULL, UNIQUE KEY `new_id` (`new_id`), UNIQUE KEY `cote` (`cote`) ) @@ -19,6 +20,7 @@ collection_patterns = [ { 'format': 'BM.aaa.nnn.mmm', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})$'}, { 'format': 'BM.aaaa.nnn.mmm/pp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/[0-9]{2}$'}, { 'format': 'BM.aaaa.nnn.mmm', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})$'}, + { 'format': 'BM.aaaa.nnn.mmm/', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/$'}, { 'format': 'BM.aaaa.nnn.mmm/ppp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/[0-9]{3}$'}, { 'format': 'BM.aaaa.nnn.mm/pp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{2})/[0-9]{2}$'}, { 'format': 'BM.aaaa.nnn', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})$'}, @@ -47,7 +49,7 @@ db = MySQLdb.connect(user='root', db='test'); rcursor = db.cursor() wcursor = db.cursor() -wcursor.execute("UPDATE coll SET format='', new_id = NULL") +wcursor.execute("UPDATE coll SET format='', new_id = NULL, dup = 0") nrow = 0 for pattern in collection_patterns: @@ -97,10 +99,11 @@ for pattern in collection_patterns: tokens.append(str(serial).rjust(3, '0')) - if physical != -1: - tokens.append(str(physical).rjust(3, '0')) - else: - tokens.append('001') + if published: + if physical != -1: + tokens.append(str(physical).rjust(3, '0')) + else: + tokens.append('001') new_id = '_'.join(tokens) @@ -109,22 +112,31 @@ for pattern in collection_patterns: except _mysql_exceptions.IntegrityError, (errno, errstr): if errno == 1062: stdout.write(' duplicate entry: ' + id + ' -> ' + new_id + '\n') + wcursor.execute("UPDATE coll SET dup = 1 WHERE cote = %s", (id,)) else: raise row = rcursor.fetchone() nrow += 1 -rcursor.execute("SELECT format, COUNT(*) FROM coll GROUP BY format"); +rcursor.execute("SELECT format, cote, COUNT(*), SUM(dup) FROM coll GROUP BY format"); stdout.write("\nFORMAT STATS:\n") row = rcursor.fetchone() +stdout.write("format\texample\tcount\tduplicates\n"); while row: - stdout.write(row[0] + ": " + str(row[1]) + '\n') + stdout.write(row[0] + "\t" + row[1] + "\t" + str(row[2]) + "\t" + str(row[3]) + '\n') row = rcursor.fetchone() -rcursor.execute("SELECT cote FROM coll WHERE new_id IS NULL") -stdout.write("\nUNCONVERTED IDs:\n") +rcursor.execute("SELECT cote FROM coll WHERE new_id IS NULL AND dup = 1") +stdout.write("\nUNCONVERTED IDs (duplicates):\n") +row = rcursor.fetchone() +while row: + stdout.write(row[0] + '\n') + row = rcursor.fetchone() + +rcursor.execute("SELECT cote FROM coll WHERE new_id IS NULL AND dup = 0") +stdout.write("\nUNCONVERTED IDs (non duplicates):\n") row = rcursor.fetchone() while row: stdout.write(row[0] + '\n') -- 2.39.5