`format` varchar(255) default NULL,
`regex` varchar(255) default NULL,
`new_id` varchar(64) default NULL,
+ `dup` BOOLEAN NOT NULL,
UNIQUE KEY `new_id` (`new_id`),
UNIQUE KEY `cote` (`cote`)
)
{ 'format': 'BM.aaa.nnn.mmm', 'regex': r'^(BM)\.([0-9]{3})\.([0-9]{3})\.([0-9]{3})$'},
{ 'format': 'BM.aaaa.nnn.mmm/pp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/[0-9]{2}$'},
{ 'format': 'BM.aaaa.nnn.mmm', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})$'},
+ { 'format': 'BM.aaaa.nnn.mmm/', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/$'},
{ 'format': 'BM.aaaa.nnn.mmm/ppp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{3})/[0-9]{3}$'},
{ 'format': 'BM.aaaa.nnn.mm/pp', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})\.([0-9]{2})/[0-9]{2}$'},
{ 'format': 'BM.aaaa.nnn', 'regex': r'^(BM)\.([0-9]{4})\.([0-9]{3})$'},
rcursor = db.cursor()
wcursor = db.cursor()
-wcursor.execute("UPDATE coll SET format='', new_id = NULL")
+# No WHERE clause: this resets format, new_id and the dup flag on every row
+# of coll before the patterns are processed.
+wcursor.execute("UPDATE coll SET format='', new_id = NULL, dup = 0")
nrow = 0
for pattern in collection_patterns:
tokens.append(str(serial).rjust(3, '0'))
- if physical != -1:
- tokens.append(str(physical).rjust(3, '0'))
- else:
- tokens.append('001')
+ if published:
+ if physical != -1:
+ tokens.append(str(physical).rjust(3, '0'))
+ else:
+ tokens.append('001')
new_id = '_'.join(tokens)
except _mysql_exceptions.IntegrityError, (errno, errstr):
if errno == 1062:
stdout.write(' duplicate entry: ' + id + ' -> ' + new_id + '\n')
+ wcursor.execute("UPDATE coll SET dup = 1 WHERE cote = %s", (id,))
else:
raise
row = rcursor.fetchone()
nrow += 1
-rcursor.execute("SELECT format, COUNT(*) FROM coll GROUP BY format");
+# Per-format summary: one sample cote, the row count and the number of rows
+# flagged dup. cote is wrapped in MIN() because it is not part of GROUP BY;
+# a bare column there is rejected when ONLY_FULL_GROUP_BY is enabled and
+# otherwise yields an arbitrary row's value.
+rcursor.execute("SELECT format, MIN(cote), COUNT(*), SUM(dup) FROM coll GROUP BY format")
stdout.write("\nFORMAT STATS:\n")
row = rcursor.fetchone()
+stdout.write("format\texample\tcount\tduplicates\n")
while row:
- stdout.write(row[0] + ": " + str(row[1]) + '\n')
+ stdout.write(row[0] + "\t" + row[1] + "\t" + str(row[2]) + "\t" + str(row[3]) + '\n')
row = rcursor.fetchone()
-rcursor.execute("SELECT cote FROM coll WHERE new_id IS NULL")
-stdout.write("\nUNCONVERTED IDs:\n")
+# List every cote that has no new_id and was flagged as a duplicate.
+rcursor.execute("SELECT cote FROM coll WHERE new_id IS NULL AND dup = 1")
+stdout.write("\nUNCONVERTED IDs (duplicates):\n")
+# fetchone() returns None once the result set is exhausted, which ends the loop.
+for unconverted in iter(rcursor.fetchone, None):
+    stdout.write(unconverted[0] + '\n')
+
+rcursor.execute("SELECT cote FROM coll WHERE new_id IS NULL AND dup = 0")
+stdout.write("\nUNCONVERTED IDs (non duplicates):\n")
row = rcursor.fetchone()
while row:
stdout.write(row[0] + '\n')