--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import re
+import sys
+import csv
+import xlrd
+import logging
+import scikits.audiolab as audiolab
+
+
# Accepted layouts for item references. Each entry pairs a human-readable
# 'format' label with the anchored 'regex' that recognises it. The regexes
# capture the literal 'CNRSMH' and each fixed-width numeric field as groups.
ITEM_PATTERN = [
    {
        'format': 'CNRSMH_I_aaaa_nnn_mmm',
        'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})$',
    },
    {
        'format': 'CNRSMH_I_aaaa_nnn_mmm_tt',
        'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})$',
    },
    {
        'format': 'CNRSMH_I_aaaa_nnn_mmm_tt_pp',
        'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})_([0-9]{2})$',
    },
    {
        'format': 'CNRSMH_E_aaaa_nnn_mmm_tt',
        'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})$',
    },
    {
        'format': 'CNRSMH_E_aaaa_nnn_mmm_tt_pp',
        'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})_([0-9]{2})$',
    },
]
+
# Accepted layouts for collection directory names, in the same
# {'format': label, 'regex': anchored pattern} shape as the item patterns.
COLLECTION_PATTERN = [
    {
        'format': 'CNRSMH_I_aaaa_nnn',
        'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})$',
    },
    {
        'format': 'CNRSMH_E_aaaa_nnn_mmm',
        'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})$',
    },
]
+
def check_name(patterns, name):
    """Match *name* against each pattern's regex and return the first hit.

    patterns -- iterable of dicts, each carrying its regular expression
                under the 'regex' key
    name     -- the string to test

    Returns the match object of the first regex that matches. When nothing
    matches the result is None, and when *patterns* is empty it is False;
    every "no match" outcome is therefore falsy.
    """
    outcome = False
    for spec in patterns:
        outcome = re.match(spec['regex'], name)
        if outcome is not None:
            return outcome
    return outcome
+
+
class Logger:
    """Thin wrapper that appends timestamped lines to a log file.

    All instances share the 'myapp' logger from the logging module; each
    instance attaches its own file handler to it.
    """

    def __init__(self, file):
        """Attach a UTF-8 file handler writing to the path *file*."""
        self.logger = logging.getLogger('myapp')
        # Pin the encoding so accented messages are written the same way
        # regardless of the locale the script runs under.
        self.hdlr = logging.FileHandler(file, encoding='utf-8')
        self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        self.hdlr.setFormatter(self.formatter)
        self.logger.addHandler(self.hdlr)
        self.logger.setLevel(logging.INFO)

    @staticmethod
    def _text(value):
        """Return *value* as text, decoding UTF-8 byte strings.

        Values that are already text are passed through untouched, so both
        byte strings and unicode strings are accepted by the public methods.
        """
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    def write(self, prefix, message):
        """Log '<prefix> : <message>' at INFO level."""
        self.logger.info(self._text(prefix) + ' : ' + self._text(message))

    def write_error(self, prefix, message):
        """Log '<prefix> : ERROR : <message>' at ERROR level."""
        self.logger.error(self._text(prefix) + ' : ERROR : ' + self._text(message))

    def new_collection(self, collection):
        """Log a banner line announcing the collection named *collection*."""
        self.logger.info('***** ' + self._text(collection) + '*****')
+
+
class CremCollection:
    """Snapshot of the files found in one collection directory."""

    def __init__(self, dir):
        """List the content of the directory *dir*.

        Raises OSError (from os.listdir) when *dir* cannot be listed.
        """
        self.dir = dir
        # normpath drops a trailing separator, which would otherwise make
        # the last path component (and hence the name) an empty string.
        self.dir_name = os.path.basename(os.path.normpath(self.dir))
        self.file_list = os.listdir(self.dir)

    def _files_with_extension(self, extensions):
        """Return the listed file names whose extension is in *extensions*."""
        return [name for name in self.file_list
                if os.path.splitext(name)[1] in extensions]

    def xls_list(self):
        """Return the file names ending in '.xls' or '.XLS'."""
        return self._files_with_extension(('.xls', '.XLS'))

    def wav_list(self):
        """Return the file names ending in '.wav' or '.WAV'."""
        return self._files_with_extension(('.wav', '.WAV'))
+
+
class CremCSV:
    """Holder for a semicolon-delimited CSV writer and its output file."""

    def set_file(self, file):
        """Open *file* for writing and bind a CSV writer to it.

        The open file object is stored as ``csv_file`` and the writer as
        ``csv``. Closing ``csv_file`` is left to the caller.
        """
        handle = open(file, 'w')
        self.csv_file = handle
        self.csv = csv.writer(handle, delimiter=';')
+
+
class CremXLS:
    """Reference lists read from the first sheet of an XLS workbook.

    After construction, ``original_refs`` and ``new_refs`` are plain lists:
    __init__ replaces the methods of the same name on the instance with
    the values they return.
    """

    def __init__(self, file):
        """Open the workbook *file* and read both reference columns."""
        self.first_row = 7
        self.original_col = 0
        # NOTE(review): new_col is 3 but new_refs() reads column index 2,
        # and neither *_col attribute is used by the readers - confirm
        # which column is intended.
        self.new_col = 3
        self.book = xlrd.open_workbook(file)
        self.sheet = self.book.sheet_by_index(0)
        self.original_refs = self.original_refs()
        self.new_refs = self.new_refs()

    def _column_values(self, column_index):
        """Return the values of the type-1 cells of a column.

        Only cells from row index ``first_row`` onwards are considered.
        """
        cells = self.sheet.col(column_index)[self.first_row:]
        # ctype 1 is presumably xlrd's text cell type - TODO confirm.
        return [cell.value for cell in cells if cell.ctype == 1]

    def original_refs(self):
        """Return the type-1 cell values of column 0 below the header rows."""
        return self._column_values(0)

    def new_refs(self):
        """Return the type-1 cell values of column 2 below the header rows."""
        return self._column_values(2)
+
+
class CremItemFile:
    """A media file opened through audiolab to inspect its audio properties."""

    def __init__(self, media):
        """Remember the path *media*; the file itself is opened lazily."""
        self.media = media
        # Set by is_wav() or properties() once the file has been opened.
        self.audio_file = None

    def is_wav(self):
        """Return True when the file opens and reports a non-zero frame count.

        Returns False when opening or reading the file raises IOError, and
        also when the file reports zero frames.
        """
        try:
            self.audio_file = audiolab.sndfile(self.media, 'read')
            return bool(self.audio_file.get_nframes())
        except IOError:
            return False

    def properties(self):
        """Copy the audio properties of the file onto this object.

        Sets ``frames``, ``samplerate``, ``channels``, ``format`` and
        ``encoding``. The file is opened first if that has not been done
        yet, in which case an IOError from audiolab propagates.
        """
        if getattr(self, 'audio_file', None) is None:
            self.audio_file = audiolab.sndfile(self.media, 'read')
        self.frames = self.audio_file.get_nframes()
        self.samplerate = self.audio_file.get_samplerate()
        self.channels = self.audio_file.get_channels()
        self.format = self.audio_file.get_file_format()
        self.encoding = self.audio_file.get_encoding()
+
+
+
class CremCheck:
    """Check every collection directory found under a root directory.

    Problems are reported through the log file; nothing is raised for a
    faulty collection, and one faulty collection does not prevent the
    remaining ones from being checked.
    """

    def __init__(self, root_dir, log_file):
        """List *root_dir* and open the log file *log_file*."""
        self.root_dir = root_dir
        self.logger = Logger(log_file)
        self.dir_list = os.listdir(self.root_dir)

    def check_wav(self):
        """Placeholder; no check of the WAV content is implemented."""
        pass

    def check_new_refs(self):
        """Return True when every entry of ``self.new_refs`` is well formed.

        Returns False when ``new_refs`` is unset or empty.
        NOTE(review): nothing in this class assigns ``new_refs``; the
        caller presumably has to set it first - confirm.
        """
        refs = getattr(self, 'new_refs', None) or []
        return bool(refs) and all(check_name(ITEM_PATTERN, name) for name in refs)

    @staticmethod
    def _as_str(value):
        """Return *value* as a native str usable in a log message.

        On Python 2 a unicode value with non-ASCII characters cannot go
        through str(); it is encoded to UTF-8 bytes instead.
        """
        if isinstance(value, str):
            return value
        try:
            return str(value)
        except UnicodeError:
            return value.encode('utf8')

    def check(self):
        """Check each sub-directory of the root, in sorted name order."""
        for entry in sorted(self.dir_list):
            path = os.path.join(self.root_dir, entry)
            # Stray files in the root are not collections and cannot be listed.
            if not os.path.isdir(path):
                continue
            self._check_collection(entry, path)

    def _check_collection(self, name, path):
        """Check one collection directory called *name* located at *path*."""
        collection = CremCollection(path)
        xls_list = collection.xls_list()
        wav_list = collection.wav_list()

        # check collection dir name
        if not check_name(COLLECTION_PATTERN, name):
            self.logger.write_error(collection.dir, 'Dossier mal nommé')
            return

        # check that exactly one XLS file exists
        if len(xls_list) == 0:
            self.logger.write_error(collection.dir, 'PAS de fichier XLS dans le dossier collection')
            return
        if len(xls_list) > 1:
            self.logger.write_error(collection.dir, 'Plusieurs fichiers XLS dans le dossier collection')
            return

        xls = CremXLS(os.path.join(path, xls_list[0]))

        # check if number of refs equals the number of WAV files; the two
        # counts go into the log line instead of being printed to stdout.
        if len(wav_list) != len(xls.new_refs):
            counts = ' (%d / %d)' % (len(xls.new_refs), len(wav_list))
            self.logger.write_error(collection.dir, 'Le nombre de références du fichier XLS diffère du nombre de fichiers' + counts)

        self._check_items(collection, xls.new_refs, wav_list)

    def _check_items(self, collection, new_refs, wav_list):
        """Check format, collection prefix, WAV presence and uniqueness of each ref."""
        wav_names = set(wav_list)
        seen = set()

        for item in new_refs:
            label = self._as_str(item)

            # check new items refs
            if not check_name(ITEM_PATTERN, item):
                self.logger.write_error(collection.dir, 'La référence ' + label + ' est mal formattée')
            # Compare against the whole directory name: its length differs
            # between the CNRSMH_I_ and CNRSMH_E_ collection layouts.
            if not item.startswith(collection.dir_name):
                self.logger.write_error(collection.dir, 'La référence ' + label + ' ne correspond pas à la collection')

            # check if WAV files exist (either extension case is accepted)
            if item + '.wav' not in wav_names and item + '.WAV' not in wav_names:
                self.logger.write_error(collection.dir, 'Le fichier ' + label + '.wav n\'existe pas')

            # check unicity
            if item in seen:
                self.logger.write_error(collection.dir, 'La référence ' + label + ' est multiple')
            else:
                seen.add(item)
+
+
def main():
    """Run the checks using the last two command-line arguments.

    The second-to-last argument is the root directory holding the
    collections and the last one is the log file path. Exits with a usage
    message when fewer than two arguments are given.
    """
    # Without this guard a single argument would make sys.argv[-2] the
    # script name itself, which would then be used as the root directory.
    if len(sys.argv) < 3:
        sys.exit('usage: %s ROOT_DIR LOG_FILE' % sys.argv[0])
    c = CremCheck(sys.argv[-2], sys.argv[-1])
    c.check()


if __name__ == '__main__':
    main()