#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Consistency checker for CREM audio collection directories.

For every collection directory found under a root directory the checker
verifies that:

* the directory name matches one of ``COLLECTION_PATTERN``;
* the directory holds exactly one XLS spreadsheet;
* the number of references listed in the spreadsheet equals the number of
  WAV files in the directory;
* every reference matches one of ``ITEM_PATTERN``, starts with the name of
  the collection, has a matching ``.wav``/``.WAV`` file and is listed once.

Problems are appended to a log file; the messages themselves are in French.

Usage: ``crem_checker.py ROOT_DIR LOG_FILE``

Provenance: trunk/import/audio_import/crem_checker.py, introduced by
commit 3dfbc62b6050f8cb385c9ff9181af0d884395705 (yomguy, Wed, 6 Jan 2010
16:45:47 +0000, "add first audio crem checker tools",
git-svn-id: http://svn.parisson.org/svn/crem@130
3bf09e05-f825-4182-b9bc-eedd7160adf0).
"""

import os
import re
import sys
import csv
import logging

# The two third-party libraries are imported defensively so that the module
# can be loaded without them; the classes that need them raise ImportError
# at the point of use instead.
try:
    import xlrd
except ImportError:
    xlrd = None

try:
    import scikits.audiolab as audiolab
except ImportError:
    audiolab = None


ITEM_PATTERN = [
    {'format': 'CNRSMH_I_aaaa_nnn_mmm',
     'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})$'},
    {'format': 'CNRSMH_I_aaaa_nnn_mmm_tt',
     'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})$'},
    {'format': 'CNRSMH_I_aaaa_nnn_mmm_tt_pp',
     'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})_([0-9]{2})$'},
    {'format': 'CNRSMH_E_aaaa_nnn_mmm_tt',
     'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})$'},
    {'format': 'CNRSMH_E_aaaa_nnn_mmm_tt_pp',
     'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})_([0-9]{2})$'},
]

COLLECTION_PATTERN = [
    {'format': 'CNRSMH_I_aaaa_nnn',
     'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})$'},
    {'format': 'CNRSMH_E_aaaa_nnn_mmm',
     'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})$'},
]

# Cell type code of a text cell; same value as xlrd.XL_CELL_TEXT.
XLS_TEXT_CELL = 1

# Text type of the running interpreter (unicode on Python 2, str on Python 3).
_TEXT_TYPE = type(u'')


def _to_text(value):
    """Return *value* as text, decoding byte strings as UTF-8."""
    if isinstance(value, _TEXT_TYPE):
        return value
    if isinstance(value, bytes):
        # 'replace' keeps a badly encoded name from aborting the whole run.
        return value.decode('utf8', 'replace')
    return _TEXT_TYPE(value)


def check_name(patterns, name):
    """Match *name* against each pattern's ``'regex'`` entry.

    Returns the first successful ``re`` match object.  When nothing matches
    the result is falsy: ``None`` if at least one pattern was tried,
    ``False`` if *patterns* is empty.
    """
    match = False
    for pattern in patterns:
        match = re.match(pattern['regex'], name)
        if match:
            return match
    return match


class Logger(object):
    """Append timestamped messages to a log file."""

    def __init__(self, file):
        self.logger = logging.getLogger('myapp')
        # 'myapp' is a process-wide logger: reuse a handler that already
        # targets this file so repeated instantiation does not duplicate
        # every line.
        target = os.path.abspath(file)
        self.hdlr = None
        for handler in self.logger.handlers:
            if (isinstance(handler, logging.FileHandler)
                    and handler.baseFilename == target):
                self.hdlr = handler
                break
        if self.hdlr is None:
            self.hdlr = logging.FileHandler(file, encoding='utf8')
            self.logger.addHandler(self.hdlr)
        self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        self.hdlr.setFormatter(self.formatter)
        self.logger.setLevel(logging.INFO)

    def write(self, prefix, message):
        """Log ``prefix : message`` at INFO level."""
        self.logger.info(_to_text(prefix) + u' : ' + _to_text(message))

    def write_error(self, prefix, message):
        """Log ``prefix : ERROR : message`` (still emitted at INFO level)."""
        self.logger.info(_to_text(prefix) + u' : ERROR : ' + _to_text(message))

    def new_collection(self, collection):
        """Log a banner line announcing *collection*."""
        self.logger.info(u'***** ' + _to_text(collection) + u'*****')


class CremCollection(object):
    """The files found directly inside one collection directory."""

    def __init__(self, dir):
        self.dir = dir
        # normpath drops a trailing separator, which would otherwise make
        # the directory name come out empty.
        self.dir_name = os.path.basename(os.path.normpath(self.dir))
        self.file_list = os.listdir(self.dir)

    def _files_with_extension(self, extensions):
        """Return the entries of ``file_list`` whose extension is listed."""
        return [name for name in self.file_list
                if os.path.splitext(name)[1] in extensions]

    def xls_list(self):
        """Return the file names ending in ``.xls`` or ``.XLS``."""
        return self._files_with_extension(('.xls', '.XLS'))

    def wav_list(self):
        """Return the file names ending in ``.wav`` or ``.WAV``."""
        return self._files_with_extension(('.wav', '.WAV'))


class CremCSV(object):
    """Semicolon-delimited CSV writer bound to a file."""

    csv_file = None

    def set_file(self, file):
        """Open *file* for writing and attach a ``csv.writer`` as ``self.csv``."""
        self.csv_file = open(file, 'w')
        self.csv = csv.writer(self.csv_file, delimiter=';')

    def close(self):
        """Close the file opened by :meth:`set_file`, if there is one."""
        if self.csv_file is not None:
            self.csv_file.close()
            self.csv_file = None


class CremXLS(object):
    """References read from the first sheet of a collection spreadsheet.

    After construction ``original_refs`` and ``new_refs`` are lists holding
    the text cells of the corresponding columns.
    """

    def __init__(self, file):
        if xlrd is None:
            raise ImportError('xlrd is required to read XLS files')
        self.first_row = 7
        self.original_col = 0
        # NOTE(review): the original code declared new_col = 3 but never
        # used it and read column 2; the column actually read is kept.
        # Confirm against the spreadsheet layout.
        self.new_col = 2
        self.book = xlrd.open_workbook(file)
        self.sheet = self.book.sheet_by_index(0)
        # The instance attributes deliberately replace the methods of the
        # same name: callers read ``xls.new_refs`` as a plain list.
        self.original_refs = self.original_refs()
        self.new_refs = self.new_refs()

    def _text_column(self, index):
        """Return the values of the text cells of column *index*.

        Rows before ``first_row`` are skipped, as are non-text cells.
        """
        cells = self.sheet.col(index)[self.first_row:]
        return [cell.value for cell in cells if cell.ctype == XLS_TEXT_CELL]

    def original_refs(self):
        """Return the text values of the original-reference column."""
        return self._text_column(self.original_col)

    def new_refs(self):
        """Return the text values of the new-reference column."""
        return self._text_column(self.new_col)


class CremItemFile(object):
    """An audio file attached to an item."""

    def __init__(self, media):
        self.media = media

    def is_wav(self):
        """Return True when the file can be opened and holds audio frames.

        The opened file is kept in ``self.audio_file``.  An ``IOError``
        while opening or reading yields False, as does a file without any
        frame (the original implementation returned None in that case).

        Raises:
            ImportError: if scikits.audiolab is not installed.
        """
        if audiolab is None:
            raise ImportError('scikits.audiolab is required to inspect audio files')
        try:
            self.audio_file = audiolab.sndfile(self.media, 'read')
            return bool(self.audio_file.get_nframes())
        except IOError:
            return False

    def properties(self):
        """Copy the audio properties onto the instance.

        Reads ``self.audio_file``, which is only set by :meth:`is_wav`, so
        that method has to be called first.
        """
        self.frames = self.audio_file.get_nframes()
        self.samplerate = self.audio_file.get_samplerate()
        self.channels = self.audio_file.get_channels()
        self.format = self.audio_file.get_file_format()
        self.encoding = self.audio_file.get_encoding()


class CremCheck(object):
    """Check every collection directory below ``root_dir``.

    Args:
        root_dir: directory containing one sub-directory per collection.
        log_file: path of the log file that receives the errors.
    """

    def __init__(self, root_dir, log_file):
        self.root_dir = root_dir
        self.logger = Logger(log_file)
        # Sorted so the log comes out in the same order on every run.
        self.dir_list = sorted(os.listdir(self.root_dir))

    def check_wav(self):
        """Placeholder kept from the original code; does nothing."""
        pass

    def check_new_refs(self, new_refs=None):
        """Return True when every reference matches ``ITEM_PATTERN``.

        Args:
            new_refs: references to check; defaults to ``self.new_refs``
                when that attribute has been set, else to no reference.
        """
        if new_refs is None:
            new_refs = getattr(self, 'new_refs', [])
        return all(check_name(ITEM_PATTERN, _to_text(name)) for name in new_refs)

    def _report(self, collection, message, errors):
        """Log *message* for *collection* and record it in *errors*."""
        self.logger.write_error(collection.dir, message)
        errors.append(message)

    def check_items(self, collection, new_refs, wav_list=None):
        """Check the references of one collection against its WAV files.

        Args:
            collection: the ``CremCollection`` the references belong to.
            new_refs: references read from the spreadsheet.
            wav_list: WAV file names; defaults to ``collection.wav_list()``.

        Returns:
            The list of error messages that were logged.
        """
        if wav_list is None:
            wav_list = collection.wav_list()
        available = set(_to_text(name) for name in wav_list)
        dir_name = _to_text(collection.dir_name)
        seen = set()
        errors = []

        for item in new_refs:
            ref = _to_text(item)

            # check new items refs
            if not check_name(ITEM_PATTERN, ref):
                self._report(collection,
                             u'La référence ' + ref + u' est mal formattée',
                             errors)
            # Compare against the real length of the collection name: the
            # former fixed 17-character slice only fitted 'I' collections.
            if not ref.startswith(dir_name):
                self._report(collection,
                             u'La référence ' + ref
                             + u' ne correspond pas à la collection',
                             errors)

            # check if WAV files exist: missing only when neither the
            # lower-case nor the upper-case extension is present
            if ref + u'.wav' not in available and ref + u'.WAV' not in available:
                self._report(collection,
                             u'Le fichier ' + ref + u".wav n'existe pas",
                             errors)

            # check unicity
            if ref in seen:
                self._report(collection,
                             u'La référence ' + ref + u' est multiple',
                             errors)
            else:
                seen.add(ref)

        return errors

    def check_collection(self, name):
        """Check the collection directory *name* found under ``root_dir``.

        Returns:
            The list of error messages that were logged.  A badly named
            directory, or one without exactly one XLS file, is reported and
            not inspected any further.
        """
        collection = CremCollection(os.path.join(self.root_dir, name))
        errors = []

        # check collection dir name
        if not check_name(COLLECTION_PATTERN, name):
            self._report(collection, u'Dossier mal nommé', errors)
            return errors

        # check if XLS file exists
        xls_list = collection.xls_list()
        if not xls_list:
            self._report(collection,
                         u'PAS de fichier XLS dans le dossier collection',
                         errors)
            return errors
        if len(xls_list) > 1:
            self._report(collection,
                         u'Plusieurs fichiers XLS dans le dossier collection',
                         errors)
            return errors

        xls = CremXLS(os.path.join(collection.dir, xls_list[0]))
        wav_list = collection.wav_list()

        # check if number of refs equals the number of WAV files
        if len(wav_list) != len(xls.new_refs):
            # Kept from the original: echo both counts on standard output.
            print(len(wav_list))
            print(len(xls.new_refs))
            self._report(collection,
                         u'Le nombre de références du fichier XLS diffère '
                         u'du nombre de fichiers',
                         errors)

        errors.extend(self.check_items(collection, xls.new_refs, wav_list))
        return errors

    def check(self):
        """Check every collection directory and return the error count.

        An error in one collection no longer stops the run: the remaining
        collections are still checked.  Entries of ``root_dir`` that are not
        directories are skipped.
        """
        total = 0
        for name in self.dir_list:
            if not os.path.isdir(os.path.join(self.root_dir, name)):
                continue
            total += len(self.check_collection(name))
        return total


def main(argv=None):
    """Run the checker on ``ROOT_DIR LOG_FILE``, the last two arguments.

    Args:
        argv: argument list including the program name; defaults to
            ``sys.argv``.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        prog = os.path.basename(args[0]) if args else 'crem_checker.py'
        sys.stderr.write('Usage: %s ROOT_DIR LOG_FILE\n' % prog)
        sys.exit(2)
    checker = CremCheck(args[-2], args[-1])
    checker.check()


if __name__ == '__main__':
    main()