git.parisson.com Git - telemeta-data.git/commitdiff
add first audio crem checker tools
author: yomguy <yomguy@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Wed, 6 Jan 2010 16:45:47 +0000 (16:45 +0000)
committer: yomguy <yomguy@3bf09e05-f825-4182-b9bc-eedd7160adf0>
Wed, 6 Jan 2010 16:45:47 +0000 (16:45 +0000)
git-svn-id: http://svn.parisson.org/svn/crem@130 3bf09e05-f825-4182-b9bc-eedd7160adf0

trunk/import/audio_import/crem_checker.py [new file with mode: 0755]

diff --git a/trunk/import/audio_import/crem_checker.py b/trunk/import/audio_import/crem_checker.py
new file mode 100755 (executable)
index 0000000..f82aab6
--- /dev/null
+++ b/trunk/import/audio_import/crem_checker.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import re
+import sys
+import csv
+import xlrd
+import logging
+import scikits.audiolab as audiolab
+
+
+# Accepted shapes for item references. Each entry pairs a human-readable
+# template ('format') with the anchored regular expression ('regex') that
+# check_name() tries against a candidate name.
+ITEM_PATTERN = [
+    {
+        'format': 'CNRSMH_I_aaaa_nnn_mmm',
+        'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})$',
+    },
+    {
+        'format': 'CNRSMH_I_aaaa_nnn_mmm_tt',
+        'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})$',
+    },
+    {
+        'format': 'CNRSMH_I_aaaa_nnn_mmm_tt_pp',
+        'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})_([0-9]{2})$',
+    },
+    {
+        'format': 'CNRSMH_E_aaaa_nnn_mmm_tt',
+        'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})$',
+    },
+    {
+        'format': 'CNRSMH_E_aaaa_nnn_mmm_tt_pp',
+        'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})_([0-9]{2})_([0-9]{2})$',
+    },
+]
+
+# Accepted shapes for collection directory names, same entry layout as
+# ITEM_PATTERN.
+COLLECTION_PATTERN = [
+    {
+        'format': 'CNRSMH_I_aaaa_nnn',
+        'regex': r'^(CNRSMH)_I_([0-9]{4})_([0-9]{3})$',
+    },
+    {
+        'format': 'CNRSMH_E_aaaa_nnn_mmm',
+        'regex': r'^(CNRSMH)_E_([0-9]{4})_([0-9]{3})_([0-9]{3})$',
+    },
+]
+
+def check_name(patterns, name):
+    """Try each pattern's regex against name, stopping at the first hit.
+
+    patterns is a sequence of dicts carrying a 'regex' key. The result is
+    the match object of the first regex that matches. When patterns is
+    empty the result is False; when it is non-empty but nothing matches,
+    the result is None (the outcome of the last re.match() call).
+    """
+    outcome = False
+    for entry in patterns:
+        outcome = re.match(entry['regex'], name)
+        if outcome is not None:
+            return outcome
+    return outcome
+
+
+class Logger:
+    """Write timestamped lines to a log file through the logging module.
+
+    Every instance attaches a new FileHandler to the shared 'myapp'
+    logger, so creating several instances makes each message appear once
+    per attached handler.
+    """
+
+    def __init__(self, file):
+        """Attach a UTF-8 file handler for the path `file` at INFO level."""
+        self.logger = logging.getLogger('myapp')
+        # Explicit encoding so accented messages are written the same way
+        # whatever the locale is.
+        self.hdlr = logging.FileHandler(file, encoding='utf-8')
+        self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
+        self.hdlr.setFormatter(self.formatter)
+        self.logger.addHandler(self.hdlr)
+        self.logger.setLevel(logging.INFO)
+
+    @staticmethod
+    def _to_text(value):
+        """Decode UTF-8 byte strings; return text values unchanged.
+
+        Calling .decode() unconditionally fails on values that are already
+        text, and concatenating undecoded non-ASCII bytes with text fails
+        on the implicit ASCII conversion, so both cases are handled here.
+        """
+        if isinstance(value, bytes):
+            return value.decode('utf8', 'replace')
+        return value
+
+    def write(self, prefix, message):
+        """Log 'prefix : message' at INFO level."""
+        self.logger.info(self._to_text(prefix) + ' : ' + self._to_text(message))
+
+    def write_error(self, prefix, message):
+        """Log 'prefix : ERROR : message' (still emitted at INFO level)."""
+        self.logger.info(self._to_text(prefix) + ' : ERROR : ' + self._to_text(message))
+
+    def new_collection(self, collection):
+        """Log a banner line carrying the collection name."""
+        self.logger.info('***** ' + self._to_text(collection) + '*****')
+
+
+class CremCollection:
+    """Listing of one collection directory, filterable by file extension."""
+
+    def __init__(self, dir):
+        """Record the directory path, its last component and its entries.
+
+        Raises whatever os.listdir() raises when `dir` cannot be listed.
+        """
+        self.dir = dir
+        # normpath drops a trailing separator, which would otherwise leave
+        # the last path component (and therefore dir_name) empty.
+        self.dir_name = os.path.basename(os.path.normpath(self.dir))
+        self.file_list = os.listdir(self.dir)
+
+    def _files_with_ext(self, extensions):
+        """Return the entries whose extension is one of `extensions`."""
+        return [name for name in self.file_list
+                if os.path.splitext(name)[1] in extensions]
+
+    def xls_list(self):
+        """Return the entries ending in '.xls' or '.XLS'."""
+        return self._files_with_ext(('.xls', '.XLS'))
+
+    def wav_list(self):
+        """Return the entries ending in '.wav' or '.WAV'."""
+        return self._files_with_ext(('.wav', '.WAV'))
+
+
+class CremCSV:
+    """Semicolon-delimited CSV output file."""
+
+    # No file is open until set_file() is called.
+    csv_file = None
+
+    def set_file(self, file):
+        """Open `file` for writing and build a ';'-delimited csv writer on it.
+
+        A file opened by an earlier call is closed first so its handle is
+        not leaked.
+        """
+        self.close()
+        self.csv_file = open(file, 'w')
+        self.csv = csv.writer(self.csv_file, delimiter=';')
+
+    def close(self):
+        """Close the current output file, if any; safe to call repeatedly."""
+        if self.csv_file is not None:
+            self.csv_file.close()
+            self.csv_file = None
+            self.csv = None
+
+
+class CremXLS:
+    """Reference columns read from the first sheet of an XLS workbook.
+
+    After construction, `original_refs` and `new_refs` are lists holding
+    the text cell values found in their columns from `first_row` onward.
+    """
+
+    # Cell type code of text cells (xlrd.XL_CELL_TEXT).
+    TEXT_CELL = 1
+
+    def __init__(self, file):
+        """Open the workbook at `file` and load both reference columns."""
+        self.first_row = 7
+        self.original_col = 0
+        # NOTE(review): this attribute used to be 3 while the reader was
+        # hard-coded to column 2. The value now matches the column that
+        # has always been read; confirm against the spreadsheet layout.
+        self.new_col = 2
+        self.book = xlrd.open_workbook(file)
+        self.sheet = self.book.sheet_by_index(0)
+        # The instance attributes intentionally replace the same-named
+        # methods with their results: callers read them as plain lists.
+        self.original_refs = self.original_refs()
+        self.new_refs = self.new_refs()
+
+    def _text_values(self, col_index):
+        """Return the values of the text cells of one column, from first_row on."""
+        column = self.sheet.col(col_index)
+        return [cell.value for cell in column[self.first_row:]
+                if cell.ctype == self.TEXT_CELL]
+
+    def original_refs(self):
+        """Return the text values of column `original_col`."""
+        return self._text_values(self.original_col)
+
+    def new_refs(self):
+        """Return the text values of column `new_col`."""
+        return self._text_values(self.new_col)
+
+
+class CremItemFile:
+    """One media file, opened through audiolab for inspection."""
+
+    def __init__(self, media):
+        """Remember the media path; the file is not opened yet."""
+        self.media = media
+        # Set once the file has been opened by is_wav() or properties().
+        self.audio_file = None
+
+    def is_wav(self):
+        """Return True when the file opens and reports at least one frame.
+
+        Returns False when opening or querying the file raises IOError, and
+        also when the frame count is zero (that case used to fall through
+        and return None implicitly).
+        """
+        try:
+            self.audio_file = audiolab.sndfile(self.media, 'read')
+            frames = self.audio_file.get_nframes()
+        except IOError:
+            return False
+        return bool(frames)
+
+    def properties(self):
+        """Copy frame count, sample rate, channels, format and encoding onto self.
+
+        Opens the file first when is_wav() has not done so already, instead
+        of failing on a missing attribute.
+        """
+        if self.audio_file is None:
+            self.audio_file = audiolab.sndfile(self.media, 'read')
+        self.frames = self.audio_file.get_nframes()
+        self.samplerate = self.audio_file.get_samplerate()
+        self.channels = self.audio_file.get_channels()
+        self.format = self.audio_file.get_file_format()
+        self.encoding = self.audio_file.get_encoding()
+
+
+
+class CremCheck:
+    """Check every collection directory under a root and log the problems found."""
+
+    def __init__(self, root_dir, log_file):
+        """List `root_dir` and open the log file at `log_file`."""
+        self.root_dir = root_dir
+        self.logger = Logger(log_file)
+        self.dir_list = os.listdir(self.root_dir)
+
+    def check_wav(self):
+        """Placeholder: no check of the WAV content is implemented."""
+        pass
+
+    def check_new_refs(self, refs=None):
+        """Return True when every reference matches one of ITEM_PATTERN.
+
+        `refs` defaults to the instance's `new_refs` attribute when one has
+        been set, else to an empty list. The previous version read that
+        attribute unconditionally and returned after the first reference.
+        """
+        if refs is None:
+            refs = getattr(self, 'new_refs', [])
+        return all(check_name(ITEM_PATTERN, name) for name in refs)
+
+    def check(self):
+        """Check each directory found under the root directory.
+
+        A problem in one collection is logged and the run moves on to the
+        next collection. Entries that are not directories are skipped.
+        """
+        for dir in self.dir_list:
+            # join() supplies the separator when root_dir has no trailing one.
+            path = os.path.join(self.root_dir, dir)
+            if not os.path.isdir(path):
+                continue
+            self._check_collection(dir, path)
+
+    def _check_collection(self, dir, path):
+        """Check one collection: directory name, XLS file, then its references."""
+        collection = CremCollection(path)
+
+        # check collection dir name
+        if not check_name(COLLECTION_PATTERN, dir):
+            self.logger.write_error(collection.dir, 'Dossier mal nommé')
+            return
+
+        # check that exactly one XLS file exists
+        xls_list = collection.xls_list()
+        if not xls_list:
+            self.logger.write_error(collection.dir, 'PAS de fichier XLS dans le dossier collection')
+            return
+        if len(xls_list) > 1:
+            self.logger.write_error(collection.dir, 'Plusieurs fichiers XLS dans le dossier collection')
+            return
+
+        xls = CremXLS(os.path.join(path, xls_list[0]))
+        wav_list = collection.wav_list()
+
+        # check if number of refs equals the number of WAV files
+        if len(wav_list) != len(xls.new_refs):
+            print(len(wav_list))
+            print(len(xls.new_refs))
+            self.logger.write_error(collection.dir, 'Le nombre de références du fichier XLS diffère du nombre de fichiers')
+
+        self._check_items(collection, xls.new_refs, wav_list)
+
+    def _check_items(self, collection, refs, wav_list):
+        """Check format, collection prefix, WAV presence and uniqueness of each reference."""
+        wav_names = set(wav_list)
+        seen = set()
+
+        for item in refs:
+            # References that are not native str (presumably unicode text
+            # coming from the spreadsheet) are encoded to UTF-8 so they can
+            # be joined with the UTF-8 messages below; str(item) raised on
+            # non-ASCII values.
+            if not isinstance(item, str):
+                item = item.encode('utf8')
+
+            # check new items refs
+            if not check_name(ITEM_PATTERN, item):
+                self.logger.write_error(collection.dir, 'La référence ' + item + ' est mal formattée')
+            # A prefix test instead of a fixed 17-character slice, so the
+            # longer CNRSMH_E_aaaa_nnn_mmm collection names are handled too.
+            if not item.startswith(collection.dir_name):
+                self.logger.write_error(collection.dir, 'La référence ' + item + ' ne correspond pas à la collection')
+
+            # check if WAV files exist (either extension case is accepted)
+            if item + '.wav' not in wav_names and item + '.WAV' not in wav_names:
+                self.logger.write_error(collection.dir, 'Le fichier ' + item + '.wav n\'existe pas')
+
+            # check unicity
+            if item in seen:
+                self.logger.write_error(collection.dir, 'La référence ' + item + ' est multiple')
+            else:
+                seen.add(item)
+
+
+def main():
+    """Run the checker from the command line.
+
+    Usage: crem_checker.py ROOT_DIR LOG_FILE
+
+    The last two arguments are used as the root directory and the log
+    file. With fewer than two arguments a usage line is printed on stderr
+    and the process exits with status 1, instead of mistaking the script
+    path for the root directory.
+    """
+    if len(sys.argv) < 3:
+        sys.stderr.write('Usage: %s ROOT_DIR LOG_FILE\n' % sys.argv[0])
+        sys.exit(1)
+    c = CremCheck(sys.argv[-2], sys.argv[-1])
+    c.check()
+
+if __name__ == '__main__':
+    main()