From: Guillaume Pellerin Date: Mon, 27 Apr 2015 16:38:18 +0000 (+0200) Subject: rename X-Git-Tag: 1.6a~4^2~105 X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=54e0700f84a2b1a2cffd1b150aeddc634a4c0b59;p=telemeta.git rename --- diff --git a/telemeta/management/commands/telemeta-import-collection-from-crem.py b/telemeta/management/commands/telemeta-import-collection-from-crem.py new file mode 100644 index 00000000..f73cd402 --- /dev/null +++ b/telemeta/management/commands/telemeta-import-collection-from-crem.py @@ -0,0 +1,234 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Guillaume Pellerin +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://svn.parisson.org/telemeta/TelemetaLicense. +# +# Author: Guillaume Pellerin +# + +import logging +import codecs +import os +import sys +import csv +import logging +import datetime +from optparse import make_option + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from django.contrib.auth.models import User +from django.core.management import setup_environ +from django.core.files.base import ContentFile +from django.contrib.auth.models import User +from django.contrib.sites.models import Site +from django.template.defaultfilters import slugify + +from telemeta.models import * +from telemeta.util.unaccent import unaccent + + +class Logger: + + def __init__(self, file): + self.logger = logging.getLogger('myapp') + self.hdlr = logging.FileHandler(file) + self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') + self.hdlr.setFormatter(self.formatter) + self.logger.addHandler(self.hdlr) + self.logger.setLevel(logging.INFO) + + def info(self, prefix, message): + self.logger.info(' ' + prefix + ' : ' + message.decode('utf8')) + + def error(self, prefix, message): + self.logger.error(prefix + ' : ' + message.decode('utf8')) + + +class Command(BaseCommand): + + """Import CREM collections from collection directories containing media files + and eventually a XLS files representing the relation between old codes and new codes + """ + + help = "import CREM collections (special usecase)" + admin_email = 'webmaster@parisson.com' + media_root = settings.MEDIA_ROOT + + option_list = BaseCommand.option_list + ( + make_option('-d', '--dry-run', + action='store_true', + dest='dry-run', + help='Do NOT write anything'), + make_option('-f', '--force', + action='store_true', + dest='force', + help='Force overwrite data'), + make_option('-s', '--source', + dest='source_dir', + help='define the source directory'), + make_option('-l', '--log', + dest='log', + help='define log file'), + make_option('-p', '--pattern', + dest='pattern', + help='define the pattern'), + ) + + + def write_file(self, item, media): + filename = media.split(os.sep)[-1] + if os.path.exists(media): + if not item.file or self.force: + if not self.dry_run: + if not self.media_root in self.source_dir: + f = open(media, 'r') + file_content = ContentFile(f.read()) + item.file.save(filename, file_content) + f.close() + else: + path = media[len(self.media_root)+1:] + item.file = path + item.save() + item.set_revision(self.user) + else: + msg = item.code + " : pas d'écriture, utiliser l'option --write " + self.logger.info('item', msg) + else: + msg = item.code + ' : fichier ' + item.file.name + ' deja inscrit dans la base de donnees et pas de forcage !' + self.logger.info('item', msg) + else: + msg = item.code + ' : fichier audio ' + filename + ' inexistant dans le dossier !' + self.logger.error('item', msg) + + def handle(self, *args, **kwargs): + self.logger = Logger(kwargs.get('log')) + self.pattern = kwargs.get('pattern') + self.source_dir = kwargs.get('source_dir') + self.dry_run = kwargs.get('dry-run') + self.force = kwargs.get('force') + + self.domain = Site.objects.all()[0].domain + self.user = User.objects.filter(username='admin')[0] + self.collections = os.listdir(self.source_dir) + + collections = [] + for collection in self.collections: + collection_dir = self.source_dir + os.sep + collection + collection_files = os.listdir(collection_dir) + + + if not '/.' in collection_dir and self.pattern in collection_dir: + collection_name = collection.split(os.sep)[-1] + collections.append(collection_name) + c = MediaCollection.objects.filter(code=collection_name) + + if not c and collection + '.csv' in collection_files: + msg = collection + ' collection NON présente dans la base de données, SORTIE ' + self.logger.error(collection, msg) + sys.exit(msg) + elif not c: + msg = 'collection NON présente dans la base de données, CREATION ' + self.logger.info(collection, msg) + if not self.dry_run: + c = MediaCollection(code=collection_name, title=collection_name) + c.save() + c.set_revision(self.user) + else: + msg = 'collection présente dans la base de données, SELECTION' + self.logger.info(collection, msg) + + for collection in collections: + collection_dir = self.source_dir + os.sep + collection + collection_name = collection + collection_files = os.listdir(collection_dir) + msg = '************************ ' + collection + ' ******************************' + self.logger.info(collection, msg[:70]) + csv_file = '' + rows = {} + + if collection + '.csv' in collection_files: + csv_file = self.source_dir + os.sep + collection + os.sep + collection + '.csv' + csv_data = csv.reader(open(csv_file), delimiter=';') + for row in csv_data: + rows[row[1].strip()] = row[0].strip() + msg = collection + ' import du fichier CSV de la collection' + self.logger.info(collection, msg[:70]) + else: + msg = collection + ' pas de fichier CSV dans la collection' + self.logger.info(collection, msg[:70]) + + c = MediaCollection.objects.filter(code=collection_name) + if not c: + if not self.dry_run: + c = MediaCollection(code=collection_name) + c.save() + msg = ' collection NON présente dans la BDD, CREATION ' + self.logger.info(c.code, msg) + else: + c = c[0] + msg = ' id = '+str(c.id) + self.logger.info(c.code, msg) + + audio_files = [] + for file in collection_files: + ext = ['WAV', 'wav'] + if file.split('.')[-1] in ext and file[0] != '.': + audio_files.append(file) + + audio_files.sort() + nb_items = c.items.count() + counter = 0 + + for file in audio_files: + code = file.split('.')[0] + wav_file = self.source_dir + os.sep + collection + os.sep + file + + if len(audio_files) <= nb_items: + items = MediaItem.objects.filter(code=code) + + old_ref = '' + if code in rows and not items: + old_ref = rows[code] + items = MediaItem.objects.filter(old_code=old_ref) + + if items: + item = items[0] + msg = code + ' : ' + item.old_code + ' : Cas 1 ou 2 : id = ' + str(item.id) + self.logger.info('item', msg) + item.code = code + else: + item = MediaItem(code=code, collection=c) + msg = code + ' : ' + old_ref + ' : Cas 1 ou 2 : item NON présent dans la base de données, CREATION' + self.logger.info('item', msg) + + self.write_file(item, wav_file) + + elif nb_items == 1 and len(audio_files) > 1: + if counter == 0: + msg = code + ' : Cas 3a : item n°01 présent dans la base de données, PASSE' + self.logger.info('item', msg) + else: + item = MediaItem(code=code, collection=c) + msg = code + ' : Cas 3a : item NON présent dans la base de données, CREATION' + self.logger.info('item', msg) + self.write_file(item, wav_file) + + elif nb_items > 1 and nb_items < len(audio_files): + msg = code + ' : Cas 3b : nb items < nb de fichiers audio, PAS de creation' + self.logger.info('item', msg) + + counter += 1 + + msg = 'Liste des URLs des collections importées :' + self.logger.info('INFO', msg) + for collection in collections: + msg = 'http://'+self.domain+'/archives/collections/'+collection + self.logger.info(collection, msg) + + diff --git a/telemeta/management/commands/telemeta-import-corpus-epub.py b/telemeta/management/commands/telemeta-import-corpus-epub.py deleted file mode 100644 index 539ac87e..00000000 --- a/telemeta/management/commands/telemeta-import-corpus-epub.py +++ /dev/null @@ -1,141 +0,0 @@ -from optparse import make_option -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError -from django.core.files.base import ContentFile -from telemeta.models import * -from telemeta.util.unaccent import unaccent -import os, re - -try: - from django.utils.text import slugify -except ImportError: - def slugify(string): - killed_chars = re.sub('[\(\),]', '', string) - return re.sub(' ', '_', killed_chars) - -def beautify(string): - return os.path.splitext(string)[0].replace('_',' ') - -def cleanup_dir(root_dir): - for resource in os.listdir(root_dir): - path = os.path.join(root_dir, resource) - if os.path.isdir(path): - new_path = path.replace(' ', '_') - new_path = new_path.replace('son_', '') - new_path = new_path.replace('son', '') - if new_path != path: - os.rename(path, new_path) - cleanup_dir(new_path) - -def trim_list(list): - new = [] - for item in list: - if item: - new.append(item) - return new - -def reset(): - for i in MediaItem.objects.all(): - i.delete() - for c in MediaCollection.objects.all(): - c.delete() - - -class Command(BaseCommand): - help = "import media files from a directory to a corpus" - args = "root_dir" - media_formats = ['mp3'] - image_formats = ['png', 'jpg'] - text_formats = ['txt'] - - def handle(self, *args, **options): - # NOT4PROD!! - reset() - - root_dir = args[-1] - cleanup_dir(root_dir) - chapters = os.listdir(root_dir) - - for chapter in chapters: - chapter_dir = os.path.join(root_dir, chapter) - metadata = {} - - for filename in os.listdir(chapter_dir): - path = os.path.join(chapter_dir, filename) - if os.path.isfile(path) and '.txt' == os.path.splitext(filename)[1]: - f = open(path, 'r') - i = 0 - for line in f.readlines(): - data = re.split(r'\t+', line.rstrip('\t')) - if i == 0: - chapter_title = data[1] - print chapter_title - else: - metadata[data[0]] = data[1:] - i += 1 - print metadata - break - - for root, dirs, files in os.walk(chapter_dir): - for media_file in files: - path = os.path.join(root, media_file) - - if ' ' in media_file: - new_media_file = media_file.replace(' ', '_') - new_media_path = os.path.join(root, new_media_file) - os.rename(path, new_media_path) - media_file = new_media_file - print media_file - - media_name = os.path.splitext(media_file)[0] - media_ext = os.path.splitext(media_file)[1][1:] - - if media_ext and media_ext in self.media_formats and media_name[0] != '.': - root_list = root.split(os.sep) - media_path = os.sep.join(root_list[-4:]) + os.sep + media_file - - item_name = root_list[-1] - collection_name = root_list[-2] - corpus_name = root_list[-3] - data = metadata[item_name] - - corpus_id = slugify(unicode(corpus_name)) - collection_id = corpus_id + '_' + slugify(unicode(collection_name)) - item_id = collection_id + '_' + slugify(unicode(item_name)) - - corpus, c = MediaCorpus.objects.get_or_create(code=corpus_id, title=corpus_name) - - collection_title = collection_name.replace('_', ' ') + ' : ' + chapter_title - print collection_title - collection, c = MediaCollection.objects.get_or_create(code=collection_id, title=collection_title) - if not collection in corpus.children.all(): - corpus.children.add(collection) - - item, c = MediaItem.objects.get_or_create(collection=collection, code=item_id) - if c: - item.old_code = item_name - # item.track = item_name - item.file = media_path - item.save() - - title = data[0].split('.') - item.title = title[0] - print data - item.track = data[1].replace('\n', '') - if len(title) > 1: - item.comment = '. '.join(title[1:]) - item.save() - - for related_file in os.listdir(root): - related_path = os.sep.join(root_list[-4:]) + os.sep + related_file - related_name = os.path.splitext(related_file)[0] - related_ext = os.path.splitext(related_file)[1][1:] - - if related_ext in self.image_formats: - related, c = MediaItemRelated.objects.get_or_create(item=item, file=related_path) - if c: - if len(data) > 2: - related.title = item.track - related.set_mime_type() - related.save() - diff --git a/telemeta/management/commands/telemeta-import-corpus-from-dir.py b/telemeta/management/commands/telemeta-import-corpus-from-dir.py new file mode 100644 index 00000000..539ac87e --- /dev/null +++ b/telemeta/management/commands/telemeta-import-corpus-from-dir.py @@ -0,0 +1,141 @@ +from optparse import make_option +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from django.core.files.base import ContentFile +from telemeta.models import * +from telemeta.util.unaccent import unaccent +import os, re + +try: + from django.utils.text import slugify +except ImportError: + def slugify(string): + killed_chars = re.sub('[\(\),]', '', string) + return re.sub(' ', '_', killed_chars) + +def beautify(string): + return os.path.splitext(string)[0].replace('_',' ') + +def cleanup_dir(root_dir): + for resource in os.listdir(root_dir): + path = os.path.join(root_dir, resource) + if os.path.isdir(path): + new_path = path.replace(' ', '_') + new_path = new_path.replace('son_', '') + new_path = new_path.replace('son', '') + if new_path != path: + os.rename(path, new_path) + cleanup_dir(new_path) + +def trim_list(list): + new = [] + for item in list: + if item: + new.append(item) + return new + +def reset(): + for i in MediaItem.objects.all(): + i.delete() + for c in MediaCollection.objects.all(): + c.delete() + + +class Command(BaseCommand): + help = "import media files from a directory to a corpus" + args = "root_dir" + media_formats = ['mp3'] + image_formats = ['png', 'jpg'] + text_formats = ['txt'] + + def handle(self, *args, **options): + # NOT4PROD!! + reset() + + root_dir = args[-1] + cleanup_dir(root_dir) + chapters = os.listdir(root_dir) + + for chapter in chapters: + chapter_dir = os.path.join(root_dir, chapter) + metadata = {} + + for filename in os.listdir(chapter_dir): + path = os.path.join(chapter_dir, filename) + if os.path.isfile(path) and '.txt' == os.path.splitext(filename)[1]: + f = open(path, 'r') + i = 0 + for line in f.readlines(): + data = re.split(r'\t+', line.rstrip('\t')) + if i == 0: + chapter_title = data[1] + print chapter_title + else: + metadata[data[0]] = data[1:] + i += 1 + print metadata + break + + for root, dirs, files in os.walk(chapter_dir): + for media_file in files: + path = os.path.join(root, media_file) + + if ' ' in media_file: + new_media_file = media_file.replace(' ', '_') + new_media_path = os.path.join(root, new_media_file) + os.rename(path, new_media_path) + media_file = new_media_file + print media_file + + media_name = os.path.splitext(media_file)[0] + media_ext = os.path.splitext(media_file)[1][1:] + + if media_ext and media_ext in self.media_formats and media_name[0] != '.': + root_list = root.split(os.sep) + media_path = os.sep.join(root_list[-4:]) + os.sep + media_file + + item_name = root_list[-1] + collection_name = root_list[-2] + corpus_name = root_list[-3] + data = metadata[item_name] + + corpus_id = slugify(unicode(corpus_name)) + collection_id = corpus_id + '_' + slugify(unicode(collection_name)) + item_id = collection_id + '_' + slugify(unicode(item_name)) + + corpus, c = MediaCorpus.objects.get_or_create(code=corpus_id, title=corpus_name) + + collection_title = collection_name.replace('_', ' ') + ' : ' + chapter_title + print collection_title + collection, c = MediaCollection.objects.get_or_create(code=collection_id, title=collection_title) + if not collection in corpus.children.all(): + corpus.children.add(collection) + + item, c = MediaItem.objects.get_or_create(collection=collection, code=item_id) + if c: + item.old_code = item_name + # item.track = item_name + item.file = media_path + item.save() + + title = data[0].split('.') + item.title = title[0] + print data + item.track = data[1].replace('\n', '') + if len(title) > 1: + item.comment = '. '.join(title[1:]) + item.save() + + for related_file in os.listdir(root): + related_path = os.sep.join(root_list[-4:]) + os.sep + related_file + related_name = os.path.splitext(related_file)[0] + related_ext = os.path.splitext(related_file)[1][1:] + + if related_ext in self.image_formats: + related, c = MediaItemRelated.objects.get_or_create(item=item, file=related_path) + if c: + if len(data) > 2: + related.title = item.track + related.set_mime_type() + related.save() + diff --git a/telemeta/management/commands/telemeta-import-crem.py b/telemeta/management/commands/telemeta-import-crem.py deleted file mode 100644 index f73cd402..00000000 --- a/telemeta/management/commands/telemeta-import-crem.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2010 Guillaume Pellerin -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://svn.parisson.org/telemeta/TelemetaLicense. -# -# Author: Guillaume Pellerin -# - -import logging -import codecs -import os -import sys -import csv -import logging -import datetime -from optparse import make_option - -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError -from django.contrib.auth.models import User -from django.core.management import setup_environ -from django.core.files.base import ContentFile -from django.contrib.auth.models import User -from django.contrib.sites.models import Site -from django.template.defaultfilters import slugify - -from telemeta.models import * -from telemeta.util.unaccent import unaccent - - -class Logger: - - def __init__(self, file): - self.logger = logging.getLogger('myapp') - self.hdlr = logging.FileHandler(file) - self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - self.hdlr.setFormatter(self.formatter) - self.logger.addHandler(self.hdlr) - self.logger.setLevel(logging.INFO) - - def info(self, prefix, message): - self.logger.info(' ' + prefix + ' : ' + message.decode('utf8')) - - def error(self, prefix, message): - self.logger.error(prefix + ' : ' + message.decode('utf8')) - - -class Command(BaseCommand): - - """Import CREM collections from collection directories containing media files - and eventually a XLS files representing the relation between old codes and new codes - """ - - help = "import CREM collections (special usecase)" - admin_email = 'webmaster@parisson.com' - media_root = settings.MEDIA_ROOT - - option_list = BaseCommand.option_list + ( - make_option('-d', '--dry-run', - action='store_true', - dest='dry-run', - help='Do NOT write anything'), - make_option('-f', '--force', - action='store_true', - dest='force', - help='Force overwrite data'), - make_option('-s', '--source', - dest='source_dir', - help='define the source directory'), - make_option('-l', '--log', - dest='log', - help='define log file'), - make_option('-p', '--pattern', - dest='pattern', - help='define the pattern'), - ) - - - def write_file(self, item, media): - filename = media.split(os.sep)[-1] - if os.path.exists(media): - if not item.file or self.force: - if not self.dry_run: - if not self.media_root in self.source_dir: - f = open(media, 'r') - file_content = ContentFile(f.read()) - item.file.save(filename, file_content) - f.close() - else: - path = media[len(self.media_root)+1:] - item.file = path - item.save() - item.set_revision(self.user) - else: - msg = item.code + " : pas d'écriture, utiliser l'option --write " - self.logger.info('item', msg) - else: - msg = item.code + ' : fichier ' + item.file.name + ' deja inscrit dans la base de donnees et pas de forcage !' - self.logger.info('item', msg) - else: - msg = item.code + ' : fichier audio ' + filename + ' inexistant dans le dossier !' - self.logger.error('item', msg) - - def handle(self, *args, **kwargs): - self.logger = Logger(kwargs.get('log')) - self.pattern = kwargs.get('pattern') - self.source_dir = kwargs.get('source_dir') - self.dry_run = kwargs.get('dry-run') - self.force = kwargs.get('force') - - self.domain = Site.objects.all()[0].domain - self.user = User.objects.filter(username='admin')[0] - self.collections = os.listdir(self.source_dir) - - collections = [] - for collection in self.collections: - collection_dir = self.source_dir + os.sep + collection - collection_files = os.listdir(collection_dir) - - - if not '/.' in collection_dir and self.pattern in collection_dir: - collection_name = collection.split(os.sep)[-1] - collections.append(collection_name) - c = MediaCollection.objects.filter(code=collection_name) - - if not c and collection + '.csv' in collection_files: - msg = collection + ' collection NON présente dans la base de données, SORTIE ' - self.logger.error(collection, msg) - sys.exit(msg) - elif not c: - msg = 'collection NON présente dans la base de données, CREATION ' - self.logger.info(collection, msg) - if not self.dry_run: - c = MediaCollection(code=collection_name, title=collection_name) - c.save() - c.set_revision(self.user) - else: - msg = 'collection présente dans la base de données, SELECTION' - self.logger.info(collection, msg) - - for collection in collections: - collection_dir = self.source_dir + os.sep + collection - collection_name = collection - collection_files = os.listdir(collection_dir) - msg = '************************ ' + collection + ' ******************************' - self.logger.info(collection, msg[:70]) - csv_file = '' - rows = {} - - if collection + '.csv' in collection_files: - csv_file = self.source_dir + os.sep + collection + os.sep + collection + '.csv' - csv_data = csv.reader(open(csv_file), delimiter=';') - for row in csv_data: - rows[row[1].strip()] = row[0].strip() - msg = collection + ' import du fichier CSV de la collection' - self.logger.info(collection, msg[:70]) - else: - msg = collection + ' pas de fichier CSV dans la collection' - self.logger.info(collection, msg[:70]) - - c = MediaCollection.objects.filter(code=collection_name) - if not c: - if not self.dry_run: - c = MediaCollection(code=collection_name) - c.save() - msg = ' collection NON présente dans la BDD, CREATION ' - self.logger.info(c.code, msg) - else: - c = c[0] - msg = ' id = '+str(c.id) - self.logger.info(c.code, msg) - - audio_files = [] - for file in collection_files: - ext = ['WAV', 'wav'] - if file.split('.')[-1] in ext and file[0] != '.': - audio_files.append(file) - - audio_files.sort() - nb_items = c.items.count() - counter = 0 - - for file in audio_files: - code = file.split('.')[0] - wav_file = self.source_dir + os.sep + collection + os.sep + file - - if len(audio_files) <= nb_items: - items = MediaItem.objects.filter(code=code) - - old_ref = '' - if code in rows and not items: - old_ref = rows[code] - items = MediaItem.objects.filter(old_code=old_ref) - - if items: - item = items[0] - msg = code + ' : ' + item.old_code + ' : Cas 1 ou 2 : id = ' + str(item.id) - self.logger.info('item', msg) - item.code = code - else: - item = MediaItem(code=code, collection=c) - msg = code + ' : ' + old_ref + ' : Cas 1 ou 2 : item NON présent dans la base de données, CREATION' - self.logger.info('item', msg) - - self.write_file(item, wav_file) - - elif nb_items == 1 and len(audio_files) > 1: - if counter == 0: - msg = code + ' : Cas 3a : item n°01 présent dans la base de données, PASSE' - self.logger.info('item', msg) - else: - item = MediaItem(code=code, collection=c) - msg = code + ' : Cas 3a : item NON présent dans la base de données, CREATION' - self.logger.info('item', msg) - self.write_file(item, wav_file) - - elif nb_items > 1 and nb_items < len(audio_files): - msg = code + ' : Cas 3b : nb items < nb de fichiers audio, PAS de creation' - self.logger.info('item', msg) - - counter += 1 - - msg = 'Liste des URLs des collections importées :' - self.logger.info('INFO', msg) - for collection in collections: - msg = 'http://'+self.domain+'/archives/collections/'+collection - self.logger.info(collection, msg) - -