From f47a7721594da99cc2fd839799e47cbef4dc3a31 Mon Sep 17 00:00:00 2001 From: Guillaume Pellerin Date: Wed, 22 Apr 2015 00:48:49 +0200 Subject: [PATCH] fix epub generator against new corpus import method --- .../commands/telemeta-import-corpus-epub.py | 161 ++++++++++-------- .../templates/telemeta/collection_epub.html | 2 +- telemeta/views/resource.py | 8 +- 3 files changed, 95 insertions(+), 76 deletions(-) diff --git a/telemeta/management/commands/telemeta-import-corpus-epub.py b/telemeta/management/commands/telemeta-import-corpus-epub.py index 8fbb21d4..a9f05561 100644 --- a/telemeta/management/commands/telemeta-import-corpus-epub.py +++ b/telemeta/management/commands/telemeta-import-corpus-epub.py @@ -4,7 +4,7 @@ from django.core.management.base import BaseCommand, CommandError from django.core.files.base import ContentFile from telemeta.models import * from telemeta.util.unaccent import unaccent -import os +import os, re try: from django.utils.text import slugify @@ -16,14 +16,16 @@ except ImportError: def beautify(string): return os.path.splitext(string)[0].replace('_',' ') -def remove_dir_spaces(root_dir): +def cleanup_dir(root_dir): for resource in os.listdir(root_dir): path = os.path.join(root_dir, resource) if os.path.isdir(path): new_path = path.replace(' ', '_') + new_path = new_path.replace('son_', '') + new_path = new_path.replace('son', '') if new_path != path: os.rename(path, new_path) - remove_dir_spaces(new_path) + cleanup_dir(new_path) def trim_list(list): new = [] @@ -32,6 +34,13 @@ def trim_list(list): new.append(item) return new +def reset(): + for i in MediaItem.objects.all(): + i.delete() + for c in MediaCollection.objects.all(): + c.delete() + + class Command(BaseCommand): help = "import media files from a directory to a corpus" args = "root_dir" @@ -40,72 +49,84 @@ class Command(BaseCommand): text_formats = ['txt'] def handle(self, *args, **options): + # NOT4PROD!! + reset() + root_dir = args[-1] - remove_dir_spaces(root_dir) - - for root, dirs, files in os.walk(root_dir): - for media_file in files: - path = os.path.join(root, media_file) - - if ' ' in media_file: - new_media_file = media_file.replace(' ', '_') - new_media_path = os.path.join(root, new_media_file) - os.rename(path, new_media_path) - media_file = new_media_file - print media_file - - media_name = os.path.splitext(media_file)[0] - media_ext = os.path.splitext(media_file)[1][1:] - - if media_ext and media_ext in self.media_formats and media_name[0] != '.': - root_list = root.split(os.sep) - media_path = os.sep.join(root_list[-4:]) + os.sep + media_file - - item_name = root_list[-1] - collection_name = root_list[-2] - corpus_name = root_list[-3] - - corpus_id = slugify(unicode(corpus_name)) - collection_id = corpus_id + '_' + slugify(unicode(collection_name)) - item_id = collection_id + '_' + slugify(unicode(item_name)) - - corpus, c = MediaCorpus.objects.get_or_create(code=corpus_id, title=corpus_name) - - collection, c = MediaCollection.objects.get_or_create(code=collection_id, title=collection_name) - if not collection in corpus.children.all(): - corpus.children.add(collection) - - item, c = MediaItem.objects.get_or_create(collection=collection, code=item_id) - if c: - item.old_code = item_name - item.file = media_path - item.save() - - for related_file in os.listdir(root): - related_path = root + os.sep + related_file - related_ext = os.path.splitext(related_file)[1][1:] - if related_ext in self.text_formats: - text = open(related_path, 'r') - lines = trim_list(text.read().splitlines()) - print lines - break - - if lines: - item.track = lines[2] - item.title = lines[3][:255] - item.save() - - for related_file in os.listdir(root): - related_path = os.sep.join(root_list[-4:]) + os.sep + related_file - related_name = os.path.splitext(related_file)[0] - related_ext = os.path.splitext(related_file)[1][1:] - - print related_path - if related_ext in self.image_formats: - related, c = MediaItemRelated.objects.get_or_create(item=item, file=related_path) - if c: - if lines: - related.title = lines[4] - related.set_mime_type() - related.save() + cleanup_dir(root_dir) + chapters = os.listdir(root_dir) + + for chapter in chapters: + chapter_dir = os.path.join(root_dir, chapter) + metadata = {} + + for filename in os.listdir(chapter_dir): + path = os.path.join(chapter_dir, filename) + if os.path.isfile(path) and '.txt' == os.path.splitext(filename)[1]: + f = open(path, 'r') + for line in f.readlines(): + data = re.split(r'\t+', line.rstrip('\t')) + metadata[data[0]] = data[1:] + print metadata + break + + for root, dirs, files in os.walk(chapter_dir): + for media_file in files: + path = os.path.join(root, media_file) + + if ' ' in media_file: + new_media_file = media_file.replace(' ', '_') + new_media_path = os.path.join(root, new_media_file) + os.rename(path, new_media_path) + media_file = new_media_file + print media_file + + media_name = os.path.splitext(media_file)[0] + media_ext = os.path.splitext(media_file)[1][1:] + + if media_ext and media_ext in self.media_formats and media_name[0] != '.': + root_list = root.split(os.sep) + media_path = os.sep.join(root_list[-4:]) + os.sep + media_file + + item_name = root_list[-1] + collection_name = root_list[-2] + corpus_name = root_list[-3] + data = metadata[item_name] + + corpus_id = slugify(unicode(corpus_name)) + collection_id = corpus_id + '_' + slugify(unicode(collection_name)) + item_id = collection_id + '_' + slugify(unicode(item_name)) + + corpus, c = MediaCorpus.objects.get_or_create(code=corpus_id, title=corpus_name) + + collection, c = MediaCollection.objects.get_or_create(code=collection_id, title=collection_name) + if not collection in corpus.children.all(): + corpus.children.add(collection) + + item, c = MediaItem.objects.get_or_create(collection=collection, code=item_id) + if c: + item.old_code = item_name + # item.track = item_name + item.file = media_path + item.save() + + title = data[1].split('.') + item.title = title[0] + item.track = data[2].replace('\n', '') + if len(title) > 1: + item.comment = '. '.join(title[1:]) + item.save() + + for related_file in os.listdir(root): + related_path = os.sep.join(root_list[-4:]) + os.sep + related_file + related_name = os.path.splitext(related_file)[0] + related_ext = os.path.splitext(related_file)[1][1:] + + if related_ext in self.image_formats: + related, c = MediaItemRelated.objects.get_or_create(item=item, file=related_path) + if c: + if len(data) > 2: + related.title = item.track + related.set_mime_type() + related.save() diff --git a/telemeta/templates/telemeta/collection_epub.html b/telemeta/templates/telemeta/collection_epub.html index 75c86313..e401f1db 100644 --- a/telemeta/templates/telemeta/collection_epub.html +++ b/telemeta/templates/telemeta/collection_epub.html @@ -3,7 +3,7 @@ {% for item in items %}

- {{ item.old_code }} : {{ item.title }} (p. {{ item.track }}) + Son {{ item.old_code }} : {{ item.title }}. {{ item.comment }} (fig. {{ item.track }}, p. )

diff --git a/telemeta/views/resource.py b/telemeta/views/resource.py index a4a4d7ee..cdf39c10 100644 --- a/telemeta/views/resource.py +++ b/telemeta/views/resource.py @@ -394,10 +394,8 @@ class CorpusEpubView(View): for collection in corpus.children.all(): items = {} for item in collection.items.all(): - id = item.old_code.split('_') - if len(id) > 1: - id = id[1] - items[item] = int(id.split('.')[1]) + id = item.old_code.split('.')[1].replace('a', '.1').replace('b', '.2') + items[item] = float(id) items = OrderedDict(sorted(items.items(), key=lambda t: t[1])) for item in items: @@ -442,7 +440,7 @@ class CorpusEpubView(View): # create epub file filename = '/tmp/test.epub' epub.write_epub(filename, book, {}) - epub_file = open(filename, 'r') + epub_file = open(filename, 'rb') response = HttpResponse(epub_file.read(), content_type='application/epub+zip') response['Content-Disposition'] = "attachment; filename=%s.%s" % \ -- 2.39.5