From: Guillaume Pellerin Date: Tue, 24 Feb 2015 22:11:17 +0000 (+0100) Subject: Add epub directory import script X-Git-Tag: 1.6a^2~19^2~6 X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=bf97c7e613645aba7c001cd13b449c57dfdcc3b9;p=telemeta.git Add epub directory import script Update corpus epub generator including more metadata --- diff --git a/telemeta/management/commands/telemeta-import-corpus-epub.py b/telemeta/management/commands/telemeta-import-corpus-epub.py new file mode 100644 index 00000000..909b8b5a --- /dev/null +++ b/telemeta/management/commands/telemeta-import-corpus-epub.py @@ -0,0 +1,93 @@ +from optparse import make_option +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from django.core.files.base import ContentFile +from telemeta.models import * +from telemeta.util.unaccent import unaccent +import os + +try: + from django.utils.text import slugify +except ImportError: + def slugify(string): + killed_chars = re.sub('[\(\),]', '', string) + return re.sub(' ', '_', killed_chars) + +def beautify(string): + return os.path.splitext(string)[0].replace('_',' ') + +def trim_list(list): + new = [] + for item in list: + if item: + new.append(item) + return new + +class Command(BaseCommand): + help = "import media files from a directory to a corpus" + args = "root_dir" + media_formats = ['mp3'] + image_formats = ['png', 'jpg'] + text_formats = ['txt'] + + def handle(self, *args, **options): + root_dir = args[-1] + + for root, dirs, files in os.walk(root_dir): + for media_file in files: + media_name = os.path.splitext(media_file)[0] + media_ext = os.path.splitext(media_file)[1][1:] + + if media_ext and media_ext in self.media_formats and media_name[0] != '.': + root_list = root.split(os.sep) + + media_path = os.sep.join(root_list[-4:]) + os.sep + media_file + print media_path + item_name = root_list[-1] + collection_name = root_list[-2] + corpus_name = root_list[-3] + + corpus_id = slugify(unicode(corpus_name)) + collection_id = corpus_id + '_' + slugify(unicode(collection_name)) + item_id = collection_id + '_' + slugify(unicode(item_name)) + + corpus, c = MediaCorpus.objects.get_or_create(code=corpus_id, title=corpus_name) + + collection, c = MediaCollection.objects.get_or_create(code=collection_id, title=collection_name) + if not collection in corpus.children.all(): + corpus.children.add(collection) + + item, c = MediaItem.objects.get_or_create(collection=collection, code=item_id) + if c: + item.old_code = item_name + item.file = media_path + item.save() + + for related_file in os.listdir(root): + related_path = root + os.sep + related_file + related_ext = os.path.splitext(related_file)[1][1:] + if related_ext in self.text_formats: + text = open(related_path, 'r') + lines = trim_list(text.read().splitlines()) + print lines + break + + if lines: + item.track = lines[2] + item.title = lines[3] + item.save() + + for related_file in os.listdir(root): + related_path = os.sep.join(root_list[-4:]) + os.sep + related_file + related_name = os.path.splitext(related_file)[0] + related_ext = os.path.splitext(related_file)[1][1:] + + print related_path + if related_ext in self.image_formats: + related, c = MediaItemRelated.objects.get_or_create(item=item, file=unicode(related_path)) + if c: + if lines: + related.title = lines[4] + related.set_mime_type() + related.save() + diff --git a/telemeta/models/collection.py b/telemeta/models/collection.py index eb2d7a62..d3be102a 100644 --- a/telemeta/models/collection.py +++ b/telemeta/models/collection.py @@ -136,9 +136,6 @@ class MediaCollection(MediaResource): def __unicode__(self): return self.code - def save(self, force_insert=False, force_update=False, user=None, code=None): - super(MediaCollection, self).save(force_insert, force_update) - @property def public_id(self): return self.code @@ -264,19 +261,6 @@ class MediaCollection(MediaResource): return metadata - def epub(self, filename): - from epub.models import EPub - e = EPub() - e.metadata.title = self.title - # e.metadata.add_creator(self.metadata_author) - e.metadata.description = self.description - e.metadata.publisher = self.publisher - e.metadata.language = 'fr-FR' - for item in self.items.all(): - e.add_article(item.title, item.comment) - e.generate_epub(filename) - - class MediaCollectionRelated(MediaRelated): "Collection related media" diff --git a/telemeta/models/item.py b/telemeta/models/item.py index 832f17f7..8acf2c86 100644 --- a/telemeta/models/item.py +++ b/telemeta/models/item.py @@ -182,8 +182,8 @@ class MediaItem(MediaResource): raise ValidationError("%s is not a valid item code for collection %s" % (self.code, self.collection.code)) - def save(self, force_insert=False, force_update=False): - super(MediaItem, self).save(force_insert, force_update) + def save(self, *args, **kwargs): + super(MediaItem, self).save(*args, **kwargs) def computed_duration(self): "Tell the length in seconds of this item media data" @@ -322,9 +322,6 @@ class MediaItemRelated(MediaRelated): item = ForeignKey('MediaItem', related_name="related", verbose_name=_('item')) - def save(self, force_insert=False, force_update=False, using=False): - super(MediaItemRelated, self).save(force_insert, force_update) - def parse_markers(self, **kwargs): # Parse KDEnLive session if self.file: diff --git a/telemeta/models/resource.py b/telemeta/models/resource.py index 2b5d8677..8ea69b6c 100644 --- a/telemeta/models/resource.py +++ b/telemeta/models/resource.py @@ -80,8 +80,8 @@ class MediaBaseResource(MediaResource): def public_id(self): return self.code - def save(self, force_insert=False, force_update=False, user=None, code=None): - super(MediaBaseResource, self).save(force_insert, force_update) + def save(self, *args, **kwargs): + super(MediaBaseResource, self).save(*args, **kwargs) def get_fields(self): return self._meta.fields @@ -112,8 +112,8 @@ class MediaRelated(MediaResource): is_url_image = True return 'image' in self.mime_type or is_url_image - def save(self, force_insert=False, force_update=False, author=None): - super(MediaRelated, self).save(force_insert, force_update) + def save(self, *args, **kwargs): + super(MediaRelated, self).save(*args, **kwargs) def set_mime_type(self): if self.file: diff --git a/telemeta/static/telemeta/css/telemeta_epub.css b/telemeta/static/telemeta/css/telemeta_epub.css index 3cd6cb6e..c01fa2a0 100644 --- a/telemeta/static/telemeta/css/telemeta_epub.css +++ b/telemeta/static/telemeta/css/telemeta_epub.css @@ -1,4 +1,4 @@ -@namespace epub "http://www.idpf.org/2007/ops"; +@namespace EPUB "http://www.idpf.org/2007/ops"; body { font-family: Cambria, Liberation Serif, Bitstream Vera Serif, Georgia, Times, Times New Roman, serif; @@ -13,7 +13,7 @@ h2 { h3 { text-align: left; - font-weight: 1em; + font-size: 0.8em; margin-top: 0px; } diff --git a/telemeta/templates/telemeta/collection_epub.html b/telemeta/templates/telemeta/collection_epub.html index d717a60b..cc9ea38c 100644 --- a/telemeta/templates/telemeta/collection_epub.html +++ b/telemeta/templates/telemeta/collection_epub.html @@ -1,20 +1,28 @@ -{% for item in collection.items.all %} +{% for item in items %} -
-

{{ item.title }}

-
+ + + + + +
+ {% for image in item.related.all %} + {% if 'image' in image.mime_type %} +
+ +
+ {% endif %} + {% endfor %} -
-
+
+ + {{ item.old_code }} : {{ item.title }} (p. {{ item.track }}) + +
- {% for image in item.related.all %} - {% if 'image' in image.mime_type %} -
- -
- {% endif %} - {% endfor %} +
+ +
{% endfor %} diff --git a/telemeta/views/resource.py b/telemeta/views/resource.py index 4e99bf89..c1f3cfd5 100644 --- a/telemeta/views/resource.py +++ b/telemeta/views/resource.py @@ -360,6 +360,7 @@ class CorpusEpubView(View): """ Stream an Epub file of collection data """ + from collections import OrderedDict from ebooklib import epub from django.template.loader import render_to_string @@ -374,9 +375,9 @@ class CorpusEpubView(View): book.set_identifier(corpus.public_id) book.set_title(corpus.title) book.set_language('fr') - book.add_author(corpus.descriptions) + # add cover image # for media in corpus.related.all(): # if 'cover' in media.title or 'Cover' in media.title: @@ -385,7 +386,15 @@ class CorpusEpubView(View): chapters = [] for collection in corpus.children.all(): + items = {} for item in collection.items.all(): + id = item.old_code.split(' ') + if len(id) > 1: + id = id[1] + items[item] = int(id.split('.')[1]) + items = OrderedDict(sorted(items.items(), key=lambda t: t[1])) + # items = collection.items.all().order_by('old_code') + for item in items: if item.file: audio = open(item.file.path, 'r') epub_item = epub.EpubItem(file_name=str(item.file), content=audio.read()) @@ -395,10 +404,9 @@ class CorpusEpubView(View): image = open(related.file.path, 'r') epub_item = epub.EpubItem(file_name=str(related.file), content=image.read()) book.add_item(epub_item) - context = {'collection': collection, 'site': site} + context = {'collection': collection, 'site': site, 'items': items} c = epub.EpubHtml(title=collection.title, file_name=collection.code + '.xhtml', lang='fr') c.content = render_to_string(collection_template, context) - print c.content chapters.append(c) # add chapters to the book book.add_item(c) @@ -414,7 +422,8 @@ class CorpusEpubView(View): book.add_item(epub.EpubNcx()) book.add_item(epub.EpubNav()) - # define css style + + # add css style style = open(css, 'r') nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style.read()) book.add_item(nav_css)