]> git.parisson.com Git - telemeta.git/commitdiff
add MediaCollection dublin core mapping
author olivier <>
Thu, 21 Jan 2010 21:20:35 +0000 (21:20 +0000)
committer olivier <>
Thu, 21 Jan 2010 21:20:35 +0000 (21:20 +0000)
telemeta/interop/oaidatasource.py
telemeta/models/core.py [new file with mode: 0644]
telemeta/models/crem.py
telemeta/models/dublincore.py
telemeta/tests/model_tests.py

index 6d5897568b5dec827fb3bd5cd32ef223fecccd27..d0ebc5784c18f03017a8e214d6f937e98b2f174e 100644 (file)
@@ -49,7 +49,7 @@ class TelemetaOAIDataSource(object):
         _dc = record.to_dublincore().to_list()
         for k, v in _dc:
             if k == 'identifier':
-                dc.append((k, type + ':' + v))
+                dc.append((k, type + ':' + v)) # FIXME: type prepended by CREM model
             else:
                 dc.append((k, v))
         return (dc, ctime)
@@ -60,7 +60,8 @@ class TelemetaOAIDataSource(object):
             type, id = id.split(':')
         except ValueError:
             return None
-            
+        
+        #FIXME: search by code
         if (type == 'collection'):
             try:
                 record  = MediaCollection.objects.get(id=id)
@@ -68,6 +69,7 @@ class TelemetaOAIDataSource(object):
                 return None
         elif (type == 'item'):
             try:
+                #FIXME: also search by old_code if code is not found
                 record = MediaItem.objects.get(id=id)
             except MediaItem.DoesNotExist:
                 return None
diff --git a/telemeta/models/core.py b/telemeta/models/core.py
new file mode 100644 (file)
index 0000000..a220409
--- /dev/null
@@ -0,0 +1,143 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2007-2010 Samalyse SARL
+
+# This software is a computer program whose purpose is to backup, analyse,
+# transcode and stream any audio content with its metadata over a web frontend.
+
+# This software is governed by the CeCILL  license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+# Authors: Olivier Guilyardi <olivier@samalyse.com>
+
+import datetime
+import re
+
+from django import forms
+from django.core import exceptions
+from django.db import models
+from django.utils.translation import ugettext_lazy as _
+
+class Duration(object):
+    """Represent a time duration, as a thin wrapper around datetime.timedelta.
+
+    The constructor takes either a datetime.timedelta as first positional
+    argument, of which only the days and seconds are kept, or the same
+    arguments as datetime.timedelta().
+    """
+
+    def __init__(self, *args, **kwargs):
+        if len(args) and isinstance(args[0], datetime.timedelta):
+            self._delta = datetime.timedelta(days=args[0].days, seconds=args[0].seconds)
+        else:
+            self._delta = datetime.timedelta(*args, **kwargs)
+
+    def __decorate(self, method, other):
+        "Call a timedelta method on other, unwrapping Duration arguments and wrapping timedelta results"
+        if isinstance(other, Duration):
+            res = method(other._delta)
+        else:
+            res = method(other)
+        if isinstance(res, datetime.timedelta):
+            return Duration(res)
+
+        return res
+
+    def __add__(self, other):
+        return self.__decorate(self._delta.__add__, other)
+
+    # Comparisons are delegated to the wrapped timedelta. Without them,
+    # ordering two durations in Python 2 falls back to the default object
+    # ordering, which is unrelated to the duration values.
+    def __eq__(self, other):
+        return self.__decorate(self._delta.__eq__, other)
+
+    def __ne__(self, other):
+        return self.__decorate(self._delta.__ne__, other)
+
+    def __lt__(self, other):
+        return self.__decorate(self._delta.__lt__, other)
+
+    def __le__(self, other):
+        return self.__decorate(self._delta.__le__, other)
+
+    def __gt__(self, other):
+        return self.__decorate(self._delta.__gt__, other)
+
+    def __ge__(self, other):
+        return self.__decorate(self._delta.__ge__, other)
+
+    def __hash__(self):
+        # Keep hashing consistent with the value-based __eq__
+        return hash(self._delta)
+
+    def __str__(self):
+        "Format as HH:MM:SS, days being folded into the hours"
+        hours   = self._delta.days * 24 + self._delta.seconds // 3600
+        minutes = (self._delta.seconds % 3600) // 60
+        seconds = self._delta.seconds % 60
+
+        return "%.2d:%.2d:%.2d" % (hours, minutes, seconds)
+
+    @staticmethod
+    def fromstr(str):
+        """Build a Duration from a "H", "H:M" or "H:M:S" string.
+
+        An empty string or None gives a zero duration. ValueError is raised
+        if the string does not match one of the above forms.
+        """
+        if not str:
+            return Duration()
+
+        test = re.match('^([0-9]+)(?::([0-9]+)(?::([0-9]+))?)?$', str)
+        if not test:
+            raise ValueError("Malformed duration string: " + str)
+
+        # Optional groups which did not take part in the match are None
+        hours, minutes, seconds = [int(group or 0) for group in test.groups()]
+        return Duration(hours=hours, minutes=minutes, seconds=seconds)
+
+    def as_seconds(self):
+        "Return the whole duration as a number of seconds"
+        return self._delta.days * 24 * 3600 + self._delta.seconds
+            
+# The following is based on Django TimeField
+class DurationField(models.Field):
+    """Model field holding a Duration value.
+
+    The column type is the one of a TimeField (see get_internal_type()).
+    """
+    description = _("Duration")
+
+    __metaclass__ = models.SubfieldBase
+
+    default_error_messages = {
+        'invalid': _('Enter a valid duration in HH:MM[:ss[.uuuuuu]] format.'),
+    }
+
+    def get_internal_type(self):
+        return 'TimeField'
+
+    def to_python(self, value):
+        "Convert value to a Duration, raising ValidationError on malformed strings"
+        if value is None:
+            return None
+        if isinstance(value, Duration):
+            # Already converted. get_prep_value() hands over whatever it is
+            # given, and Duration.fromstr() only accepts strings.
+            return value
+        if isinstance(value, datetime.time):
+            return Duration(hours=value.hour, minutes=value.minute, seconds=value.second)
+        if isinstance(value, datetime.datetime):
+            # Not usually a good idea to pass in a datetime here (it loses
+            # information), but this can be a side-effect of interacting with a
+            # database backend (e.g. Oracle), so we'll be accommodating.
+            return self.to_python(value.time())
+
+        try:
+            return Duration.fromstr(value)
+        except ValueError:
+            raise exceptions.ValidationError(self.error_messages['invalid'])
+
+    def get_prep_value(self, value):
+        return self.to_python(value)
+
+    def get_db_prep_value(self, value, connection, prepared=False):
+        # Casts durations into the format expected by the backend
+        if value is None:
+            # Keep NULL as is rather than sending the string "None"
+            return None
+        return unicode(value)
+
+    def value_to_string(self, obj):
+        "Return the field value of obj as a string, empty if the value is None"
+        val = self._get_val_from_obj(obj)
+        if val is None:
+            data = ''
+        else:
+            data = unicode(val)
+        return data
+
+    def formfield(self, **kwargs):
+        "Return a form field for this model field, a forms.TimeField by default"
+        defaults = {'form_class': forms.TimeField}
+        defaults.update(kwargs)
+        return super(DurationField, self).formfield(**defaults)
+            
index a6662410e408d9e7c37b056f5e4cb1fcd993df68..caeff312ceb6cde904708903cdce519ea7a4645a 100755 (executable)
@@ -39,6 +39,8 @@ import cremquery as query
 from xml.dom.minidom import getDOMImplementation
 from telemeta.util.unaccent import unaccent_icmp
 import re
+from telemeta.models.core import DurationField, Duration
+from telemeta.models import dublincore as dc
 
 class ModelCore(models.Model):
 
@@ -118,6 +120,20 @@ class MediaResource(ModelCore):
     def get_revision(self):
         return Revision.objects.filter(element_type=self.element_type, element_id=self.id).order_by('-time')[0]
 
+    def dc_access_rights(self):
+        "Translate the public_access level into a Dublin Core access rights value"
+        rights_by_level = {'full': 'public', 'metadata': 'restricted'}
+        # Any other level is considered private
+        return rights_by_level.get(self.public_access, 'private')
+
+    def dc_identifier(self):
+        "Return the identifier as element_type:code, based on old_code if code is empty, or None"
+        code = self.code or self.old_code
+        if not code:
+            return None
+        return self.element_type + ':' + code
+
     class Meta:
         abstract = True
 
@@ -167,7 +183,7 @@ class MediaCollection(MediaResource):
     recorded_to_year      = models.IntegerField(default=0)
     recording_context     = models.ForeignKey('RecordingContext', related_name="collections",
                                               null=True)
-    approx_duration       = models.TimeField(default='00:00')
+    approx_duration       = DurationField(default='00:00')
     doctype_code          = models.IntegerField(default=0)
     travail               = models.CharField(max_length=250, default="")
     state                 = models.TextField(default="")
@@ -239,6 +255,56 @@ class MediaCollection(MediaResource):
             raise MediaInvalidCodeError("%s is not a valid code for this collection" % self.code)
         super(MediaCollection, self).save(force_insert, force_update, using)
 
+    def to_dublincore(self):
+        """Express this collection as a Dublin Core resource
+
+        The extent is the sum of the durations of the items, or the
+        approximative duration of the collection if it is greater. Each
+        item which has an identifier is listed as a hasPart relation.
+        """
+
+        if self.collector:
+            creator = (dc.Element('creator', self.collector),
+                       dc.Element('contributor', self.creator))
+        else:
+            creator = dc.Element('creator', self.creator)
+
+        resource = dc.Resource(
+            dc.Element('identifier',  self.dc_identifier()),
+            dc.Element('type',        'Collection'),
+            dc.Element('title',       self.title),
+            dc.Element('title',       self.alt_title),
+            creator,
+            dc.Element('contributor', self.metadata_author),
+            dc.Element('subject',     'Ethnologie'),
+            dc.Element('subject',     'Ethnomusicologie'),
+            dc.Element('publisher',   self.publisher),
+            dc.Element('publisher',   u'CNRS - Musée de l\'homme'),
+            dc.Date(self.recorded_from_year, self.recorded_to_year, 'created'),
+            dc.Date(self.year_published, refinement='issued'),
+            dc.Element('rightsHolder', self.creator),
+            dc.Element('rightsHolder', self.collector),
+            dc.Element('rightsHolder', self.publisher),
+        )
+
+        duration = Duration()
+        parts = []
+        for item in self.items.all():
+            duration += item.duration()
+
+            identifier = item.dc_identifier()
+            if identifier:
+                parts.append(dc.Element('relation', identifier, 'hasPart'))
+
+        # Compare the numbers of seconds: this does not depend on Duration
+        # objects being orderable
+        approx = self.approx_duration
+        if approx is not None and duration.as_seconds() < approx.as_seconds():
+            duration = approx
+
+        resource.add(
+            dc.Element('rights', self.legal_rights, 'license'),
+            dc.Element('rights', self.dc_access_rights(), 'accessRights'),
+            dc.Element('format', duration, 'extent'),
+            dc.Element('format', self.physical_format, 'medium'),
+            #FIXME: audio mime types are missing,
+            parts
+        )
+
+        return resource
+
     class Meta(MetaCore):
         db_table = 'media_collections'
 
@@ -251,7 +317,7 @@ class MediaItem(MediaResource):
     track                 = models.CharField(max_length=250, default="")
     old_code              = models.CharField(unique=True, max_length=250, null=True)
     code                  = models.CharField(unique=True, max_length=250, null=True)
-    approx_duration       = models.TimeField(default='00:00')
+    approx_duration       = DurationField(default='00:00')
     recorded_from_date    = models.DateField(default=0)
     recorded_to_date      = models.DateField(default=0)
     location              = models.ForeignKey('Location', related_name="items",
@@ -305,6 +371,21 @@ class MediaItem(MediaResource):
                                         % (self.code, self.collection.code))
         super(MediaItem, self).save(force_insert, force_update, using)
 
+    def duration(self):
+        """Tell the length of this item media data, as a Duration
+
+        The length is read from the media file, which is opened as WAV. If
+        there is no file, if it can't be read as WAV, or if it is shorter
+        than one second, approx_duration is returned instead.
+        """
+        # FIXME: use TimeSide?
+        seconds = 0
+        if self.file:
+            import wave
+            try:
+                media = wave.open(self.file.path, "rb")
+                try:
+                    framerate = media.getframerate()
+                    if framerate:
+                        seconds = media.getnframes() // framerate
+                finally:
+                    # Release the file even if reading the header failed
+                    media.close()
+            except (wave.Error, IOError, EOFError):
+                # Missing, truncated or non-WAV file: use approx_duration
+                seconds = 0
+
+        if seconds:
+            return Duration(seconds=seconds)
+
+        return self.approx_duration
+
     def __unicode__(self):
         if self.code:
             return self.code
index bda9c9748f0600b14b74bbdb0c759ec97cde7c95..3e76fe29b43f36388f57358c06cb67d43b7b80c7 100644 (file)
 class Resource(object):
     "Represent a Dublin Core resource"
 
-    elements = []
-
     def __init__(self, *args):
-        self.elements = args  
+        self.elements = []
+        self.add(*args)
 
     def flatten(self):
         """Convert the resource to a dictionary with element names as keys.
@@ -63,12 +62,50 @@ class Resource(object):
             result.append((element.name, unicode(element.value)))
         return result
 
+    def add(self, *elements):
+        """Add elements to this resource, skipping those already present.
+
+        Each argument is either an Element or a sequence of such arguments,
+        which is added recursively. Anything else raises an Exception.
+        """
+        for item in elements:
+            if not isinstance(item, Element):
+                # Not an element: it must be a sequence of elements
+                try:
+                    iter(item)
+                except TypeError:
+                    raise Exception("add() only accepts elements or sequences of elements")
+                self.add(*item)
+                continue
+
+            if item not in self.elements:
+                self.elements.append(item)
+
+    def __unicode__(self):
+        "Dump the elements, one per line, as name[.refinement]:<tab>value"
+        lines = []
+        for element in self.elements:
+            label = unicode(element.name)
+            if element.refinement:
+                label = label + u'.' + unicode(element.refinement)
+            lines.append(u'%s:\t%s\n' % (label, unicode(element.value)))
+        return u''.join(lines)
+            
+
 class Element(object):
     "Represent a Dublin Core element"
 
-    def __init__(self, name, field=None, value=None, refinement=None):
+    def __init__(self, name, value=None, refinement=None):
         self.name = name
         self.value = value
         self.refinement = refinement
-        self.field = field
-        
+
+    def __eq__(self, other):
+        "Tell whether other is an element with the same name, value and refinement"
+        if not isinstance(other, Element):
+            return NotImplemented
+        return (self.name == other.name and self.value == other.value
+                and self.refinement == other.refinement)
+
+    def __ne__(self, other):
+        # Defined explicitly as the negation of ==, since Python 2 does not
+        # derive != from __eq__
+        return not (self == other)
+
+class Date(Element):
+    """Dublin Core date element formatted according to W3C-DTF or DCMI Period
+
+    The value is start alone, or a "start=...; end=...;" period when end
+    is set and differs from start.
+    """
+
+    def __init__(self, start, end=None, refinement=None):
+        # unicode() for both bounds: str() fails on non-ASCII unicode input
+        value = unicode(start)
+        if end and start != end:
+            value = 'start=' + value + '; end=' + unicode(end) + ';'
+        super(Date, self).__init__('date', value, refinement)
+            
+
index 59a216281c4b94bea35024d1e9e9d9b80e6de018..45edeeb49b686ef971011310b5ce711ac129dfdb 100644 (file)
@@ -78,8 +78,8 @@ class CollectionItemTestCase(unittest.TestCase):
 
         self.volonte.save_with_revision(self.olivier)
 
-        self.nicolas = MediaCollection(id=3, reference="A3",  code="CNRSMH_E_1967_123_456", title="petit nicolas", 
-            creator="Georgette McKenic", collector="Paul MAILLE",  year_published=1999, is_published=True, 
+        self.nicolas = MediaCollection(id=3, reference="A3",  code="CNRSMH_I_1967_123", title="petit nicolas", 
+            creator="Georgette McKenic", collector="Paul MAILLE",  year_published=1999,  
             recorded_from_year=1967, recorded_to_year=1968)
                                    
         self.nicolas.save_with_revision(self.olivier)
@@ -92,13 +92,13 @@ class CollectionItemTestCase(unittest.TestCase):
 
         self.item_1.save_with_revision(self.david)
 
-        self.item_2 = MediaItem(id=2, collection=self.volonte, code="CNRSMH_I_1960_001_12_78", 
+        self.item_2 = MediaItem(id=2, collection=self.volonte, code="CNRSMH_I_1960_001_129", 
             recorded_from_date="1981-01-12", recorded_to_date="1991-02-24", location=self.france, 
             ethnic_group=self.a, title="item 2", author="Rick ROLL", comment="comment 2") 
 
         self.item_2.save_with_revision(self.david)
 
-        self.item_3 = MediaItem(id=3, collection=self.nicolas, code="CNRSMH_E_1967_123_456_01_99", 
+        self.item_3 = MediaItem(id=3, collection=self.nicolas, code="CNRSMH_I_1967_123_456_01", 
             recorded_from_date="1968-01-12", recorded_to_date="1968-02-24", location=self.belgique, 
             ethnic_group=self.b, title="item 3", author="John SMITH", collector="Paul CARLOS",
             comment="comment 3",  )
@@ -112,7 +112,7 @@ class CollectionItemTestCase(unittest.TestCase):
 
         self.item_4.save_with_revision(self.olivier)
 
-        self.item_5 = MediaItem(id=5, collection=self.volonte,code="CNRSMH_I_1960_001_85", 
+        self.item_5 = MediaItem(id=5, collection=self.volonte,code="CNRSMH_I_1960_001_789_85_22", 
             approx_duration="00:05:00", recorded_from_date="1978-01-12", recorded_to_date="1978-02-24", 
             location=self.belgique, ethnic_group=self.a, title="item 5", alt_title="I5", 
             author="Simon PAUL", collector="Javier BARDEM", 
@@ -275,3 +275,5 @@ class CollectionItemTestCase(unittest.TestCase):
         "Test the MediaCollection.get_countries() method"
         self.assertEquals(self.volonte.get_countries(), [self.belgique, self.france])
 
+        
+