]> git.parisson.com Git - timeside.git/commitdiff
Add a sha1sum computation for decoders
authorThomas Fillon <thomas@parisson.com>
Thu, 20 Mar 2014 12:50:11 +0000 (13:50 +0100)
committerThomas Fillon <thomas@parisson.com>
Thu, 20 Mar 2014 12:50:11 +0000 (13:50 +0100)
The sha1sum will be computed for files, URLs and numpy arrays

timeside/analyzer/core.py
timeside/decoder/array.py
timeside/decoder/core.py
timeside/decoder/file.py
timeside/decoder/live.py
timeside/decoder/utils.py

index b3eafc99e83ebd0d5793837ad5fddb04bac9d986..486eb0cb857a22ab59fcf45ffe5c7fdf47fe8a0b 100644 (file)
@@ -241,6 +241,10 @@ class AudioMetadata(MetadataObject):
             Start time of the segment in seconds
         duration : float
             Duration of the segment in seconds
+        is_segment : boolean
+            Is the media a segment of an audio source
+        sha1 : str
+            Sha1 hexadecimal digest of the audio source
         channels : int
             Number of channels
         channelsManagement : str
@@ -257,6 +261,7 @@ class AudioMetadata(MetadataObject):
                                   ('start', 0),
                                   ('duration', None),
                                   ('is_segment', None),
+                                  ('sha1', ''),
                                   ('channels', None),
                                   ('channelsManagement', '')])
 
@@ -1100,6 +1105,7 @@ class Analyzer(Processor):
         result.id_metadata.uuid = self.uuid()
 
         result.audio_metadata.uri = self.mediainfo()['uri']
+        result.audio_metadata.sha1 = self.mediainfo()['sha1']
         result.audio_metadata.start = self.mediainfo()['start']
         result.audio_metadata.duration = self.mediainfo()['duration']
         result.audio_metadata.is_segment = self.mediainfo()['is_segment']
index 581603a9d9c168e9c7bd768018dc99ba4cd290e6..bb66dbf3179255ee72cfe7ce0c80f9cecf5e6dd1 100644 (file)
@@ -72,7 +72,8 @@ class ArrayDecoder(Decoder):
         self.uri = '_'.join(['raw_audio_array',
                             'x'.join([str(dim) for dim in samples.shape]),
                              samples.dtype.type.__name__])
-
+        from .utils import sha1sum_numpy
+        self._sha1 = sha1sum_numpy(self.samples)
         self.frames = self.get_frames()
 
     def setup(self, channels=None, samplerate=None, blocksize=None):
index 1eba57768a8a0a5eaa7653d6967bf2cd85521a3f..60355e2a31a40190abcc144108b81081a13e4eda 100644 (file)
@@ -32,7 +32,7 @@ from timeside.core import Processor, implements, interfacedoc, abstract
 from timeside.api import IDecoder
 from timeside.tools import *
 
-from utils import get_uri, get_media_uri_info, stack
+from utils import get_uri, get_media_uri_info, stack, get_sha1
 
 import Queue
 from gst import _gst as gst
@@ -93,7 +93,12 @@ class Decoder(Processor):
                     duration=self.uri_duration,
                     start=self.uri_start,
                     is_segment=self.is_segment,
-                    samplerate=self.input_samplerate)
+                    samplerate=self.input_samplerate,
+                    sha1=self.sha1)
+
+    @property
+    def sha1(self):
+        """Sha1 hexadecimal digest of the audio source (read-only).
+
+        Returns the value of the private `_sha1` attribute; this property
+        does not compute anything itself.
+        """
+        return self._sha1
 
     def __del__(self):
         self.release()
index 9f29b4e27f6e493204a1334f1d9045a07a6d9efa..3397227b5aab2664202aa22751f66e42219a3cd5 100644 (file)
@@ -70,6 +70,8 @@ class FileDecoder(Decoder):
         self.stack = stack
 
         self.uri = get_uri(uri)
+        self._sha1 = get_sha1(uri)
+
         self.uri_total_duration = get_media_uri_info(self.uri)['duration']
 
         self.mimetype = None
index fdb596b7a1291ee8ed50a74b30a220f2beb3305c..925233008e6c7e22b2c9075e5fabd83a1aa13c00 100644 (file)
@@ -85,6 +85,7 @@ class LiveDecoder(Decoder):
         self.uri_duration = None
         self.is_segment = False
         self.input_src = input_src
+        self._sha1 = ''
 
     def setup(self, channels=None, samplerate=None, blocksize=None):
 
index 0b21f3ec48b9c81ccc48d16ea0c6d4112f28d883..50e9a719f03a7e8d6449d3ac1c0dff6eb0a3a7eb 100644 (file)
@@ -26,7 +26,7 @@
 
 from __future__ import division
 
-import numpy
+import numpy as np
 
 class Noise(object):
     """A class that mimics audiolab.sndfile but generates noise instead of reading
@@ -60,7 +60,7 @@ class Noise(object):
         else:
             will_read = frames_to_read
         self.seekpoint += will_read
-        return numpy.random.random(will_read)*2 - 1
+        return np.random.random(will_read)*2 - 1
 
 
 def path2uri(path):
@@ -78,33 +78,43 @@ def path2uri(path):
     return urlparse.urljoin('file:', urllib.pathname2url(path))
 
 
+def source_info(source):
+    """
+    Describe a media source given as a path or as a uri.
+
+    Return a dict with three keys:
+
+    - 'is_file' : True when `source` is an existing path on the filesystem
+    - 'pathname' : absolute path of `source` ('' when it is not a file)
+    - 'uri' : result of path2uri on that absolute path ('' otherwise)
+    """
+    import os.path
+
+    if not os.path.exists(source):
+        # Not a path on the filesystem: nothing more can be said here
+        return {'is_file': False, 'uri': '', 'pathname': ''}
+
+    absolute_path = os.path.abspath(source)
+    return {'is_file': True,
+            'uri': path2uri(absolute_path),
+            'pathname': absolute_path}
+
+
 def get_uri(source):
     """
     Check a media source as a valid file or uri and return the proper uri
     """
 
     import gst
-    # Is this an valid URI source
-    if gst.uri_is_valid(source):
+
+    src_info = source_info(source)
+
+    if src_info['is_file']:  # Is this a file?
+        return get_uri(src_info['uri'])
+
+    elif gst.uri_is_valid(source):  # Is this a valid URI source for Gstreamer
         uri_protocol = gst.uri_get_protocol(source)
         if gst.uri_protocol_is_supported(gst.URI_SRC, uri_protocol):
             return source
         else:
             raise IOError('Invalid URI source for Gstreamer')
-
-    # is this a file?
-    import os.path
-    if os.path.exists(source):
-        # get the absolute path
-        pathname = os.path.abspath(source)
-        # and make a uri of it
-        uri = path2uri(pathname)
-
-        return get_uri(uri)
     else:
-        raise IOError('Failed getting uri for path %s: not such file or directoy' % source)
+        raise IOError('Failed getting uri for path %s: no such file' % source)
 
-    return uri
 
 def get_media_uri_info(uri):
 
@@ -151,6 +161,73 @@ def stack(process_func):
     return wrapper
 
 
+def get_sha1(source):
+    """
+    Return the sha1 hexadecimal digest of a media source.
+
+    When `source` is an existing path, the digest is computed by
+    sha1sum_file on its absolute path; otherwise `source` is handed
+    unchanged to sha1sum_url.
+    """
+    info = source_info(source)
+
+    if not info['is_file']:
+        # Not a file, then it should be an url
+        return sha1sum_url(source)
+    return sha1sum_file(info['pathname'])
+
+
+def sha1sum_file(filename):
+    '''
+    Compute the sha1 hexadecimal digest of the content of a file
+
+    The file is opened in binary mode and read chunk by chunk, so its
+    whole content is never held in memory at once.
+
+    >>> print sha1sum_file('../../tests/samples/guitar.wav')
+    08301c3f9a8d60926f31e253825cc74263e52ad1
+    '''
+    import hashlib
+    import io
+
+    digest = hashlib.sha1()
+    read_size = digest.block_size * io.DEFAULT_BUFFER_SIZE
+
+    with open(filename, 'rb') as stream:
+        while True:
+            data = stream.read(read_size)
+            if not data:  # an empty read marks the end of the file
+                break
+            digest.update(data)
+    return digest.hexdigest()
+
+
+def sha1sum_url(url):
+    '''Return the secure hash digest with sha1 algorithm for a given url
+
+    The resource is read chunk by chunk. Reading stops as soon as more
+    than 10 MiB have been received, so for a larger resource the digest
+    covers only the beginning of its content, not the whole of it.
+
+    >>> url = 'https://github.com/yomguy/timeside-samples/raw/master/samples/guitar.wav'
+    >>> print sha1sum_url(url)
+    08301c3f9a8d60926f31e253825cc74263e52ad1
+    >>> uri = get_uri('../../tests/samples/guitar.wav')
+    >>> print sha1sum_url(uri)
+    08301c3f9a8d60926f31e253825cc74263e52ad1
+
+    '''
+    import hashlib
+    import urllib
+    from contextlib import closing
+
+    sha1 = hashlib.sha1()
+    chunk_size = sha1.block_size * 8192
+
+    max_file_size = 10*1024*1024  # 10Mo limit in case of very large file
+
+    total_read = 0
+    with closing(urllib.urlopen(url)) as url_obj:
+        for chunk in iter(lambda: url_obj.read(chunk_size), b''):
+            sha1.update(chunk)
+            # Count the bytes actually received: a read may return fewer
+            # bytes than requested, and counting chunk_size would then
+            # trigger the size limit too early
+            total_read += len(chunk)
+            if total_read > max_file_size:
+                break
+
+    return sha1.hexdigest()
+
+
+def sha1sum_numpy(np_array):
+    '''
+    Return the secure hash digest with sha1 algorithm for a numpy array
+
+    The digest is computed over the raw bytes of the array elements taken
+    in C (row-major) order. Neither the shape nor the dtype of the array
+    is mixed into the digest.
+    '''
+    import hashlib
+    # Viewing the data as bytes requires a C-contiguous buffer; transposed
+    # or strided arrays (and 0-d arrays) would otherwise raise. For an
+    # array that is already C-contiguous this is a no-op, so its digest is
+    # unchanged.
+    contiguous = np.ascontiguousarray(np_array)
+    return hashlib.sha1(contiguous.view(np.uint8)).hexdigest()
+
+
 if __name__ == "__main__":
     import doctest
     doctest.testmod()