From 98ea0a83e998cbe5bb2c7faa7bb78858d8cb3d7b Mon Sep 17 00:00:00 2001
From: Thomas Fillon
Date: Fri, 19 Jul 2013 18:36:23 +0200
Subject: [PATCH] Add new structure for the analyzerResult + Add Analyzer
 generic processor

Add a new structure to handle the different kinds of metadata:
 - data
 - idMetadata
 - audioMetadata
 - frameMetadata
 - labelMetadata
 - parameters

Automatic setting of some metadata is provided through a generic Analyzer
processor class from which the common analyzers derive.

Modify the setup() parameters of all processors to differentiate between the
source_* parameters and the input_*/output_* parameters of the non-source
processors.
---
 timeside/analyzer/aubio_pitch.py    |  43 ++++-----
 timeside/analyzer/aubio_temporal.py | 131 ++++++++++++++++-----------
 timeside/analyzer/core.py           | 135 +++++++++++++++++++++++++---
 timeside/api.py                     |   7 ++
 timeside/core.py                    |  40 ++++++---
 timeside/decoder/core.py            |   8 +-
 timeside/encoder/ogg.py             |   1 -
 7 files changed, 268 insertions(+), 97 deletions(-)

diff --git a/timeside/analyzer/aubio_pitch.py b/timeside/analyzer/aubio_pitch.py
index dc502b0..dba7fd9 100644
--- a/timeside/analyzer/aubio_pitch.py
+++ b/timeside/analyzer/aubio_pitch.py
@@ -24,15 +24,18 @@ from timeside.analyzer.core import *
 from timeside.api import IValueAnalyzer
 from aubio import pitch
 
-class AubioPitch(Processor):
-    implements(IValueAnalyzer)
+class AubioPitch(Analyzer):
+    implements(IAnalyzer) # TODO check if needed with inheritance
+
+    def __init__(self):
+        self.input_blocksize = 2048
+        self.input_stepsize = self.input_blocksize / 2
 
     @interfacedoc
     def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
         super(AubioPitch, self).setup(channels, samplerate, blocksize, totalframes)
-        self.win_s = 2048
-        self.hop_s = self.win_s / 2
-        self.p = pitch("default", self.win_s, self.hop_s, samplerate)
+        self.p = pitch("default", self.input_blocksize, self.input_stepsize,
+                       samplerate)
         self.p.set_unit("freq")
         self.block_read = 0
         self.pitches = []
@@ -56,32 +59,30 @@ class AubioPitch(Processor):
         return "pitch values"
 
     def process(self, frames, eod=False):
-        for samples in downsample_blocking(frames, self.hop_s):
-            #time = self.block_read * self.hop_s * 1. / self.samplerate()
+        for samples in downsample_blocking(frames, self.input_stepsize):
+            #time = self.block_read * self.input_stepsize * 1. / self.samplerate()
             self.pitches += [self.p(samples)[0]]
             self.block_read += 1
         return frames, eod
 
     def results(self):
-        container = AnalyzerResultContainer()
-        pitch = AnalyzerResult()
+        container = super(AubioPitch, self).results()
+
+        pitch = self.new_result(dataMode='value', resultType='framewise')
+        pitch.idMetadata.id = "aubio_pitch"
+        pitch.idMetadata.name = "f0 (aubio)"
+        pitch.idMetadata.unit = 'Hz'
 
         # Get metadata
-        samplerate = self.samplerate()
-        blocksize = self.win_s
-        stepsize = self.hop_s
-
-        # Set metadata
-        pitch.metadata = AnalyzerMetadata(id="aubio_pitch",
-                                          name="f0 (aubio)",
-                                          unit='Hz',
-                                          samplerate = samplerate,
-                                          blocksize = blocksize,
-                                          stepsize = stepsize)
+
+        # parameters : None # TODO check with Piem "default" and "freq" in setup
+
+        # Set Data
         self.pitches = numpy.array(self.pitches)
-        pitch.data = self.pitches
+        pitch.data.data = self.pitches
+        pitch.data.dataType = float
         container.add_result(pitch)
         return container
diff --git a/timeside/analyzer/aubio_temporal.py b/timeside/analyzer/aubio_temporal.py
index 71082d8..4dcd24d 100644
--- a/timeside/analyzer/aubio_temporal.py
+++ b/timeside/analyzer/aubio_temporal.py
@@ -25,16 +25,18 @@
 from timeside.api import IAnalyzer
 from aubio import onset, tempo
 
-class AubioTemporal(Processor):
+class AubioTemporal(Analyzer):
     implements(IAnalyzer)
 
+    def __init__(self):
+        self.input_blocksize = 1024
+        self.input_stepsize = 256
+
     @interfacedoc
     def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
         super(AubioTemporal, self).setup(channels, samplerate, blocksize, totalframes)
-        self.win_s = 1024
-        self.hop_s = 256
-        self.o = onset("default", self.win_s, self.hop_s, samplerate)
-        self.t = tempo("default", self.win_s, self.hop_s, samplerate)
+        self.o = onset("default", self.input_blocksize, self.input_stepsize, samplerate)
+        self.t = tempo("default", self.input_blocksize, self.input_stepsize, samplerate)
         self.block_read = 0
         self.onsets = []
         self.beats = []
@@ -58,7 +60,7 @@ class AubioTemporal(Processor):
         return "%s %s" % (str(self.value), unit())
 
     def process(self, frames, eod=False):
-        for samples in downsample_blocking(frames, self.hop_s):
+        for samples in downsample_blocking(frames, self.input_stepsize):
             if self.o(samples):
                 self.onsets += [self.o.get_last_s()]
             if self.t(samples):
@@ -67,60 +69,85 @@ class AubioTemporal(Processor):
         return frames, eod
 
     def results(self):
-        # Get common metadata
-        commonAttr = dict(samplerate=self.samplerate(),
-                          blocksize=self.win_s,
-                          stepsize=self.hop_s)
-        # FIXME : Onsets, beat and onset rate are not frame based Results
-        # samplerate, blocksize, etc. are not appropriate here
-        # Those might be some kind of "AnalyzerSegmentResults"
-
-        # list of onset locations
-        onsets = AnalyzerResult()
-        # Set metadata
-        onsetsAttr = dict(id="aubio_onset",
-                          name="onsets (aubio)",
-                          unit="s")
-        onsets.metadata = dict(onsetsAttr.items() + commonAttr.items())
-        # Set Data
-        onsets.data = self.onsets
-
-        # list of inter-onset intervals, in beats per minute
-        onsetrate = AnalyzerResult()
+
+        container = super(AubioTemporal, self).results()
+
+        #---------------------------------
+        #  Onsets
+        #---------------------------------
+        onsets = self.new_result(dataMode='label', resultType='event')
+
+        onsets.idMetadata.id = "aubio_onset"
+        onsets.idMetadata.name = "onsets (aubio)"
+        onsets.idMetadata.unit = 's'
+
+        # Set Data , dataMode='label', resultType='event'
+        # Event = list of (time, labelId)
+        onsets.data.data = [(time,1) for time in self.onsets]
+
+        onsets.labelMetadata.label = {1: 'Onset'}
+
+        container.add_result(onsets)
+
+        #---------------------------------
+        #  Onset Rate
+        #---------------------------------
+        onsetrate = self.new_result(dataMode='value', resultType='event')
         # Set metadata
-        onsetrateAttr = dict(id="aubio_onset_rate",
-                             name="onset rate (aubio)",
-                             unit="bpm")
-        onsetrate.metadata = dict(onsetrateAttr.items() + commonAttr.items())
-        # Set Data
+        onsetrate.idMetadata.id = "aubio_onset_rate"
+        onsetrate.idMetadata.name="onset rate (aubio)"
+        onsetrate.idMetadata.unit="bpm"
+
+        # Set Data , dataMode='value', resultType='event'
+        # Event = list of (time, value)
         if len(self.onsets) > 1:
             periods = 60. / numpy.diff(self.onsets)
-            onsetrate.data = periods
+            onsetrate.data.data = zip(periods,self.onsets[:-1])
         else:
-            onsetrate.data = []
+            onsetrate.data.data = []
 
-        # list of beat locations
-        beats = AnalyzerResult()
+        container.add_result(onsetrate)
+
+        #---------------------------------
+        #  Beats
+        #---------------------------------
+        beats = self.new_result(dataMode='label', resultType='segment')
         # Set metadata
-        beatsAttr = dict(id="aubio_beat",
-                         name="beats (aubio)",
-                         unit="s")
-        beats.metadata = dict(beatsAttr.items() + commonAttr.items())
-        # Set Data
-        beats.data = self.beats
-
-        # list of inter-beat intervals, in beats per minute
-        bpm = AnalyzerResult()
+        beats.idMetadata.id="aubio_beat"
+        beats.idMetadata.name="beats (aubio)"
+        beats.idMetadata.unit="s"
+
+        # Set Data, dataMode='label', resultType='segment'
+        # Segment = list of (time, duration, labelId)
+        if len(self.beats) > 1:
+            duration = numpy.diff(self.beats)
+            duration = numpy.append(duration,duration[-1])
+            beats.data.data = [(time,dur,1) for (time, dur) in zip(self.beats, duration)]
+        else:
+            beats.data.data = []
+        beats.labelMetadata.label = {1: 'Beat'}
+
+        container.add_result(beats)
+
+        #---------------------------------
+        #  BPM
+        #---------------------------------
+        bpm = self.new_result(dataMode='value', resultType='segment')
         # Set metadata
-        bpmAttr = dict(id="aubio_bpm",
-                       name="bpm (aubio)",
-                       unit="bpm")
-        bpm.metadata = dict(bpmAttr.items() + commonAttr.items())
-        # Set Data
+        bpm.idMetadata.id="aubio_bpm"
+        bpm.idMetadata.name="bpm (aubio)"
+        bpm.idMetadata.unit="bpm"
+
+        # Set Data, dataMode='value', resultType='segment'
         if len(self.beats) > 1:
             periods = 60. / numpy.diff(self.beats)
-            bpm.data = periods
+            periods = numpy.append(periods,periods[-1])
+
+            bpm.data.data = zip(self.beats, duration, periods)
+
         else:
-            bpm.data = []
+            bpm.data.data = []
+
+        container.add_result(bpm)
 
-        return AnalyzerResultContainer([onsets, onsetrate, beats, bpm])
+        return container
diff --git a/timeside/analyzer/core.py b/timeside/analyzer/core.py
index ee023d0..4027c1d 100644
--- a/timeside/analyzer/core.py
+++ b/timeside/analyzer/core.py
@@ -20,9 +20,12 @@
 # Authors:
 #   Guillaume Pellerin
 #   Paul Brossier
+#   Thomas Fillon
 
 from utils import downsample_blocking
-
+from timeside.core import Processor, implements, interfacedoc
+from timeside.api import IAnalyzer
+from timeside import __version__ as TimeSideVersion
 import numpy
 
 numpy_data_types = [
     #'float128',
@@ -143,9 +146,7 @@ class IdMetadata(MetadataObject):
                                   ('date', ''),
                                   ('version', ''),
                                   ('author', '')])
-    # HINT :
-    # from datetime import datetime
-    #date = datetime.now().replace(microsecond=0).isoformat(' ')
+
 
 class AudioMetadata(MetadataObject):
     '''
@@ -207,7 +208,7 @@ class LabelMetadata(MetadataObject):
     # Define default values
     _default_value = OrderedDict([('label', None),
                                   ('description', None),
-                                  ('labelType', None)])
+                                  ('labelType', 'mono')])
 
 
 class FrameMetadata(MetadataObject):
@@ -228,7 +229,6 @@ class FrameMetadata(MetadataObject):
                                   ('blocksize', None),
                                   ('stepsize', None)])
 
-
 class AnalyserData(MetadataObject):
     '''
     Metadata object to handle Frame related Metadata
@@ -248,8 +248,28 @@ class AnalyserData(MetadataObject):
                                   ('dataType', ''),
                                   ('dataMode', '')])
 
+    def __setattr__(self, name, value):
+        # Set Data with the proper type
+        if name == 'data':
+            if value is None:
+                value = []
+            # make a numpy.array out of list
+            if type(value) is list:
+                value = numpy.array(value)
+            # serialize using numpy
+            if type(value) in numpy_data_types:
+                value = value.tolist()
+            if type(value) not in [list, str, int, long, float, complex, type(None)] + numpy_data_types:
+                raise TypeError('AnalyzerResult can not accept type %s' %
+                                type(value))
+
+        # TODO : guess dataType from value and set datType with:
+        #super(AnalyserData, self).__setattr__('dataType', dataType)
 
-class newAnalyzerResults(MetadataObject):
+        super(AnalyserData, self).__setattr__(name, value)
+
+
+class newAnalyzerResult(MetadataObject):
     """
     Object that contains the metadata and parameters of an analyzer process
@@ -267,8 +287,8 @@
     from collections import OrderedDict
     # Define default values as an OrderDict
     # in order to keep the order of the keys for display
-    _default_value = OrderedDict([('data', None),
-                                  ('idMetadata', None),
+    _default_value = OrderedDict([('idMetadata', None),
+                                  ('data', None),
                                   ('audioMetadata', None),
                                   ('frameMetadata', None),
                                   ('labelMetadata', None),
@@ -417,7 +437,8 @@
             for res in analyzer_result:
                 self.add_result(res)
             return
-        if type(analyzer_result) != AnalyzerResult:
+        if not (isinstance(analyzer_result, AnalyzerResult)
+                or isinstance(analyzer_result, newAnalyzerResult)):
             raise TypeError('only AnalyzerResult can be added')
         self.results += [analyzer_result]
 
@@ -588,4 +609,96 @@
         finally:
             h5_file.close() # Close the HDF5 file
 
-        return data_list
\ No newline at end of file
+        return data_list
+
+
+class Analyzer(Processor):
+    '''
+    Generic class for the analyzers
+    '''
+
+    implements(IAnalyzer)
+
+    @interfacedoc
+    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+        super(Analyzer, self).setup(channels, samplerate, blocksize, totalframes)
+
+        # Set default values for output_* attributes
+        # may be overwritten by the analyzer
+        self.output_channels = self.input_channels
+        self.output_samplerate = self.input_samplerate
+        self.output_blocksize = self.input_blocksize
+        self.output_stepsize = self.input_blocksize
+
+    def results(self):
+        container = AnalyzerResultContainer()
+        return container
+
+    @staticmethod
+    @interfacedoc
+    def id():
+        return "analyzer"
+
+    @staticmethod
+    @interfacedoc
+    def name():
+        return "Generic analyzer"
+
+    @staticmethod
+    @interfacedoc
+    def unit():
+        return ""
+
+    def new_result(self, dataMode='value', resultType='framewise'):
+        '''
+        Create a new result
+
+        Attributes
+        ----------
+        data : MetadataObject
+        idMetadata : MetadataObject
+        audioMetadata : MetadataObject
+        frameMetadata : MetadataObject
+        labelMetadata : MetadataObject
+        parameters : dict
+
+        '''
+
+        from datetime import datetime
+
+        result = newAnalyzerResult()
+        # Automatically write known metadata
+        result.idMetadata = IdMetadata(date=datetime.now().replace(microsecond=0).isoformat(' '),
+                                       version=TimeSideVersion,
+                                       author='TimeSide')
+        result.audioMetadata = AudioMetadata(uri=self.mediainfo()['uri'])
+
+        result.data = AnalyserData(dataMode=dataMode)
+
+        if dataMode == 'value':
+            pass
+        elif dataMode == 'label':
+            result.labelMetadata = LabelMetadata()
+        else:
+            # raise ArgError('')
+            pass
+
+        if resultType == 'framewise':
+            result.frameMetadata = FrameMetadata(
+                samplerate=self.output_samplerate,
+                blocksize=self.output_blocksize,
+                stepsize=self.input_stepsize)
+        elif resultType == 'value':
+            # None : handle by data
+            pass
+        elif resultType == 'segment':
+            # None : handle by data
+            pass
+        elif resultType == 'event':
+            # None : handle by data, duration = 0
+            pass
+        else:
+            # raise ArgError('')
+            pass
+
+        return result
\ No newline at end of file
diff --git a/timeside/api.py b/timeside/api.py
index 116cc74..6d7f402 100644
--- a/timeside/api.py
+++ b/timeside/api.py
@@ -81,6 +81,13 @@ class IProcessor(Interface):
     #   implementations should always call the parent method
 
+    def mediainfo():
+        """
+        Information about the media object
+            uri
+            start
+            duration
+        """
 
 class IEncoder(IProcessor):
     """Encoder driver interface. Each encoder is expected to support a specific
Each encoder is expected to support a specific diff --git a/timeside/core.py b/timeside/core.py index 6979b8b..f477f80 100644 --- a/timeside/core.py +++ b/timeside/core.py @@ -61,30 +61,43 @@ class Processor(Component): implements(IProcessor) @interfacedoc - def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None): - self.input_channels = channels - self.input_samplerate = samplerate - self.input_blocksize = blocksize - self.input_totalframes = totalframes + def setup(self, channels=None, samplerate=None, blocksize=None, + totalframes=None): + self.source_channels = channels + self.source_samplerate = samplerate + self.source_blocksize = blocksize + self.source_totalframes = totalframes + + # If empty Set default values for input_* attributes + # may be setted by the processor during __init__() + if not hasattr(self, 'input_channels'): + self.input_channels = self.source_channels + if not hasattr(self, 'input_samplerate'): + self.input_samplerate = self.source_samplerate + if not hasattr(self, 'input_blocksize'): + self.input_blocksize = self.source_blocksize + if not hasattr(self, 'input_stepsize'): + self.input_stepsize = self.source_blocksize + # default channels(), samplerate() and blocksize() implementations returns - # the input characteristics, but processors may change this behaviour by + # the source characteristics, but processors may change this behaviour by # overloading those methods @interfacedoc def channels(self): - return self.input_channels + return self.source_channels @interfacedoc def samplerate(self): - return self.input_samplerate + return self.source_samplerate @interfacedoc def blocksize(self): - return self.input_blocksize + return self.source_blocksize @interfacedoc def totalframes(self): - return self.input_totalframes + return self.source_totalframes @interfacedoc def process(self, frames, eod): @@ -94,6 +107,10 @@ class Processor(Component): def release(self): pass + @interfacedoc + def mediainfo(self): + return self.source_mediainfo + def __del__(self): self.release() @@ -220,6 +237,7 @@ class ProcessPipe(object): samplerate = last.samplerate(), blocksize = last.blocksize(), totalframes = last.totalframes()) + item.source_mediainfo = source.mediainfo() last = item # now stream audio data along the pipe @@ -232,4 +250,4 @@ class ProcessPipe(object): for item in items: item.release() - return self \ No newline at end of file + return self diff --git a/timeside/decoder/core.py b/timeside/decoder/core.py index fe5c680..a9199dc 100644 --- a/timeside/decoder/core.py +++ b/timeside/decoder/core.py @@ -316,6 +316,12 @@ class FileDecoder(Processor): def release(self): pass + @interfacedoc + def mediainfo(self): + return dict(uri=self.uri) + # TODO : for segment support : + #return dict(uri=self.uri, duration=self.input_duration, segment_start=self.segment_start, segment_duration=self.segment_duration) + def __del__(self): self.release() @@ -341,4 +347,4 @@ class FileDecoder(Processor): @interfacedoc def metadata(self): # TODO check - return self.tags \ No newline at end of file + return self.tags diff --git a/timeside/encoder/ogg.py b/timeside/encoder/ogg.py index b834e15..7bdddcc 100644 --- a/timeside/encoder/ogg.py +++ b/timeside/encoder/ogg.py @@ -31,7 +31,6 @@ class VorbisEncoder(GstEncoder): @interfacedoc def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None): super(VorbisEncoder, self).setup(channels, samplerate, blocksize, totalframes) - self.pipe = ''' appsrc name=src ! audioconvert ! vorbisenc -- 2.39.5