From: Thomas Fillon
Date: Fri, 5 Jul 2013 22:43:58 +0000 (+0200)
Subject: Refactoring of AnalyzerResult in order to address issue #13
X-Git-Tag: 0.5.0~71^2~11
X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=ac4479b092b89f1e0c4183dfa0298a377fd09f11;p=timeside.git

Refactoring of AnalyzerResult in order to address issue #13

Add AnalyzerAttributes to handle the metadata in AnalyzerResult and to store
the analyzer parameters.

Attributes include:
- [x] sampleRate, blockSize, stepSize
- [ ] version of the analyzer (or TimeSide release?)
- [ ] audio source file name
- [x] analysis parameters
- [ ] precision of the result data (float, long, ...)
- [ ] stereo / mono / left / right channel management during analysis?

AnalyzerResult now includes a @property field "properties" containing a
dictionary of the following statistics about the result data:
- mean
- median
- std
- min
- max

Also change the result value returned by AubioTemporal, because mean and
median are now included in "properties".
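A minimal usage sketch of the refactored API described above (the class names
and import path follow this patch; the sample rate, block/step sizes and data
values are purely illustrative):

    import numpy
    from timeside.analyzer.core import (AnalyzerAttributes, AnalyzerResult,
                                        AnalyzerResultContainer)

    # Metadata and analysis parameters now live in AnalyzerAttributes...
    attr = AnalyzerAttributes(id="aubio_pitch",
                              name="f0 (aubio)",
                              unit="Hz",
                              sampleRate=44100,
                              blockSize=2048,
                              stepSize=1024)

    # ...while AnalyzerResult only carries the raw data plus those attributes.
    result = AnalyzerResult(data=numpy.ones(100) * 440., attributes=attr)

    # Statistics are computed on demand instead of being stored as separate
    # *_mean / *_median results:
    stats = result.properties  # dict with mean, median, std, min, max

    container = AnalyzerResultContainer(result)
    json_str = container.to_json()  # round-trip with container.from_json()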
---

diff --git a/tests/test_AnalyzerResult.py b/tests/test_AnalyzerResult.py
index 8985f4c..a2ef644 100755
--- a/tests/test_AnalyzerResult.py
+++ b/tests/test_AnalyzerResult.py
@@ -7,63 +7,65 @@ from math import pi
 
 verbose = 0
 
+
 class TestAnalyzerResult(TestCase):
     """ test AnalyzerResult """
     def setUp(self):
-        self.result = AnalyzerResult(id = "foo_bar", name = "Foo bar", unit = "foo")
+        self.result = AnalyzerResult()
+        self.result.attributes=dict(id="foo_bar", name="Foo bar", unit="foo")
 
     def testOnFloat(self):
         "float result"
-        self.result.value = 1.2
+        self.result.data = 1.2
 
     def testOnInt(self):
         "integer result"
-        self.result.value = 1
+        self.result.data = 1
 
     def testOnList(self):
         "list result"
-        self.result.value = [1., 2.]
+        self.result.data = [1., 2.]
 
     def testOnString(self):
         "string result"
-        self.result.value = "hello"
+        self.result.data = "hello"
 
     def testOnListOfString(self):
         "list of strings result"
-        self.result.value = ["hello", "hola"]
+        self.result.data = ["hello", "hola"]
 
     def testOnListOfList(self):
         "list of lists result"
-        self.result.value = [[0,1], [0,1,2]]
+        self.result.data = [[0, 1], [0, 1, 2]]
 
     def testOnNumpyVectorOfFloat(self):
         "numpy vector of float"
-        self.result.value = ones(2, dtype = 'float') * pi
+        self.result.data = ones(2, dtype='float') * pi
 
     def testOnNumpy2DArrayOfFloat64(self):
         "numpy 2d array of float64"
-        self.result.value = ones([2,3], dtype = 'float64') * pi
+        self.result.data = ones([2, 3], dtype='float64') * pi
 
     def testOnNumpy3DArrayOfInt32(self):
         "numpy 3d array of int32"
-        self.result.value = ones([2,3,2], dtype = 'int32') * pi
+        self.result.data = ones([2, 3, 2], dtype='int32') * pi
 
     def testOnNumpyArrayOfStrings(self):
         "numpy array of strings"
-        self.result.value = array(['hello', 'hola'])
+        self.result.data = array(['hello', 'hola'])
 
     def testOnEmptyList(self):
         "empty list"
-        self.result.value = []
+        self.result.data = []
 
     def testOnNone(self):
         "None"
-        self.result.value = None
+        self.result.data = None
 
     def testOnUnicode(self):
         "None"
-        self.result.value = None
+        self.result.data = None
 
     def tearDown(self):
         pass
@@ -94,35 +96,38 @@ bad_numpy_data_types = [
     'timedelta64',
 ]
 
-def create_good_method_func (numpy_data_type):
+
+def create_good_method_func(numpy_data_type):
     def method(self):
         "numpy %s" % numpy_data_type
         import numpy
-        self.result.value = getattr(numpy, numpy_data_type)(pi)
+        self.result.data = getattr(numpy, numpy_data_type)(pi)
     return method
 
-def create_bad_method_func (numpy_data_type):
+
+def create_bad_method_func(numpy_data_type):
     def method(self):
         "numpy %s" % numpy_data_type
         import numpy
         try:
-            value = getattr(numpy, numpy_data_type)(pi)
+            data = getattr(numpy, numpy_data_type)(pi)
         except ValueError:
-            value = getattr(numpy, numpy_data_type)()
-        self.assertRaises(TypeError, self.result.__setattr__, 'value', value)
+            data = getattr(numpy, numpy_data_type)()
+        self.assertRaises(TypeError, self.result.__setattr__, 'data', data)
     return method
 
 for numpy_data_type in good_numpy_data_types:
-    test_method = create_good_method_func (numpy_data_type)
+    test_method = create_good_method_func(numpy_data_type)
     test_method.__name__ = 'testOnNumpy_%s' % numpy_data_type
     test_method.__doc__ = 'groks a numpy %s' % numpy_data_type
-    setattr (TestAnalyzerResult, test_method.__name__, test_method)
+    setattr(TestAnalyzerResult, test_method.__name__, test_method)
 
 for numpy_data_type in bad_numpy_data_types:
-    test_method = create_bad_method_func (numpy_data_type)
+    test_method = create_bad_method_func(numpy_data_type)
    test_method.__name__ = 'testOnNumpy_%s' % numpy_data_type
     test_method.__doc__ = 'gasps on numpy %s' % numpy_data_type
-    setattr (TestAnalyzerResult, test_method.__name__, test_method)
+    setattr(TestAnalyzerResult, test_method.__name__, test_method)
+
 
 class TestAnalyzerResultNumpy(TestAnalyzerResult):
     """ test AnalyzerResult numpy serialize """
@@ -134,8 +139,9 @@ class TestAnalyzerResultNumpy(TestAnalyzerResult):
         if verbose:
             print '%15s' % 'from numpy:',
             print d_numpy
-        for i in range(len(d_numpy)):
-            self.assertEquals(d_numpy[i], results[i])
+
+        self.assertEquals(d_numpy, results)
+
 
 class TestAnalyzerResultHdf5(TestAnalyzerResult):
     """ test AnalyzerResult hdf5 serialize """
@@ -149,10 +155,11 @@ class TestAnalyzerResultHdf5(TestAnalyzerResult):
             print res_hdf5
         self.assertEquals(res_hdf5, results)
 
+
 class TestAnalyzerResultYaml(TestAnalyzerResult):
     """ test AnalyzerResult yaml serialize """
     def tearDown(self):
-        results = AnalyzerResultContainer([self.result])
+        results = AnalyzerResultContainer(self.result)
         r_yaml = results.to_yaml()
         if verbose:
             print 'to yaml:'
@@ -161,8 +168,9 @@ class TestAnalyzerResultYaml(TestAnalyzerResult):
         if verbose:
             print '%15s' % 'from yaml:',
             print d_yaml
-        for i in range(len(d_yaml)):
-            self.assertEquals(results[i], d_yaml[i])
+        #for i in range(len(d_yaml)):
+        self.assertEquals(results, d_yaml)
+
 
 class TestAnalyzerResultXml(TestAnalyzerResult):
     """ test AnalyzerResult xml serialize """
@@ -178,14 +186,18 @@ class TestAnalyzerResultXml(TestAnalyzerResult):
             print '%15s' % 'from xml:',
             print d_xml
 
-        for i in range(len(d_xml)):
-            self.assertEquals(d_xml[i], results[i])
+        #for i in range(len(d_xml)):
+        self.assertEquals(d_xml, results)
+
 
 class TestAnalyzerResultJson(TestAnalyzerResult):
     """ test AnalyzerResult json serialize """
     def tearDown(self):
         results = AnalyzerResultContainer([self.result])
-        r_json = results.to_json()
+        try:
+            r_json = results.to_json()
+        except TypeError:
+            print('TYPE ERROR IN JSON')
         if verbose:
             print 'to json:'
             print r_json
@@ -195,8 +207,8 @@ class TestAnalyzerResultJson(TestAnalyzerResult):
             print d_json
             print '%15s' % 'from yaml:',
 
-        for i in range(len(d_json)):
-            self.assertEquals(d_json[i], results[i])
+        #for i in range(len(d_json)):
+        self.assertEquals(d_json, results)
 
 if __name__ == '__main__':
-    unittest.main(testRunner=TestRunner())
+    unittest.main(testRunner=TestRunner())
\ No newline at end of file
diff --git a/tests/test_analyzer_dc.py b/tests/test_analyzer_dc.py
index a737ab2..99bd450 100755
--- a/tests/test_analyzer_dc.py
+++ b/tests/test_analyzer_dc.py
@@ -3,7 +3,8 @@ from unit_timeside import *
 
 from timeside.decoder import *
 from timeside.analyzer.dc import MeanDCShift
-from timeside.analyzer.core import AnalyzerResultContainer
+from timeside.analyzer.core import AnalyzerResult, AnalyzerAttributes
+from numpy import round
 
 
 class TestAnalyzerDC(TestCase):
@@ -13,18 +14,31 @@ class TestAnalyzerDC(TestCase):
     def testOnSweep(self):
         "runs on sweep"
         self.source = os.path.join (os.path.dirname(__file__), "samples", "sweep.wav")
-        self.expected = [{"value": -0.0, "name": "Mean DC shift", "unit": "%", "id": "mean_dc_shift"}]
+        attributes=AnalyzerAttributes(name="Mean DC shift",
+                                      unit="%",
+                                      id="mean_dc_shift",
+                                      sampleRate=44100,
+                                      blockSize=None,
+                                      stepSize=None)
+
+        self.expected = AnalyzerResult(data=-0.000, attributes=attributes)
 
     def testOnGuitar(self):
         "runs on guitar"
         self.source = os.path.join (os.path.dirname(__file__), "samples", "guitar.wav")
-        self.expected = [{"value": 0.054, "name": "Mean DC shift", "unit": "%", "id": "mean_dc_shift"}]
+        attributes=AnalyzerAttributes(name="Mean DC shift",
+                                      unit="%",
+                                      id="mean_dc_shift",
+                                      sampleRate=44100,
+                                      blockSize=None,
+                                      stepSize=None)
+        self.expected = AnalyzerResult(data=0.054, attributes=attributes)
 
     def tearDown(self):
         decoder = FileDecoder(self.source)
         (decoder | self.analyzer).run()
         results = self.analyzer.results()
-        self.assertEquals(results, AnalyzerResultContainer(self.expected))
+        self.assertEquals(results[0], self.expected)
 
 if __name__ == '__main__':
     unittest.main(testRunner=TestRunner())
diff --git a/tests/test_analyzer_level.py b/tests/test_analyzer_level.py
index c5ea551..10248e1 100755
--- a/tests/test_analyzer_level.py
+++ b/tests/test_analyzer_level.py
@@ -3,6 +3,8 @@ from unit_timeside import *
 
 from timeside.decoder import *
 from timeside.analyzer.level import Level
+from timeside.analyzer import AnalyzerResult, AnalyzerResultContainer
+from timeside.analyzer import AnalyzerAttributes
 
 
 class TestAnalyzerLevel(TestCase):
@@ -12,14 +14,40 @@ class TestAnalyzerLevel(TestCase):
     def testOnSweep(self):
         "runs on sweep"
         self.source = os.path.join (os.path.dirname(__file__), "samples", "sweep.wav")
-        self.expected = [{"value": -6.021, "name": "Max level", "unit": "dBFS", "id": "max_level"},
-                         {"value": -9.856, "name": "RMS level", "unit": "dBFS", "id": "rms_level"}]
+
+        # Max level
+        attr = AnalyzerAttributes(id="max_level",
+                                  name="Max level",
+                                  unit = "dBFS",
+                                  sampleRate=44100)
+        max_level = AnalyzerResult(-6.021, attr)
+
+        # RMS level
+        attr = AnalyzerAttributes(id="rms_level",
+                                  name="RMS level",
+                                  unit="dBFS",
+                                  sampleRate=44100)
+        rms_level = AnalyzerResult(-9.856, attr)
+        self.expected = AnalyzerResultContainer([max_level,rms_level])
+
     def testOnGuitar(self):
         "runs on guitar"
         self.source = os.path.join (os.path.dirname(__file__), "samples", "guitar.wav")
-        self.expected = [{"value": -4.258, "name": "Max level", "unit": "dBFS", "id": "max_level"},
-                         {"value": -21.945, "name": "RMS level", "unit": "dBFS", "id": "rms_level"}]
+
+        # Max level
+        attr = AnalyzerAttributes(id="max_level",
+                                  name="Max level",
+                                  unit = "dBFS",
+                                  sampleRate=44100)
+        max_level = AnalyzerResult(-4.258, attr)
+
+        # RMS level
+        attr = AnalyzerAttributes(id="rms_level",
+                                  name="RMS level",
+                                  unit="dBFS",
+                                  sampleRate=44100)
+        rms_level = AnalyzerResult(-21.945, attr)
+        self.expected = AnalyzerResultContainer([max_level,rms_level])
 
     def tearDown(self):
         decoder = FileDecoder(self.source)
diff --git a/timeside/analyzer/aubio_melenergy.py b/timeside/analyzer/aubio_melenergy.py
index dc39de0..bec64df 100644
--- a/timeside/analyzer/aubio_melenergy.py
+++ b/timeside/analyzer/aubio_melenergy.py
@@ -62,18 +62,24 @@ class AubioMelEnergy(Processor):
 
     def results(self):
 
         container = AnalyzerResultContainer()
-
-        melenergy = AnalyzerResult(id = "aubio_melenergy", name = "melenergy (aubio)", unit = "")
-        melenergy.value = self.melenergy_results
+        melenergy = AnalyzerResult()
+
+        # Get attributes
+        sampleRate = self.samplerate()
+        blockSize = self.win_s
+        stepSize = self.hop_s
+        parameters = dict(n_filters= self.n_filters,
+                          n_coeffs= self.n_coeffs)
+        # Set attributes
+        melenergy.attributes = AnalyzerAttributes(id="aubio_melenergy",
+                                                  name="melenergy (aubio)",
+                                                  unit='',
+                                                  sampleRate = sampleRate,
+                                                  blockSize = blockSize,
+                                                  stepSize = stepSize,
+                                                  parameters = parameters)
+        # Set Data
+        melenergy.data = self.melenergy_results
         container.add_result(melenergy)
-
-        melenergy_mean = AnalyzerResult(id = "aubio_melenergy_mean", name = "melenergy mean (aubio)", unit = "")
-        melenergy_mean.value = numpy.mean(self.melenergy_results,axis=0)
-        container.add_result(melenergy_mean)
-
-        melenergy_median = AnalyzerResult(id = "aubio_melenergy_median", name = "melenergy median (aubio)", unit = "")
-        melenergy_median.value = numpy.median(self.melenergy_results,axis=0)
-        container.add_result(melenergy_median)
-
         return container
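The per-feature *_mean and *_median results removed above are now derived on
demand from the new "properties" property of AnalyzerResult; a short sketch
(the 4x40 array stands in for real mel energy frames):

    import numpy
    from timeside.analyzer.core import AnalyzerResult

    melenergy = AnalyzerResult(data=numpy.ones((4, 40)))
    stats = melenergy.properties
    # stats['mean'] and stats['median'] (computed along axis 0) play the role
    # of the former "aubio_melenergy_mean" / "aubio_melenergy_median" results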
diff --git a/timeside/analyzer/aubio_mfcc.py b/timeside/analyzer/aubio_mfcc.py
index 432b257..06fbbbe 100644
--- a/timeside/analyzer/aubio_mfcc.py
+++ b/timeside/analyzer/aubio_mfcc.py
@@ -63,14 +63,20 @@ class AubioMfcc(Processor):
         return frames, eod
 
     def results(self):
-
-        mfcc = AnalyzerResult(id = "aubio_mfcc", name = "mfcc (aubio)", unit = "")
-        mfcc.value = [list(line) for line in self.mfcc_results]
-
-        mfcc_mean = AnalyzerResult(id = "aubio_mfcc_mean", name = "mfcc mean (aubio)", unit = "")
-        mfcc_mean.value = list(self.mfcc_results.mean(axis=0))
-
-        mfcc_median = AnalyzerResult(id = "aubio_mfcc_median", name = "mfcc median (aubio)", unit = "")
-        mfcc_median.value = list(numpy.median(self.mfcc_results,axis=0))
-
-        return AnalyzerResultContainer([mfcc, mfcc_median, mfcc_mean])
+        # MFCC
+        mfcc = AnalyzerResult()
+        sampleRate = self.samplerate()
+        blockSize = self.win_s
+        stepSize = self.hop_s
+        parameters = dict(n_filters= self.n_filters,
+                          n_coeffs= self.n_coeffs)
+        mfcc.attributes = AnalyzerAttributes(id = "aubio_mfcc",
+                                             name = "mfcc (aubio)",
+                                             unit = "",
+                                             sampleRate = sampleRate,
+                                             blockSize = blockSize,
+                                             stepSize = stepSize,
+                                             parameters = parameters)
+        mfcc.data = [list(line) for line in self.mfcc_results]  # TODO : type ? list list ?
+
+        return AnalyzerResultContainer(mfcc)
diff --git a/timeside/analyzer/aubio_pitch.py b/timeside/analyzer/aubio_pitch.py
index 6871939..8a290ae 100644
--- a/timeside/analyzer/aubio_pitch.py
+++ b/timeside/analyzer/aubio_pitch.py
@@ -64,21 +64,25 @@ class AubioPitch(Processor):
 
     def results(self):
-        #container = AnalyzerResultContainer()
-
+        container = AnalyzerResultContainer()
+        pitch = AnalyzerResult()
+
+        # Get attributes
+        sampleRate = self.samplerate()
+        blockSize = self.win_s
+        stepSize = self.hop_s
+        # parameters : None  # TODO check with Piem "default" and "freq" in setup
+
+        # Set attributes
+        pitch.attributes = AnalyzerAttributes(id="aubio_pitch",
+                                              name="f0 (aubio)",
+                                              unit='Hz',
+                                              sampleRate = sampleRate,
+                                              blockSize = blockSize,
+                                              stepSize = stepSize)
+        # Set Data
         self.pitches = numpy.array(self.pitches)
-
-        pitch = AnalyzerResult(id = "aubio_pitch", name = "f0 (aubio)", unit = "Hz")
-        pitch.value = self.pitches
-        #container.add_result(pitch)
-
-        pitch_mean = AnalyzerResult(id = "aubio_pitch_mean", name = "f0 mean (aubio)", unit = "Hz")
-        pitch_mean.value = numpy.mean(self.pitches)
-        #container.add_result(pitch_mean)
-
-        pitch_median = AnalyzerResult(id = "aubio_pitch_median", name = "f0 median (aubio)", unit = "Hz")
-        pitch_median.value = numpy.median(self.pitches)
-        #container.add_result(pitch_median)
-
-        #return container
-        return AnalyzerResultContainer([pitch, pitch_mean, pitch_median])
+        pitch.data = self.pitches
+        container.add_result(pitch)
+
+        return container
diff --git a/timeside/analyzer/aubio_specdesc.py b/timeside/analyzer/aubio_specdesc.py
index 6c52031..e23f4af 100644
--- a/timeside/analyzer/aubio_specdesc.py
+++ b/timeside/analyzer/aubio_specdesc.py
@@ -53,7 +53,7 @@ class AubioSpecdesc(Processor):
     @staticmethod
     @interfacedoc
     def name():
-        return "Mel Energy analysis (aubio)"
+        return "Spectral Descriptor (aubio)"
 
     def process(self, frames, eod=False):
         for samples in downsample_blocking(frames, self.hop_s):
@@ -65,27 +65,29 @@ class AubioSpecdesc(Processor):
 
     def results(self):
 
         container = AnalyzerResultContainer()
-
+        # Get common attributes
+        sampleRate = self.samplerate()
+        blockSize = self.win_s
+        stepSize = self.hop_s
+        unit = ""
+        # For each method store results in container
         for method in self.methods:
+            specdesc = AnalyzerResult()
+            # Set attributes
             id = '_'.join(["aubio_specdesc", method])
             name = ' '.join(["spectral descriptor", method, "(aubio)"])
-            unit = ""
-
-            values = numpy.array(self.specdesc_results[method])
-
-            specdesc = AnalyzerResult(id = id, name = name, unit = unit)
-            specdesc.value = values
-
-            mean_id = '_'.join([id, 'mean'])
-            mean_name = ' '.join(["spectral descriptor", method, "mean", "(aubio)"])
-            specdesc_mean = AnalyzerResult(id = mean_id, name = mean_name, unit = "")
-            specdesc_mean.value = numpy.mean(values,axis=0)
-
-            median_id = '_'.join([id, 'median'])
-            median_name = ' '.join(["spectral descriptor", method, "median", "(aubio)"])
-            specdesc_median = AnalyzerResult(id = median_id , name = median_name , unit = "")
-            specdesc_median.value = numpy.median(values,axis=0)
-
-            container.add_result([specdesc, specdesc_mean, specdesc_median])
+
+
+            specdesc.attributes = AnalyzerAttributes(id = id,
+                                                     name = name,
+                                                     unit = unit,
+                                                     sampleRate = sampleRate,
+                                                     blockSize = blockSize,
+                                                     stepSize = stepSize)
+
+            # Set Data
+            specdesc.data = numpy.array(self.specdesc_results[method])
+
+            container.add_result(specdesc)
 
         return container

diff --git a/timeside/analyzer/aubio_temporal.py b/timeside/analyzer/aubio_temporal.py
index 34facbf..eb5b251 100644
--- a/timeside/analyzer/aubio_temporal.py
+++ b/timeside/analyzer/aubio_temporal.py
@@ -24,6 +24,7 @@ from timeside.analyzer.core import *
 from timeside.api import IAnalyzer
 from aubio import onset, tempo
 
+
 class AubioTemporal(Processor):
     implements(IAnalyzer)
@@ -66,29 +67,70 @@ class AubioTemporal(Processor):
         return frames, eod
 
     def results(self):
-        from numpy import mean, median
-
-        onsets = AnalyzerResult(id = "aubio_onset", name = "onsets (aubio)", unit = "s")
-        onsets.value = self.onsets
-
-        onsetrate_mean = AnalyzerResult(id = "aubio_onset_rate_mean", name = "onset rate (aubio)", unit = "bpm")
-        onsetrate_median = AnalyzerResult(id = "aubio_onset_rate_median", name = "onset rate (median) (aubio)", unit = "bpm")
+        # Get common attributes
+        commonAttr = dict(sampleRate=self.samplerate(),
+                          blockSize=self.win_s,
+                          stepSize=self.hop_s)
+        # FIXME : Onsets, beat and onset rate are not frame based Results
+        # sampleRate, blockSize, etc. are not appropriate here
+        # Those might be some kind of "AnalyzerSegmentResults"
+
+        #---------------------------------
+        #  Onsets
+        #---------------------------------
+        onsets = AnalyzerResult()
+        # Set attributes
+        onsetsAttr = dict(id="aubio_onset",
+                          name="onsets (aubio)",
+                          unit="s")
+        onsets.attributes = dict(onsetsAttr.items() + commonAttr.items())
+        # Set Data
+        onsets.data = self.onsets
+
+        #---------------------------------
+        #  Onset Rate
+        #---------------------------------
+        onsetRate = AnalyzerResult()
+        # Set attributes
+        onsetRateAttr = dict(id="aubio_onset_rate",
+                             name="onset rate (aubio)",
+                             unit="bpm")
+        onsetRate.attributes = dict(onsetRateAttr.items() + commonAttr.items())
+        # Set Data
         if len(self.onsets) > 1:
-            periods = [60./(b - a) for a,b in zip(self.onsets[:-1],self.onsets[1:])]
-            onsetrate_mean.value = mean (periods)
-            onsetrate_median.value = median (periods)
+            #periods = [60./(b - a) for a,b in zip(self.onsets[:-1],self.onsets[1:])]
+            periods = 60. / numpy.diff(self.onsets)
+            onsetRate.data = periods
         else:
-            onsetrate_mean.value = 0
-            onsetrate_median.value = 0
-
-        beats = AnalyzerResult(id = "aubio_beat", name = "beats (aubio)", unit = "s")
-        beats.value = self.beats
-
-        bpm = AnalyzerResult(id = "aubio_bpm", name = "bpm (aubio)", unit = "bpm")
-        if len(self.beats) > 2:
-            periods = [60./(b - a) for a,b in zip(self.beats[:-1],self.beats[1:])]
-            bpm.value = median (periods)
+            onsetRate.data = []
+
+        #---------------------------------
+        #  Beats
+        #---------------------------------
+        beats = AnalyzerResult()
+        # Set attributes
+        beatsAttr = dict(id="aubio_beat",
+                         name="beats (aubio)",
+                         unit="s")
+        beats.attributes = dict(beatsAttr.items() + commonAttr.items())
+        # Set Data
+        beats.data = self.beats
+
+        #---------------------------------
+        #  BPM
+        #---------------------------------
+        bpm = AnalyzerResult()
+        # Set attributes
+        bpmAttr = dict(id="aubio_bpm",
+                       name="bpm (aubio)",
+                       unit="bpm")
+        bpm.attributes = dict(bpmAttr.items() + commonAttr.items())
+        # Set Data
+        if len(self.beats) > 1:
+            #periods = [60./(b - a) for a,b in zip(self.beats[:-1],self.beats[1:])]
+            periods = 60. / numpy.diff(self.beats)
+            bpm.data = periods
         else:
-            bpm.value = 0
+            bpm.data = []
 
-        return AnalyzerResultContainer([onsets, onsetrate_mean, onsetrate_median, beats, bpm])
+        return AnalyzerResultContainer([onsets, onsetRate, beats, bpm])
\ No newline at end of file
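In AubioTemporal.results() above, each result's attributes are built by
merging two plain dicts with dict(a.items() + b.items()), a Python 2 idiom
(two lists of key/value pairs are concatenated; under Python 3 it would be
written {**a, **b}). AnalyzerResult then wraps the merged dict in
AnalyzerAttributes on assignment. A small sketch with illustrative values:

    from timeside.analyzer.core import AnalyzerResult

    commonAttr = dict(sampleRate=44100, blockSize=1024, stepSize=512)
    onsetsAttr = dict(id="aubio_onset", name="onsets (aubio)", unit="s")

    onsets = AnalyzerResult()
    # a plain dict is converted via AnalyzerAttributes(**value) on assignment
    onsets.attributes = dict(onsetsAttr.items() + commonAttr.items())
    assert onsets.attributes.unit == "s"
    assert onsets.attributes.sampleRate == 44100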
diff --git a/timeside/analyzer/core.py b/timeside/analyzer/core.py
index 24f5a83..ae28251 100644
--- a/timeside/analyzer/core.py
+++ b/timeside/analyzer/core.py
@@ -21,7 +21,7 @@
 #   Guillaume Pellerin
 #   Paul Brossier
 
-from utils import *
+from utils import downsample_blocking
 import numpy
 
 numpy_data_types = [
@@ -42,47 +42,182 @@ numpy_data_types = [
     #'complex128',',
     #'complex64',
 ]
-numpy_data_types = map(lambda x: getattr(numpy,x), numpy_data_types)
+numpy_data_types = map(lambda x: getattr(numpy, x), numpy_data_types)
 numpy_data_types += [numpy.ndarray]
 
-class AnalyzerResult(dict):
-    def __init__(self, id = "", name = "", unit = "s", value = None):
-        self['id'] = id
-        self['name'] = name
-        self['unit'] = unit
-        self['value'] = value
+
+class AnalyzerAttributes(object):
+    """
+    Object that contains the attributes and parameters of an analyzer process,
+    structure inspired by [1]
+    [1] : http://www.saltycrane.com/blog/2012/08/python-data-object-motivated-desire-mutable-namedtuple-default-values/
+
+    Attributes
+    ----------
+    id : string
+    name : string
+    unit : string
+    sampleRate : int or float
+    blockSize : int
+    stepSize : int
+    parameters : dict
+
+    Methods
+    -------
+    asdict()
+        Return a dictionary representation of the AnalyzerAttributes
+    """
+    from collections import OrderedDict
+    # Define default values as an OrderedDict
+    # in order to keep the order of the keys for display
+    _default_value = OrderedDict([('id', ''),
+                                  ('name', ''),
+                                  ('unit', ''),
+                                  ('sampleRate', None),
+                                  ('blockSize', None),
+                                  ('stepSize', None),
+                                  ('parameters', {})
+                                  ])
+    # TODO : add
+    # - timeside version
+    # - date (import datetime, ISO format)
+    # - filename (audio)
+    # - (long) description --> to be moved into the Processor API
+
+    def __init__(self, **kwargs):
+        '''
+        Construct an AnalyzerAttributes object
+
+        AnalyzerAttributes()
+
+        Parameters
+        ----------
+        id : string
+        name : string
+        unit : string
+        sampleRate : int or float
+        blockSize : int
+        stepSize : int
+        parameters : dict
+
+        Returns
+        -------
+        AnalyzerAttributes
+        '''
+        # Set Default values
+        for key, value in self._default_value.items():
+            setattr(self, key, value)
+
+        # Set attributes passed in as arguments
+        #for k, v in zip(self._default_value.keys(), args):
+        #    setattr(self, k, v)
+        #    print 'args'
+        for key, value in kwargs.items():
+            setattr(self, key, value)
 
     def __setattr__(self, name, value):
-        # make a numpy.array out of list
-        if type(value) is list:
-            value = numpy.array(value)
-        # serialize using numpy
-        if type(value) in numpy_data_types:
-            value = value.tolist()
-        if type(value) not in [list, str, int, long, float, complex, type(None)] + numpy_data_types:
-            raise TypeError, 'AnalyzerResult can not accept type %s' % type(value)
-        if name == 'value': self['value'] = value
+        if name not in self._default_value.keys():
+            raise AttributeError("%s is not a valid attribute in %s" %
+                                 (name, self.__class__.__name__))
+        super(AnalyzerAttributes, self).__setattr__(name, value)
+
+    def asdict(self):
+        return dict((att, getattr(self, att))
+                    for att in self._default_value.keys())
+
+    def __repr__(self):
+        return '{}({})'.format(
+            self.__class__.__name__,
+            ', '.join('{}={}'.format(
+                att, repr(getattr(self, att)))
+                for att in self._default_value.keys()))
+
+    def __eq__(self,other):
+        return (isinstance(other, self.__class__)
+                and self.asdict() == other.asdict())
+
+
+class AnalyzerResult(object):
+    """
+    Object that contains the results returned by an analyzer process
+    Attributes :
+        - data :
+        - attributes : an AnalyzerAttributes object containing the attributes
+    """
+    def __init__(self, data=None, attributes=None):
+        # Define Attributes
+        if attributes is None:
+            self.attributes = AnalyzerAttributes()
+        else:
+            self.attributes = attributes
+
+        # Define Data
+        if data is None:
+            self.data = []
+        else:
+            self.data = data
+
+    def __setattr__(self, name, value):
+        # Set Data with the proper type
+        if name == 'data':
+            if value is None:
+                value = []
+            # make a numpy.array out of list
+            if type(value) is list:
+                value = numpy.array(value)
+            # serialize using numpy
+            if type(value) in numpy_data_types:
+                value = value.tolist()
+            if type(value) not in [list, str, int, long, float, complex, type(None)] + numpy_data_types:
+                raise TypeError('AnalyzerResult can not accept type %s' %
+                                type(value))
+        elif name == 'attributes':
+            if not isinstance(value, AnalyzerAttributes):
+                value = AnalyzerAttributes(**value)
+        else:
+            raise AttributeError("%s is not a valid attribute in %s" %
+                                 (name, self.__class__.__name__))
+        return super(AnalyzerResult, self).__setattr__(name, value)
 
-    def __getattr__(self, name):
-        if name in ['id', 'name', 'unit', 'value']:
-            return self[name]
-        return super(AnalyzerResult, self).__getattr__(name)
+    @property
+    def properties(self):
+        prop = dict(mean=numpy.mean(self.data, axis=0),
+                    std=numpy.std(self.data, axis=0, ddof=1),
+                    median=numpy.median(self.data, axis=0),
+                    max=numpy.max(self.data, axis=0),
+                    min=numpy.min(self.data, axis=0)
+                    )
+        # TODO : add size
+        return(prop)
+
+#    def __getattr__(self, name):
+#        if name in ['id', 'name', 'unit', 'value', 'attributes']:
+#            return self[name]
+#        return super(AnalyzerResult, self).__getattr__(name)
+
+    def asdict(self):
+        return(dict(data=self.data, attributes=self.attributes.asdict()))
 
     def to_json(self):
         import simplejson as json
-        data_dict = {}
-        for a in ['name', 'id', 'unit', 'value']:
-            data_dict[a] = self[a]
-        return json.dumps(data_dict)
+        return json.dumps(self.asdict())
+
+    def __repr__(self):
+        return self.to_json()
+
+    def __eq__(self,other):
+        return (isinstance(other, self.__class__)
+                and self.asdict() == other.asdict())
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
 
 
 class AnalyzerResultContainer(object):
     def __init__(self, analyzer_results=None):
-        if analyzer_results is None:
-            analyzer_results = []
-        self.results = analyzer_results
+        self.results = []
+        if analyzer_results is not None:
+            self.add_result(analyzer_results)
 
     def __getitem__(self, i):
         return self.results[i]
@@ -91,91 +226,169 @@ class AnalyzerResultContainer(object):
         return len(self.results)
 
     def __repr__(self):
-        return self.to_json()
+        return [res.asdict() for res in self.results]
 
-    def __eq__(self, that):
-        if hasattr(that, 'results'):
-            that = that.results
-        for a, b in zip(self.results, that):
-            if a != b: return False
+    def __eq__(self, other):
+        if hasattr(other, 'results'):
+            other = other.results
+        for a, b in zip(self.results, other):
+            if a != b:
+                return False
         return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
 
     def add_result(self, analyzer_result):
         if type(analyzer_result) == list:
-            for a in analyzer_result:
-                self.add_result(a)
+            for res in analyzer_result:
+                self.add_result(res)
             return
         if type(analyzer_result) != AnalyzerResult:
             raise TypeError('only AnalyzerResult can be added')
         self.results += [analyzer_result]
 
-    def to_xml(self, data_list = None):
-        if data_list == None: data_list = self.results
-        import xml.dom.minidom
-        doc = xml.dom.minidom.Document()
-        root = doc.createElement('telemeta')
-        doc.appendChild(root)
-        for data in data_list:
-            node = doc.createElement('data')
-            for a in ['name', 'id', 'unit']:
-                node.setAttribute(a, str(data[a]) )
-            if type(data['value']) in [str, unicode]:
-                node.setAttribute('value', data['value'] )
+    def to_xml(self, data_list=None):
+        if data_list is None:
+            data_list = self.results
+        import xml.etree.ElementTree as ET
+        # TODO : cf. telemeta util
+        root = ET.Element('timeside')
+
+        for result in data_list:
+            res_node = ET.SubElement(root, 'result')
+            res_node.attrib = {'name': result.attributes.name,
+                               'id': result.attributes.id}
+            # Serialize Data
+            data_node = ET.SubElement(res_node, 'data')
+            if type(result.data) in [str, unicode]:
+                data_node.text = result.data
             else:
-                node.setAttribute('value', repr(data['value']) )
-            root.appendChild(node)
-        return xml.dom.minidom.Document.toprettyxml(doc)
+                data_node.text = repr(result.data)
+            # Serialize Attributes
+            attr_node = ET.SubElement(res_node, 'attributes')
+            for (name, val) in result.attributes.asdict().items():
+                # TODO reorder keys
+                child = ET.SubElement(attr_node, name)
+                if name == 'parameters':
+                    for (par_key, par_val) in val.items():
+                        par_child = ET.SubElement(child, par_key)
+                        par_child.text = repr(par_val)
+                else:
+                    child.text = repr(val)
+
+        #tree = ET.ElementTree(root)
+        return ET.tostring(root, encoding="utf-8", method="xml")
+#        import xml.dom.minidom
+#        doc = xml.dom.minidom.Document()
+#
+#        root = doc.createElement('telemeta')
+#        doc.appendChild(root)
+#        for result in data_list:
+#            node = doc.createElement('dataset')
+#            # Serialize Data
+#            if type(result.data) in [str, unicode]:
+#                node.setAttribute('data', result.data )
+#            else:
+#                node.setAttribute('data', repr(result.data))
+#            # Serialize Attributes
+#
+#            node_attr = doc.createElement('attributes')
+#            for name in result.attributes._default_value.keys():
+#                node_attr.setAttribute(name, str(result.attributes.name) )
+#            node.appendChild(node_attr)
+#            root.appendChild(node)
+#        return xml.dom.minidom.Document.toprettyxml(doc)
 
     def from_xml(self, xml_string):
-        import xml.dom.minidom
+        import xml.etree.ElementTree as ET
         import ast
-        doc = xml.dom.minidom.parseString(xml_string)
-        root = doc.getElementsByTagName('telemeta')[0]
-        results = []
-        for child in root.childNodes:
-            if child.nodeType != child.ELEMENT_NODE: continue
-            child_dict = {}
-            for a in ['name', 'id', 'unit']:
-                child_dict[a] = str(child.getAttribute(a))
+
+        results = AnalyzerResultContainer()
+        # TODO : from file
+        #tree = ET.parse(xml_file)
+        #root = tree.getroot()
+        root = ET.fromstring(xml_string)
+        for result_child in root.iter('result'):
+            result = AnalyzerResult()
+            # Get data
             try:
-                child_dict['value'] = ast.literal_eval(child.getAttribute('value'))
+                result.data = ast.literal_eval(result_child.find('data').text)
             except:
-                child_dict['value'] = child.getAttribute('value')
-            results.append(child_dict)
+                result.data = result_child.find('data').text
+
+            # Get attributes
+            for attr_child in result_child.find('attributes'):
+                name = attr_child.tag
+                if name == 'parameters':
+                    parameters = dict()
+                    for param_child in attr_child:
+                        par_key = param_child.tag
+                        par_val = param_child.text
+                        parameters[par_key] = ast.literal_eval(par_val)
+                    value = parameters
                 else:
+                    value = ast.literal_eval(attr_child.text)
+                result.attributes.__setattr__(name, value)
+            results.add_result(result)
         return results
+#
+#
+#        import xml.dom.minidom
+#        import ast
+#        doc = xml.dom.minidom.parseString(xml_string)
+#        root = doc.getElementsByTagName('telemeta')[0]
+#        results = []
+#        for child in root.childNodes:
+#            if child.nodeType != child.ELEMENT_NODE: continue
+#            result = AnalyzerResult()
+#            for a in ['name', 'id', 'unit']:
+#                child_dict[a] = str(child.getAttribute(a))
+#            # Get Data
+#            try:
+#                result.data = ast.literal_eval(child.getAttribute('data'))
#            except:
#                results.data = child.getAttribute('data')
#            # Get Attributes
#            node_attr = root.childNodes
#            results.append(results)
#        return results
 
-    def to_json(self, data_list = None):
-        if data_list == None: data_list = self.results
+    def to_json(self):
+        #if data_list == None: data_list = self.results
         import simplejson as json
-        data_strings = []
-        for data in data_list:
-            data_dict = {}
-            for a in ['name', 'id', 'unit', 'value']:
-                data_dict[a] = data[a]
-            data_strings.append(data_dict)
-        return json.dumps(data_strings)
+        return json.dumps([res.asdict() for res in self])
 
     def from_json(self, json_str):
         import simplejson as json
-        return json.loads(json_str)
+        results_json = json.loads(json_str)
+        results = AnalyzerResultContainer()
+        for res_json in results_json:
+            res = AnalyzerResult(data=res_json['data'],
+                                 attributes=res_json['attributes'])
+            results.add_result(res)
+        return results
 
-    def to_yaml(self, data_list = None):
-        if data_list == None: data_list = self.results
+    def to_yaml(self):
+        #if data_list == None: data_list = self.results
         import yaml
-        data_strings = []
-        for f in data_list:
-            f_dict = {}
-            for a in f.keys():
-                f_dict[a] = f[a]
-            data_strings.append(f_dict)
-        return yaml.dump(data_strings)
+        return yaml.dump([res.asdict() for res in self])
 
     def from_yaml(self, yaml_str):
         import yaml
-        return yaml.load(yaml_str)
 
-    def to_numpy(self, output_file, data_list = None):
-        if data_list == None: data_list = self.results
+        results_yaml = yaml.load(yaml_str)
+        results = AnalyzerResultContainer()
+        for res_yaml in results_yaml:
+            res = AnalyzerResult(data=res_yaml['data'],
+                                 attributes=res_yaml['attributes'])
+            results.add_result(res)
+        return results
+
+    def to_numpy(self, output_file, data_list=None):
+        if data_list is None:
+            data_list = self.results
         import numpy
         numpy.save(output_file, data_list)
 
@@ -183,11 +396,12 @@ class AnalyzerResultContainer(object):
     def from_numpy(self, input_file):
         import numpy
         return numpy.load(input_file)
 
-    def to_hdf5(self, output_file, data_list = None):
-        if data_list == None: data_list = self.results
-
+    def to_hdf5(self, output_file, data_list=None):
+        if data_list is None:
+            data_list = self.results
+
         import h5py
-
+
         # Open HDF5 file and save dataset
         # TODO : Check self.results format
         # as it assumes 'id', 'name', 'value' and 'units' keys
@@ -200,13 +414,13 @@
                 dset.attrs["unit"] = data['unit']
                 dset.attrs["name"] = data['name']
         except TypeError:
-            print('TypeError for HDF5 serialization')
-        finally:
-            h5_file.close() # Close the HDF5 file
+            pass
+        finally:
+            h5_file.close()  # Close the HDF5 file
 
     def from_hdf5(self, input_file):
         import h5py
-
+
         # Open HDF5 file for reading and get results
         h5_file = h5py.File(input_file, 'r')
         data_list = AnalyzerResultContainer()
         try:
             for (name, dset) in h5_file.items():
                 # Read Attributes
                 unit = dset.attrs['unit']
                 name = dset.attrs['name']
-                # Create new AnalyzerResult
-                data = AnalyzerResult(id = id, name = name, unit = unit)
-
+                # Create new AnalyzerResult
+                data = AnalyzerResult(id=id, name=name, unit=unit)
+
                 # Load value from the hdf5 dataset and store in data
-                # FIXME : the following conditional statement is to prevent
+                # FIXME : the following conditional statement is to prevent
                 # reading an empty dataset.
                 # see : https://github.com/h5py/h5py/issues/281
-                # It should be fixed by the next h5py version
-                if dset.shape!=(0,):
+                # It should be fixed by the next h5py version
+                if dset.shape != (0,):
                     data.value = dset[...]
                 else:
                     data.value = []
-
+
                 # TODO : enable import from yaafe hdf5 format
                 #for attr_name in dset.attrs.keys():
                 #    data[attr_name] = dset.attrs[attr_name]
-
+
                 data_list.add_result(data)
         except TypeError:
             print('TypeError for HDF5 serialization')
         finally:
             h5_file.close()  # Close the HDF5 file
-
+
         return data_list
\ No newline at end of file
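Taken together, the attribute validation and the serializers above support a
straightforward round trip; a behavior sketch (illustrative values, Python 2
syntax to match the code base; the equality asserts mirror what the updated
unit tests check):

    from timeside.analyzer.core import (AnalyzerAttributes, AnalyzerResult,
                                        AnalyzerResultContainer)

    attr = AnalyzerAttributes(id="max_level", unit="dBFS", sampleRate=44100)
    print attr.blockSize    # None: unset fields keep their _default_value
    try:
        attr.foo = 1        # rejected: 'foo' is not in _default_value
    except AttributeError, err:
        print err

    result = AnalyzerResult(data=[-6.021], attributes=attr)
    container = AnalyzerResultContainer(result)

    # from_json()/from_yaml()/from_xml() each rebuild a fresh container;
    # equality compares asdict() of every result.
    assert container.from_json(container.to_json()) == container
    assert container.from_yaml(container.to_yaml()) == container
    assert container.from_xml(container.to_xml()) == container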
diff --git a/timeside/analyzer/dc.py b/timeside/analyzer/dc.py
index 9c25c8b..f22c395 100644
--- a/timeside/analyzer/dc.py
+++ b/timeside/analyzer/dc.py
@@ -48,6 +48,16 @@ class MeanDCShift(Processor):
         return frames, eod
 
     def results(self):
-        result = AnalyzerResult(id = "mean_dc_shift", name = "Mean DC shift", unit = "%")
-        result.value = numpy.round(100*numpy.mean(self.values),3)
-        return AnalyzerResultContainer([result])
+        result = AnalyzerResult()
+        # Set attributes
+        # FIXME : blockSize and stepSize are not appropriate here
+        result.attributes = AnalyzerAttributes(id="mean_dc_shift",
+                                               name = "Mean DC shift",
+                                               unit = "%",
+                                               sampleRate=self.samplerate(),
+                                               blockSize=None,
+                                               stepSize=None)
+
+        # Set Data
+        result.data = numpy.round(numpy.mean(100*self.values),3)
+        return AnalyzerResultContainer(result)

diff --git a/timeside/analyzer/level.py b/timeside/analyzer/level.py
index ed585ce..bf061f4 100644
--- a/timeside/analyzer/level.py
+++ b/timeside/analyzer/level.py
@@ -58,8 +58,22 @@ class Level(Processor):
         return frames, eod
 
     def results(self):
-        max_level = AnalyzerResult(id = "max_level", name = "Max level", unit = "dBFS")
-        max_level.value = numpy.round(20*numpy.log10(self.max_value), 3)
-        rms_level = AnalyzerResult(id = "rms_level", name = "RMS level", unit = "dBFS")
-        rms_level.value = numpy.round(20*numpy.log10(numpy.sqrt(numpy.mean(self.mean_values))), 3)
+        # Max level
+        # FIXME : blockSize and stepSize are not appropriate here
+        attr = AnalyzerAttributes(id="max_level",
+                                  name="Max level",
+                                  unit = "dBFS",
+                                  sampleRate=self.samplerate())
+        data = numpy.round(20*numpy.log10(self.max_value), 3)
+        max_level = AnalyzerResult(data, attr)
+
+        # RMS level
+        # FIXME : blockSize and stepSize are not appropriate here
+        attr = AnalyzerAttributes(id="rms_level",
+                                  name="RMS level",
+                                  unit="dBFS",
+                                  sampleRate=self.samplerate())
+        data = numpy.round(20*numpy.log10(numpy.sqrt(numpy.mean(self.mean_values))), 3)
+        rms_level = AnalyzerResult(data, attr)
+
         return AnalyzerResultContainer([max_level, rms_level])

diff --git a/timeside/analyzer/yaafe.py b/timeside/analyzer/yaafe.py
index 50c85bd..4887920 100644
--- a/timeside/analyzer/yaafe.py
+++ b/timeside/analyzer/yaafe.py
@@ -81,18 +81,26 @@ class Yaafe(Processor):
     def results(self):
         # Get back current container
         container = AnalyzerResultContainer()
-        # Get feature extraction results from yaafe
+        # Get feature extraction results from yaafe
+        map_keys = {'sampleRate': 'sampleRate',
+                    'frameLength': 'blockSize',
+                    'sampleStep': 'stepSize',
+                    'parameters': 'parameters',
+                    }
         featNames = self.yaafe_engine.getOutputs().keys()
         for featName in featNames:
+            # Map Yaafe attributes into AnalyzerResults dict
+            res_dict = {map_keys[name]: self.yaafe_engine.getOutputs()['mfcc'][name]
+                        for name in map_keys.keys()}
             # Define ID fields
-            id = 'yaafe_' + featName
-            name = 'Yaafe ' + featName
-            unit = ''
+            res_dict['id'] = 'yaafe_' + featName
+            res_dict['name'] = 'Yaafe ' + featName
+            res_dict['unit'] = ''
+            # create AnalyzerResult and set its attributes
+            result = AnalyzerResult(attributes=res_dict)
             # Get results from Yaafe engine
-            result = AnalyzerResult(id = id, name = name, unit = unit)
-            result.value = self.yaafe_engine.readOutput(featName)  # Read Yaafe Results
+            result.data = self.yaafe_engine.readOutput(featName)  # Read Yaafe Results
             # Store results in Container
-            if len(result.value):
+            if len(result.data):
                 container.add_result(result)
         return container
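One remark on the Yaafe mapping above: the dict comprehension reads the
'mfcc' output for every featName. A per-feature variant would presumably look
like the following (a hypothetical helper written for illustration, not part
of this patch):

    def yaafe_attributes(engine, featName):
        # map Yaafe output metadata keys onto AnalyzerAttributes field names
        map_keys = {'sampleRate': 'sampleRate',
                    'frameLength': 'blockSize',
                    'sampleStep': 'stepSize',
                    'parameters': 'parameters'}
        output = engine.getOutputs()[featName]
        res_dict = dict((map_keys[key], output[key]) for key in map_keys)
        res_dict['id'] = 'yaafe_' + featName
        res_dict['name'] = 'Yaafe ' + featName
        res_dict['unit'] = ''
        return res_dict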