From: Thomas Fillon Date: Fri, 29 Nov 2013 19:24:07 +0000 (+0100) Subject: Analyzer: hdf5 serialization -> Fix bug + code refactoring X-Git-Tag: 0.5.2~26 X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=23bb2de2ed35724e2e49db7ec04d973538bc6825;p=timeside.git Analyzer: hdf5 serialization -> Fix bug + code refactoring --- diff --git a/tests/test_AnalyzerResult.py b/tests/test_AnalyzerResult.py index 912cd5b..c718ba6 100755 --- a/tests/test_AnalyzerResult.py +++ b/tests/test_AnalyzerResult.py @@ -15,6 +15,7 @@ class TestAnalyzerResult(unittest.TestCase): def setUp(self): self.result = analyzer_result_factory(data_mode='value', time_mode='framewise') + from datetime import datetime self.result.id_metadata = dict(date=datetime.now().replace(microsecond=0).isoformat(' '), version=__version__, @@ -154,7 +155,7 @@ class TestAnalyzerResultNumpy(TestAnalyzerResult): if verbose: print '%15s' % 'from numpy:', print d_numpy - self.assertEquals(d_numpy, results) + self.assertEqual(d_numpy, results) class TestAnalyzerResultHdf5(TestAnalyzerResult): @@ -167,7 +168,7 @@ class TestAnalyzerResultHdf5(TestAnalyzerResult): if verbose: print '%15s' % 'from hdf5:', print res_hdf5 - self.assertEquals(res_hdf5, results) + self.assertEqual(results, res_hdf5) class TestAnalyzerResultYaml(TestAnalyzerResult): @@ -183,7 +184,7 @@ class TestAnalyzerResultYaml(TestAnalyzerResult): print '%15s' % 'from yaml:', print d_yaml #for i in range(len(d_yaml)): - self.assertEquals(results, d_yaml) + self.assertEqual(results, d_yaml) class TestAnalyzerResultXml(TestAnalyzerResult): @@ -201,7 +202,7 @@ class TestAnalyzerResultXml(TestAnalyzerResult): print d_xml #for i in range(len(d_xml)): - self.assertEquals(d_xml, results) + self.assertEqual(d_xml, results) class TestAnalyzerResultJson(TestAnalyzerResult): @@ -222,7 +223,7 @@ class TestAnalyzerResultJson(TestAnalyzerResult): print '%15s' % 'from yaml:', #for i in range(len(d_json)): - self.assertEquals(d_json, results) + self.assertEqual(d_json, 
results) if __name__ == '__main__': unittest.main(testRunner=TestRunner()) \ No newline at end of file diff --git a/timeside/analyzer/core.py b/timeside/analyzer/core.py index 81f57e7..c189667 100644 --- a/timeside/analyzer/core.py +++ b/timeside/analyzer/core.py @@ -27,6 +27,8 @@ from timeside.core import Processor from timeside.__init__ import __version__ import numpy from collections import OrderedDict +import h5py +import h5tools numpy_data_types = [ @@ -173,6 +175,12 @@ class MetadataObject(object): if child.text: self[key] = ast.literal_eval(child.text) + def to_hdf5(self, h5group): + h5tools.dict_to_hdf5(self, h5group) + + def from_hdf5(self, h5group): + h5tools.dict_from_hdf5(self, h5group) + class IdMetadata(MetadataObject): @@ -275,6 +283,19 @@ class LabelMetadata(MetadataObject): ('description', {}), ('label_type', 'mono')]) + def to_hdf5(self, h5group): + """ + Save a dictionnary-like object inside a h5 file group + """ + # Write attributes + name = 'label_type' + if self.__getattribute__(name) is not None: + h5group.attrs[name] = self.__getattribute__(name) + + for name in ['label', 'description']: + subgroup = h5group.create_group(name) + h5tools.dict_to_hdf5(self.__getattribute__(name), subgroup) + class FrameMetadata(MetadataObject): @@ -389,9 +410,43 @@ class DataObject(MetadataObject): self[key] = numpy.asarray(ast.literal_eval(child.text), dtype=child.get('dtype')) + def to_hdf5(self, h5group): + # Write Datasets + for key in self.keys(): + if self.__getattribute__(key) is None: + continue + if self.__getattribute__(key).dtype == 'object': + # Handle numpy type = object as vlen string + h5group.create_dataset(key, + data=self.__getattribute__( + key).tolist().__repr__(), + dtype=h5py.special_dtype(vlen=str)) + else: + h5group.create_dataset(key, data=self.__getattribute__(key)) + + def from_hdf5(self, h5group): + for key, dataset in h5group.items(): + # Load value from the hdf5 dataset and store in data + # FIXME : the following conditional 
statement is to prevent + # reading an empty dataset. + # see : https://github.com/h5py/h5py/issues/281 + # It should be fixed by the next h5py version + if dataset.shape != (0,): + if h5py.check_dtype(vlen=dataset.dtype): + # to deal with VLEN data used for list of + # list + self.__setattr__(key, eval(dataset[...].tolist())) + else: + self.__setattr__(key, dataset[...]) + else: + self.__setattr__(key, []) + class AnalyzerParameters(dict): + def as_dict(self): + return self + def to_xml(self): import xml.etree.ElementTree as ET root = ET.Element('Metadata') @@ -410,9 +465,11 @@ class AnalyzerParameters(dict): if child.text: self.set(child.tag, ast.literal_eval(child.text)) - def as_dict(self): - return self + def to_hdf5(self, subgroup): + h5tools.dict_to_hdf5(self, subgroup) + def from_hdf5(self, h5group): + h5tools.dict_from_hdf5(self, h5group) class AnalyzerResult(MetadataObject): @@ -473,7 +530,7 @@ class AnalyzerResult(MetadataObject): return elif name in self.keys(): - if isinstance(value, dict) and value : + if isinstance(value, dict) and value: for (sub_name, sub_value) in value.items(): self[name][sub_name] = sub_value return @@ -528,6 +585,22 @@ class AnalyzerResult(MetadataObject): return result + def to_hdf5(self, h5_file): + # Save results in HDF5 Dataset + group = h5_file.create_group(self.id_metadata.id) + group.attrs['data_mode'] = self.__getattribute__('data_mode') + group.attrs['time_mode'] = self.__getattribute__('time_mode') + for key in self.keys(): + if key in ['data_mode', 'time_mode']: + continue + subgroup = group.create_group(key) + self.__getattribute__(key).to_hdf5(subgroup) + + def from_hdf5(self, h5group): + # Read Sub-Group + for subgroup_name, h5subgroup in h5group.items(): + self.__getattribute__(subgroup_name).from_hdf5(h5subgroup) + @property def data_mode(self): return self._data_mode @@ -868,41 +941,10 @@ class AnalyzerResultContainer(dict): return numpy.load(input_file) def to_hdf5(self, output_file): - - import h5py - # Open 
HDF5 file and save dataset (overwrite any existing file) with h5py.File(output_file, 'w') as h5_file: for res in self.values(): - # Save results in HDF5 Dataset - group = h5_file.create_group(res.id_metadata.id) - group.attrs['data_mode'] = res['data_mode'] - group.attrs['time_mode'] = res['time_mode'] - for key in res.keys(): - if key not in ['data_mode', 'time_mode', 'data_object']: - subgroup = group.create_group(key) - - # Write attributes - attrs = res[key].keys() - for name in attrs: - if res[key][name] is not None: - subgroup.attrs[name] = res[key][name] - - # Write Datasets - key = 'data_object' - subgroup = group.create_group(key) - for dsetName in res[key].keys(): - if res[key][dsetName] is not None: - if res[key][dsetName].dtype == 'object': - # Handle numpy type = object as vlen string - subgroup.create_dataset(dsetName, - data=res[key][ - dsetName].tolist( - ).__repr__(), - dtype=h5py.special_dtype(vlen=str)) - else: - subgroup.create_dataset(dsetName, - data=res[key][dsetName]) + res.to_hdf5(h5_file) def from_hdf5(self, input_file): import h5py @@ -912,33 +954,11 @@ class AnalyzerResultContainer(dict): h5_file = h5py.File(input_file, 'r') data_list = AnalyzerResultContainer() try: - for (group_name, group) in h5_file.items(): + for group in h5_file.values(): result = analyzer_result_factory(data_mode=group.attrs['data_mode'], time_mode=group.attrs['time_mode']) - # Read Sub-Group - for subgroup_name, subgroup in group.items(): - # Read attributes - for name, value in subgroup.attrs.items(): - result[subgroup_name][name] = value - - if subgroup_name == 'data_object': - for dsetName, dset in subgroup.items(): - # Load value from the hdf5 dataset and store in data - # FIXME : the following conditional statement is to prevent - # reading an empty dataset. 
- # see : https://github.com/h5py/h5py/issues/281 - # It should be fixed by the next h5py version - if dset.shape != (0,): - if h5py.check_dtype(vlen=dset.dtype): - # to deal with VLEN data used for list of - # list - result[subgroup_name][dsetName] = eval( - dset[...].tolist()) - else: - result[subgroup_name][dsetName] = dset[...] - else: - result[subgroup_name][dsetName] = [] + result.from_hdf5(group) data_list.add(result) except TypeError: diff --git a/timeside/analyzer/h5tools.py b/timeside/analyzer/h5tools.py new file mode 100644 index 0000000..2db446e --- /dev/null +++ b/timeside/analyzer/h5tools.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2007-2013 Parisson SARL + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see <http://www.gnu.org/licenses/>. + +# Author: +# Thomas Fillon + + +def dict_to_hdf5(dict_like, h5group): + """ + Save a dictionnary-like object inside a h5 file group + """ + # Write attributes + attrs = dict_like.keys() + for name in attrs: + if dict_like[name] is not None: + h5group.attrs[str(name)] = dict_like[name] + + +def dict_from_hdf5(dict_like, h5group): + """ + Load a dictionnary-like object from a h5 file group + """ + # Read attributes + for name, value in h5group.attrs.items(): + dict_like[name] = value