Analyzer: hdf5 serialization -> Fix bug + code refactoring

author Thomas Fillon <thomas@parisson.com>

Fri, 29 Nov 2013 19:24:07 +0000 (20:24 +0100)

committer Thomas Fillon <thomas@parisson.com>

Sun, 1 Dec 2013 19:53:05 +0000 (20:53 +0100)
author Thomas Fillon <thomas@parisson.com>
Fri, 29 Nov 2013 19:24:07 +0000 (20:24 +0100)
committer Thomas Fillon <thomas@parisson.com>
Sun, 1 Dec 2013 19:53:05 +0000 (20:53 +0100)
diff --git a/tests/test_AnalyzerResult.py b/tests/test_AnalyzerResult.py

index 912cd5b536b177fb344c3d4a6c13a042bd5a2d06..c718ba6bf107b32032ea960690ebe91673928da3 100755 (executable)
--- a/tests/test_AnalyzerResult.py
+++ b/tests/test_AnalyzerResult.py
@@ -15,6 +15,7 @@ class TestAnalyzerResult(unittest.TestCase):
  
      def setUp(self):
          self.result = analyzer_result_factory(data_mode='value', time_mode='framewise')
+
          from datetime import datetime
          self.result.id_metadata = dict(date=datetime.now().replace(microsecond=0).isoformat(' '),
                                         version=__version__,
@@ -154,7 +155,7 @@ class TestAnalyzerResultNumpy(TestAnalyzerResult):
          if verbose:
              print '%15s' % 'from numpy:',
              print d_numpy
-        self.assertEquals(d_numpy, results)
+        self.assertEqual(d_numpy, results)
  
  
  class TestAnalyzerResultHdf5(TestAnalyzerResult):
@@ -167,7 +168,7 @@ class TestAnalyzerResultHdf5(TestAnalyzerResult):
          if verbose:
              print '%15s' % 'from hdf5:',
              print res_hdf5
-        self.assertEquals(res_hdf5, results)
+        self.assertEqual(results, res_hdf5)
  
  
  class TestAnalyzerResultYaml(TestAnalyzerResult):
@@ -183,7 +184,7 @@ class TestAnalyzerResultYaml(TestAnalyzerResult):
              print '%15s' % 'from yaml:',
              print d_yaml
          #for i in range(len(d_yaml)):
-        self.assertEquals(results, d_yaml)
+        self.assertEqual(results, d_yaml)
  
  
  class TestAnalyzerResultXml(TestAnalyzerResult):
@@ -201,7 +202,7 @@ class TestAnalyzerResultXml(TestAnalyzerResult):
              print d_xml
  
          #for i in range(len(d_xml)):
-        self.assertEquals(d_xml, results)
+        self.assertEqual(d_xml, results)
  
  
  class TestAnalyzerResultJson(TestAnalyzerResult):
@@ -222,7 +223,7 @@ class TestAnalyzerResultJson(TestAnalyzerResult):
              print '%15s' % 'from yaml:',
  
          #for i in range(len(d_json)):
-        self.assertEquals(d_json, results)
+        self.assertEqual(d_json, results)
  
  if __name__ == '__main__':
      unittest.main(testRunner=TestRunner())
 \ No newline at end of file
diff --git a/timeside/analyzer/core.py b/timeside/analyzer/core.py

index 81f57e73aaa3cf78505f6ffd92a6b67eb0ffa6fe..c18966746cf32d9c923b469dc0a2c09f12da924a 100644 (file)
--- a/timeside/analyzer/core.py
+++ b/timeside/analyzer/core.py
@@ -27,6 +27,8 @@ from timeside.core import Processor
  from timeside.__init__ import __version__
  import numpy
  from collections import OrderedDict
+import h5py
+import h5tools
  
  
  numpy_data_types = [
@@ -173,6 +175,12 @@ class MetadataObject(object):
              if child.text:
                  self[key] = ast.literal_eval(child.text)
  
+    def to_hdf5(self, h5group):
+        h5tools.dict_to_hdf5(self, h5group)
+
+    def from_hdf5(self, h5group):
+        h5tools.dict_from_hdf5(self, h5group)
+
  
  class IdMetadata(MetadataObject):
  
@@ -275,6 +283,19 @@ class LabelMetadata(MetadataObject):
                                    ('description', {}),
                                    ('label_type', 'mono')])
  
+    def to_hdf5(self, h5group):
+        """
+        Save a dictionary-like object inside an h5 file group
+        """
+        # Write attributes
+        name = 'label_type'
+        if self.__getattribute__(name) is not None:
+            h5group.attrs[name] = self.__getattribute__(name)
+
+        for name in ['label', 'description']:
+            subgroup = h5group.create_group(name)
+            h5tools.dict_to_hdf5(self.__getattribute__(name), subgroup)
+
  
  class FrameMetadata(MetadataObject):
  
@@ -389,9 +410,43 @@ class DataObject(MetadataObject):
                  self[key] = numpy.asarray(ast.literal_eval(child.text),
                                            dtype=child.get('dtype'))
  
+    def to_hdf5(self, h5group):
+        # Write Datasets
+        for key in self.keys():
+            if self.__getattribute__(key) is None:
+                continue
+            if self.__getattribute__(key).dtype == 'object':
+                # Handle numpy type = object as vlen string
+                h5group.create_dataset(key,
+                                       data=self.__getattribute__(
+                                           key).tolist().__repr__(),
+                                       dtype=h5py.special_dtype(vlen=str))
+            else:
+                h5group.create_dataset(key, data=self.__getattribute__(key))
+
+    def from_hdf5(self, h5group):
+        for key, dataset in h5group.items():
+            # Load value from the hdf5 dataset and store in data
+            # FIXME : the following conditional statement is to prevent
+            # reading an empty dataset.
+            # see : https://github.com/h5py/h5py/issues/281
+            # It should be fixed by the next h5py version
+            if dataset.shape != (0,):
+                if h5py.check_dtype(vlen=dataset.dtype):
+                    # to deal with VLEN data used for list of
+                    # list
+                    self.__setattr__(key, eval(dataset[...].tolist()))
+                else:
+                    self.__setattr__(key, dataset[...])
+            else:
+                self.__setattr__(key, [])
+
  
  class AnalyzerParameters(dict):
  
+    def as_dict(self):
+        return self
+
      def to_xml(self):
          import xml.etree.ElementTree as ET
          root = ET.Element('Metadata')
@@ -410,9 +465,11 @@ class AnalyzerParameters(dict):
              if child.text:
                  self.set(child.tag, ast.literal_eval(child.text))
  
-    def as_dict(self):
-        return self
+    def to_hdf5(self, subgroup):
+        h5tools.dict_to_hdf5(self, subgroup)
  
+    def from_hdf5(self, h5group):
+        h5tools.dict_from_hdf5(self, h5group)
  
  
  class AnalyzerResult(MetadataObject):
@@ -473,7 +530,7 @@ class AnalyzerResult(MetadataObject):
              return
  
          elif name in self.keys():
-            if isinstance(value, dict) and value :
+            if isinstance(value, dict) and value:
                  for (sub_name, sub_value) in value.items():
                      self[name][sub_name] = sub_value
                  return
@@ -528,6 +585,22 @@ class AnalyzerResult(MetadataObject):
  
          return result
  
+    def to_hdf5(self, h5_file):
+        # Save results in HDF5 Dataset
+        group = h5_file.create_group(self.id_metadata.id)
+        group.attrs['data_mode'] = self.__getattribute__('data_mode')
+        group.attrs['time_mode'] = self.__getattribute__('time_mode')
+        for key in self.keys():
+            if key in ['data_mode', 'time_mode']:
+                continue
+            subgroup = group.create_group(key)
+            self.__getattribute__(key).to_hdf5(subgroup)
+
+    def from_hdf5(self, h5group):
+        # Read Sub-Group
+        for subgroup_name, h5subgroup in h5group.items():
+            self.__getattribute__(subgroup_name).from_hdf5(h5subgroup)
+
      @property
      def data_mode(self):
          return self._data_mode
@@ -868,41 +941,10 @@ class AnalyzerResultContainer(dict):
          return numpy.load(input_file)
  
      def to_hdf5(self, output_file):
-
-        import h5py
-
          # Open HDF5 file and save dataset (overwrite any existing file)
          with h5py.File(output_file, 'w') as h5_file:
              for res in self.values():
-                # Save results in HDF5 Dataset
-                group = h5_file.create_group(res.id_metadata.id)
-                group.attrs['data_mode'] = res['data_mode']
-                group.attrs['time_mode'] = res['time_mode']
-                for key in res.keys():
-                    if key not in ['data_mode', 'time_mode', 'data_object']:
-                        subgroup = group.create_group(key)
-
-                        # Write attributes
-                        attrs = res[key].keys()
-                        for name in attrs:
-                            if res[key][name] is not None:
-                                subgroup.attrs[name] = res[key][name]
-
-                # Write Datasets
-                key = 'data_object'
-                subgroup = group.create_group(key)
-                for dsetName in res[key].keys():
-                    if res[key][dsetName] is not None:
-                        if res[key][dsetName].dtype == 'object':
-                            # Handle numpy type = object as vlen string
-                            subgroup.create_dataset(dsetName,
-                                                    data=res[key][
-                                                        dsetName].tolist(
-                                                    ).__repr__(),
-                                                    dtype=h5py.special_dtype(vlen=str))
-                        else:
-                            subgroup.create_dataset(dsetName,
-                                                    data=res[key][dsetName])
+                res.to_hdf5(h5_file)
  
      def from_hdf5(self, input_file):
          import h5py
@@ -912,33 +954,11 @@ class AnalyzerResultContainer(dict):
          h5_file = h5py.File(input_file, 'r')
          data_list = AnalyzerResultContainer()
          try:
-            for (group_name, group) in h5_file.items():
+            for group in h5_file.values():
  
                  result = analyzer_result_factory(data_mode=group.attrs['data_mode'],
                                          time_mode=group.attrs['time_mode'])
-                # Read Sub-Group
-                for subgroup_name, subgroup in group.items():
-                    # Read attributes
-                    for name, value in subgroup.attrs.items():
-                            result[subgroup_name][name] = value
-
-                    if subgroup_name == 'data_object':
-                        for dsetName, dset in subgroup.items():
-                            # Load value from the hdf5 dataset and store in data
-                            # FIXME : the following conditional statement is to prevent
-                            # reading an empty dataset.
-                            # see : https://github.com/h5py/h5py/issues/281
-                            # It should be fixed by the next h5py version
-                            if dset.shape != (0,):
-                                if h5py.check_dtype(vlen=dset.dtype):
-                                    # to deal with VLEN data used for list of
-                                    # list
-                                    result[subgroup_name][dsetName] = eval(
-                                        dset[...].tolist())
-                                else:
-                                    result[subgroup_name][dsetName] = dset[...]
-                            else:
-                                result[subgroup_name][dsetName] = []
+                result.from_hdf5(group)
  
                  data_list.add(result)
          except TypeError:
diff --git a/timeside/analyzer/h5tools.py b/timeside/analyzer/h5tools.py

new file mode 100644 (file)

index 0000000..2db446e
--- /dev/null
+++ b/timeside/analyzer/h5tools.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2007-2013 Parisson SARL
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide.  If not, see <http://www.gnu.org/licenses/>.
+
+# Author:
+#   Thomas Fillon <thomas  at parisson.com>
+
+
+def dict_to_hdf5(dict_like, h5group):
+    """
+    Save a dictionary-like object inside an h5 file group
+    """
+    # Write attributes
+    attrs = dict_like.keys()
+    for name in attrs:
+        if dict_like[name] is not None:
+            h5group.attrs[str(name)] = dict_like[name]
+
+
+def dict_from_hdf5(dict_like, h5group):
+    """
+    Load a dictionary-like object from an h5 file group
+    """
+    # Read attributes
+    for name, value in h5group.attrs.items():
+        dict_like[name] = value
author	Thomas Fillon <thomas@parisson.com>
	Fri, 29 Nov 2013 19:24:07 +0000 (20:24 +0100)
committer	Thomas Fillon <thomas@parisson.com>
	Sun, 1 Dec 2013 19:53:05 +0000 (20:53 +0100)
tests/test_AnalyzerResult.py		patch \| blob \| history
timeside/analyzer/core.py		patch \| blob \| history
timeside/analyzer/h5tools.py	[new file with mode: 0644]	patch \| blob