From 77410c7c6d0ec70d1b75352b70b8d9677d62fb6e Mon Sep 17 00:00:00 2001
From: Thomas Fillon
Date: Tue, 25 Jun 2013 15:31:03 +0200
Subject: [PATCH] Add HDF5 I/O for AnalyzerResultContainer, Fixes #7

- Add functions from_hdf5 and to_hdf5 to AnalyzerResultContainer.
- Add corresponding tests.
- Function to_hdf5(output_file) stores an entire Container in an HDF5 file,
  using one dataset per AnalyzerResult and dataset attributes to store
  'unit' and 'name'. The dataset name is the AnalyzerResult 'id'.
- Function from_hdf5(input_file) loads the data stored in an HDF5 file and
  returns an AnalyzerResultContainer.
---
 tests/test_AnalyzerResult.py | 12 ++++++++
 timeside/analyzer/core.py    | 59 ++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/tests/test_AnalyzerResult.py b/tests/test_AnalyzerResult.py
index 510fe67..8985f4c 100755
--- a/tests/test_AnalyzerResult.py
+++ b/tests/test_AnalyzerResult.py
@@ -137,6 +137,18 @@ class TestAnalyzerResultNumpy(TestAnalyzerResult):
         for i in range(len(d_numpy)):
             self.assertEquals(d_numpy[i], results[i])
 
+class TestAnalyzerResultHdf5(TestAnalyzerResult):
+    """ test AnalyzerResult hdf5 serialize """
+
+    def tearDown(self):
+        results = AnalyzerResultContainer([self.result])
+        results.to_hdf5('/tmp/t.h5')
+        res_hdf5 = results.from_hdf5('/tmp/t.h5')
+        if verbose:
+            print '%15s' % 'from hdf5:',
+            print res_hdf5
+        self.assertEquals(res_hdf5, results)
+
 class TestAnalyzerResultYaml(TestAnalyzerResult):
     """ test AnalyzerResult yaml serialize """
     def tearDown(self):
diff --git a/timeside/analyzer/core.py b/timeside/analyzer/core.py
index b1553b1..24f5a83 100644
--- a/timeside/analyzer/core.py
+++ b/timeside/analyzer/core.py
@@ -182,3 +182,62 @@ class AnalyzerResultContainer(object):
     def from_numpy(self, input_file):
         import numpy
         return numpy.load(input_file)
+
+    def to_hdf5(self, output_file, data_list=None):
+        if data_list is None: data_list = self.results
+
+        import h5py
+
+        # Open the HDF5 file and save one dataset per result
+        # TODO: check the self.results format,
+        # as this assumes 'id', 'name', 'value' and 'unit' keys
+        h5_file = h5py.File(output_file, 'w')  # overwrite any existing file
+        try:
+            for data in data_list:
+                # Save the result in an HDF5 dataset
+                dset = h5_file.create_dataset(data['id'], data=data['value'])
+                # Save associated attributes
+                dset.attrs["unit"] = data['unit']
+                dset.attrs["name"] = data['name']
+        except TypeError:
+            print('TypeError for HDF5 serialization')
+        finally:
+            h5_file.close()  # Close the HDF5 file
+
+    def from_hdf5(self, input_file):
+        import h5py
+
+        # Open the HDF5 file for reading and get the results
+        h5_file = h5py.File(input_file, 'r')
+        data_list = AnalyzerResultContainer()
+        try:
+            for name in h5_file.keys():
+                dset = h5_file.get(name)  # Read dataset
+                id = name
+                # Read attributes
+                unit = dset.attrs['unit']
+                name = dset.attrs['name']
+                # Create a new AnalyzerResult
+                data = AnalyzerResult(id=id, name=name, unit=unit)
+
+                # Load the value from the HDF5 dataset and store it in data
+                # FIXME: the following conditional statement prevents
+                # reading an empty dataset,
+                # see: https://github.com/h5py/h5py/issues/281
+                # It should be fixed by the next h5py version.
+                if dset.shape != (0,):
+                    data.value = dset[...]
+                else:
+                    data.value = []
+
+                # TODO: enable import from the yaafe HDF5 format
+                # for attr_name in dset.attrs.keys():
+                #     data[attr_name] = dset.attrs[attr_name]
+
+                data_list.add_result(data)
+        except TypeError:
+            print('TypeError for HDF5 serialization')
+        finally:
+            h5_file.close()  # Close the HDF5 file
+
+        return data_list
\ No newline at end of file
-- 
2.39.5
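
A minimal round-trip sketch of the new methods, mirroring the
TestAnalyzerResultHdf5.tearDown test above. It assumes h5py is installed,
that AnalyzerResult and AnalyzerResultContainer are importable from
timeside.analyzer.core, and that an AnalyzerResult accepts 'id', 'name' and
'unit' keyword arguments plus a settable 'value', as from_hdf5 does; the
id, name, unit and value contents below are placeholder examples.

    from timeside.analyzer.core import AnalyzerResult, AnalyzerResultContainer

    # Build one result; the id, name, unit and value are made-up examples.
    result = AnalyzerResult(id='mean_energy', name='Mean Energy', unit='dB')
    result.value = [-23.5, -22.1]

    # Wrap it in a container and write one HDF5 dataset per result.
    container = AnalyzerResultContainer([result])
    container.to_hdf5('/tmp/results.h5')

    # Read the file back into a new AnalyzerResultContainer.
    restored = container.from_hdf5('/tmp/results.h5')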