From 35209fd0ec9dd54d9fe9a98b63b66c30c7bed715 Mon Sep 17 00:00:00 2001
From: Thomas Fillon <thomas@parisson.com>
Date: Tue, 15 Oct 2013 23:42:23 +0200
Subject: [PATCH] switch IRIT analyzers to the new Analyzers structure

---
 timeside/analyzer/__init__.py            |   4 +-
 timeside/analyzer/irit_speech_4hz.py     | 180 +++++++++++++----------
 timeside/analyzer/irit_speech_entropy.py |  64 +++++---
 3 files changed, 143 insertions(+), 105 deletions(-)

diff --git a/timeside/analyzer/__init__.py b/timeside/analyzer/__init__.py
index d9c7444..663ce64 100644
--- a/timeside/analyzer/__init__.py
+++ b/timeside/analyzer/__init__.py
@@ -11,5 +11,5 @@ from yaafe import * # TF : add Yaafe analyzer
 from spectrogram import Spectrogram
 from waveform import Waveform
 from vamp_plugin import VampSimpleHost
-#from irit_speech_entropy import *
-#from irit_speech_4hz import *
+from irit_speech_entropy import IRITSpeechEntropy
+from irit_speech_4hz import IRITSpeech4Hz
diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py
index c3f0923..8d05487 100644
--- a/timeside/analyzer/irit_speech_4hz.py
+++ b/timeside/analyzer/irit_speech_4hz.py
@@ -19,51 +19,52 @@
 
 # Author: Maxime Le Coz <lecoz@irit.fr>
 
-from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
-from timeside.analyzer.core import *
+from timeside.core import implements, interfacedoc
+from timeside.analyzer.core import Analyzer
+from timeside.analyzer.utils import melFilterBank, computeModulation
+from timeside.analyzer.utils import segmentFromValues
 from timeside.api import IAnalyzer
-from numpy import array,hamming,dot,mean
+from numpy import array, hamming, dot, mean, float
 from numpy.fft import rfft
-from scipy.signal import firwin,lfilter
+from scipy.signal import firwin, lfilter
 
 
-class IRITSpeech4Hz(Processor):
+class IRITSpeech4Hz(Analyzer):
     implements(IAnalyzer)
     '''
     Segmentor based on the analysis of the 4Hz energy modulation.
 
     Properties:
-		- energy4hz 		(list) 		: List of the 4Hz energy by frame for the modulation computation
-		- threshold 		(float) 	: Threshold for the classification Speech/NonSpeech
-		- frequency_center	(float)		: Center of the frequency range where the energy is extracted
-		- frequency_width	(float)		: Width of the frequency range where the energy is extracted
-		- orderFilter		(int)		: Order of the pass-band filter extracting the frequency range
-		- normalizeEnergy	(boolean)	: Whether the energy must be normalized or not
-		- nFFT 				(int)		: Number of points for the FFT. Better if 512 <= nFFT <= 2048
-		- nbFilters			(int)		: Length of the Mel Filter bank
-		- melFilter		(numpy array)	: Mel Filter bank
-		- modulLen			(float)		: Length (in second) of the modulation computation window
+        - energy4hz 		(list) 		: List of the 4Hz energy by frame for the modulation computation
+        - threshold 		(float) 	: Threshold for the classification Speech/NonSpeech
+        - frequency_center	(float)		: Center of the frequency range where the energy is extracted
+        - frequency_width	(float)		: Width of the frequency range where the energy is extracted
+        - orderFilter		(int)		: Order of the band-pass filter extracting the frequency range
+        - normalizeEnergy	(boolean)	: Whether the energy must be normalized or not
+        - nFFT 				(int)		: Number of points for the FFT. Better if 512 <= nFFT <= 2048
+        - nbFilters			(int)		: Length of the Mel Filter bank
+        - melFilter		(numpy array)	: Mel Filter bank
+        - modulLen			(float)		: Length (in seconds) of the modulation computation window
     '''
 
     @interfacedoc
     def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
-        super(IRITSpeech4Hz, self).setup(channels, samplerate, blocksize, totalframes)
+        super(IRITSpeech4Hz, self).setup(
+            channels, samplerate, blocksize, totalframes)
         self.energy4hz = []
-        print "top"
         # Classification
         self.threshold = 2.0
 
         # Pass-band Filter
         self.frequency_center = 4.0
         self.frequency_width = 0.5
-        self.orderFilter=100
-
+        self.orderFilter = 100
 
         self.normalizeEnergy = True
-        self.nFFT=2048
-        self.nbFilters =30
+        self.nFFT = 2048
+        self.nbFilters = 30
         self.modulLen = 2.0
-        self.melFilter = melFilterBank(self.nbFilters,self.nFFT,samplerate);
+        self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate)
 
     @staticmethod
     @interfacedoc
@@ -73,7 +74,7 @@ class IRITSpeech4Hz(Processor):
     @staticmethod
     @interfacedoc
     def name():
-        return "Speech entropy (IRIT)"
+        return "IRIT Speech 4Hz Modulation"
 
     @staticmethod
     @interfacedoc
@@ -84,61 +85,78 @@ class IRITSpeech4Hz(Processor):
         return "Speech confidences indexes"
 
     def process(self, frames, eod=False):
-		'''
-				
-		'''
-		
-		frames = frames.T[0]
-		# windowing of the frame (could be a changeable property)
-		w = frames * hamming(len(frames));
-		
-		# Mel scale spectrum extraction
-		f = abs(rfft(w,n=2*self.nFFT)[0:self.nFFT])
-		e = dot(f**2,self.melFilter)
-		
-		self.energy4hz.append(e)
-		
-		return frames, eod
-
-    def results(self):
-	'''
-		
-	'''	
-	print "Results"
-	# Creation of the pass-band filter	
-	Wo = self.frequency_center/self.samplerate()  ;
-	Wn = [ Wo-(self.frequency_width/2)/self.samplerate() , Wo+(self.frequency_width/2)/self.samplerate()];
-	num = firwin(self.orderFilter, Wn,pass_zero=False);
-		
-		
-	# Energy on the frequency range
-	self.energy4hz=numpy.array(self.energy4hz)		
-	energy = lfilter(num,1,self.energy4hz.T,0)
-	energy = sum(energy)
-		
-	# Normalization
-	if self.normalizeEnergy :
-		energy =energy/mean(energy)
-			
-	# Energy Modulation
-	frameLenModulation = int(self.modulLen*self.samplerate()/self.blocksize())
-	modEnergyValue =computeModulation(energy,frameLenModulation,True)
-		
-	# Confidence Index	
-	conf = array(modEnergyValue-self.threshold)/self.threshold
-	conf[conf>1] = 1
-
-	modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
-	modEnergy.value = conf
-	convert = {False:'NonSpeech',True:'Speech'}
-		
-	segList = segmentFromValues(modEnergyValue>self.threshold)
-	segmentsEntropy =[]
-        for s in segList :
-            segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
-                                    numpy.float(s[1])*self.blocksize()/self.samplerate(),
-                                    convert[s[2]]))
-
-        segs = AnalyzerResult(id="irit_4hzenergy_segments", name="seg 4Hz (IRIT)", unit="s")
-        segs.value = segmentsEntropy
-        return AnalyzerResultContainer([modEnergy,segs])
+        '''Accumulate the Mel-band energies of one block of frames.
+        Only the first channel (frames.T[0]) is analysed and returned.
+        '''
+
+        frames = frames.T[0]
+        # Hamming-window the frame (the window could be made configurable)
+        w = frames * hamming(len(frames))
+
+        # Magnitude spectrum (first nFFT bins), then Mel filter bank energies
+        f = abs(rfft(w, n=2 * self.nFFT)[0:self.nFFT])
+        e = dot(f ** 2, self.melFilter)
+
+        self.energy4hz.append(e)
+
+        return frames, eod
+
+    def release(self):
+        '''Band-pass filter the stored Mel energies, derive their modulation
+        and add a framewise confidence plus labelled segments to the results.
+        '''
+        # Band-pass FIR filter built from frequency_center/frequency_width
+        # NOTE(review): cut-offs are divided by the audio sample rate while
+        # energy4hz holds one entry per process() call - confirm the scaling.
+        Wo = self.frequency_center / self.samplerate()
+        Wn = [Wo - (self.frequency_width / 2) / self.samplerate(),
+              Wo + (self.frequency_width / 2) / self.samplerate()]
+        num = firwin(self.orderFilter, Wn, pass_zero=False)
+
+        # Energy on the frequency range; NOTE(review): confirm axis 0 is time
+        self.energy4hz = array(self.energy4hz)
+        energy = lfilter(num, 1, self.energy4hz.T, 0)
+        energy = sum(energy)
+
+        # Normalization
+        if self.normalizeEnergy:
+            energy = energy / mean(energy)
+
+        # Energy Modulation
+        frameLenModulation = int(
+            self.modulLen * self.samplerate() / self.blocksize())
+        modEnergyValue = computeModulation(energy, frameLenModulation, True)
+
+        # Confidence Index (relative distance to the threshold, capped at 1)
+        conf = array(modEnergyValue - self.threshold) / self.threshold
+        conf[conf > 1] = 1
+
+        modEnergy = self.new_result(data_mode='value', time_mode='framewise')
+        modEnergy.id_metadata.id += '.' + 'energy_confidence'
+        modEnergy.id_metadata.name += ' ' + 'Energy Confidence'
+
+        modEnergy.data_object.value = conf
+
+        self._results.add(modEnergy)
+
+        # Segment: 0/1 codes in the data, readable names in label_metadata
+        convert = {False: 0, True: 1}
+        label = {0: 'NonSpeech', 1: 'Speech'}
+
+        segList = segmentFromValues(modEnergyValue > self.threshold)
+
+        segs = self.new_result(data_mode='label', time_mode='segment')
+        segs.id_metadata.id += '.' + 'segments'
+        segs.id_metadata.name += ' ' + 'Segments'
+
+        segs.label_metadata.label = label
+
+        segs.data_object.label = [convert[s[2]] for s in segList]
+        segs.data_object.time = [(float(s[0]) * self.blocksize() /
+                                  self.samplerate())
+                                  for s in segList]
+        segs.data_object.duration = [(float(s[1]-s[0]) * self.blocksize() /
+                                  self.samplerate())
+                                  for s in segList]
+
+        self._results.add(segs)
diff --git a/timeside/analyzer/irit_speech_entropy.py b/timeside/analyzer/irit_speech_entropy.py
index 73ff62b..bc034b4 100644
--- a/timeside/analyzer/irit_speech_entropy.py
+++ b/timeside/analyzer/irit_speech_entropy.py
@@ -20,18 +20,21 @@
 # Author: Maxime Le Coz <lecoz@irit.fr>
 
 from timeside.core import Processor, implements, interfacedoc
-from timeside.analyzer.core import *
+from timeside.analyzer.core import Analyzer
+from timeside.analyzer.utils import entropy, computeModulation
+from timeside.analyzer.utils import segmentFromValues
 from timeside.api import IAnalyzer
 from numpy import array
 from scipy.ndimage.morphology import binary_opening
 
 
-class IRITSpeechEntropy(Processor):
+class IRITSpeechEntropy(Analyzer):
     implements(IAnalyzer)
 
     @interfacedoc
     def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
-        super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
+        super(IRITSpeechEntropy, self).setup(
+            channels, samplerate, blocksize, totalframes)
         self.entropyValue = []
         self.threshold = 0.4
         self.smoothLen = 5
@@ -45,7 +48,7 @@ class IRITSpeechEntropy(Processor):
     @staticmethod
     @interfacedoc
     def name():
-        return "Speech entropy (IRIT)"
+        return "IRIT Speech entropy"
 
     @staticmethod
     @interfacedoc
@@ -59,30 +62,47 @@ class IRITSpeechEntropy(Processor):
         self.entropyValue.append(entropy(frames))
         return frames, eod
 
-    def results(self):
+    def release(self):
+        '''Turn the stored frame entropies into a framewise confidence
+        result and a Speech/NonSpeech segment result.'''
 
-        entropyValue = numpy.array(self.entropyValue)
-        w = self.modulLen*self.samplerate()/self.blocksize()
-        modulentropy = computeModulation(entropyValue,w,False)
-        confEntropy=  array(modulentropy-self.threshold)/self.threshold
-        confEntropy[confEntropy>1] = 1
+        entropyValue = array(self.entropyValue)
+        w = self.modulLen * self.samplerate() / self.blocksize()
+        modulentropy = computeModulation(entropyValue, w, False)
+        confEntropy = array(modulentropy - self.threshold) / self.threshold
+        confEntropy[confEntropy > 1] = 1
 
-        conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
-        conf.value = confEntropy
+        conf = self.new_result(data_mode='value', time_mode='framewise')
 
+        conf.id_metadata.id += '.' + 'confidence'
+        conf.id_metadata.name += ' ' + 'Confidence'
+
+        conf.data_object.value = confEntropy
+        self._results.add(conf)
+
+        # Binary Entropy
         binaryEntropy = modulentropy > self.threshold
-        binaryEntropy = binary_opening(binaryEntropy,[1]*(self.smoothLen*2))
+        binaryEntropy = binary_opening(
+            binaryEntropy, [1] * (self.smoothLen * 2))
 
-        convert = {False:'NonSpeech',True:'Speech'}
+        convert = {False: 0, True: 1}
+        label = {0: 'NonSpeech', 1: 'Speech'}
         segList = segmentFromValues(binaryEntropy)
 
-        segmentsEntropy =[]
-        for s in segList :
-            segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
-                                    numpy.float(s[1])*self.blocksize()/self.samplerate(),
-                                    convert[s[2]]))
 
-        segs = AnalyzerResult(id="irit_entropy_segments", name="seg entropy (IRIT)", unit="s")
-        segs.value = segmentsEntropy
 
-        return AnalyzerResultContainer([conf, segs])
+        segs = self.new_result(data_mode='label', time_mode='segment')
+        segs.id_metadata.id += '.' + 'segments'
+        segs.id_metadata.name += ' ' + 'Segments'
+
+        segs.label_metadata.label = label
+
+        segs.data_object.label = [convert[s[2]] for s in segList]
+        segs.data_object.time = [(float(s[0]) * self.blocksize() /
+                                  self.samplerate())
+                                  for s in segList]
+        segs.data_object.duration = [(float(s[1]-s[0]) * self.blocksize() /
+                                  self.samplerate())
+                                  for s in segList]
+
+        self._results.add(segs)
-- 
2.39.5