git.parisson.com Git - timeside-diadems.git/commitdiff
switch IRIT analyzers to the new Analyzers structure
author Thomas Fillon <thomas@parisson.com>
Tue, 15 Oct 2013 21:42:23 +0000 (23:42 +0200)
committer Thomas Fillon <thomas@parisson.com>
Tue, 15 Oct 2013 21:42:23 +0000 (23:42 +0200)
timeside/analyzer/__init__.py
timeside/analyzer/irit_speech_4hz.py
timeside/analyzer/irit_speech_entropy.py

index d9c7444db0d74757a17b64bc7f8a9984f612dc4f..663ce6497cd7804d76f115053cfe318d97aac86e 100644 (file)
@@ -11,5 +11,5 @@ from yaafe import * # TF : add Yaafe analyzer
 from spectrogram import Spectrogram
 from waveform import Waveform
 from vamp_plugin import VampSimpleHost
-#from irit_speech_entropy import *
-#from irit_speech_4hz import *
+from irit_speech_entropy import IRITSpeechEntropy
+from irit_speech_4hz import IRITSpeech4Hz
index c3f0923a8f0bc2bf4f30376a2042423d1f8210e4..8d054872157ee16e9ca7e75f9102adcb11cca101 100644 (file)
 
 # Author: Maxime Le Coz <lecoz@irit.fr>
 
-from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
-from timeside.analyzer.core import *
+from timeside.core import implements, interfacedoc
+from timeside.analyzer.core import Analyzer
+from timeside.analyzer.utils import melFilterBank, computeModulation
+from timeside.analyzer.utils import segmentFromValues
 from timeside.api import IAnalyzer
-from numpy import array,hamming,dot,mean
+from numpy import array, hamming, dot, mean, float
 from numpy.fft import rfft
-from scipy.signal import firwin,lfilter
+from scipy.signal import firwin, lfilter
 
 
-class IRITSpeech4Hz(Processor):
+class IRITSpeech4Hz(Analyzer):
     implements(IAnalyzer)
     '''
     Segmentor based on the analysis of the 4Hz energy modulation.
 
     Properties:
-               - energy4hz             (list)          : List of the 4Hz energy by frame for the modulation computation
-               - threshold             (float)         : Threshold for the classification Speech/NonSpeech
-               - frequency_center      (float)         : Center of the frequency range where the energy is extracted
-               - frequency_width       (float)         : Width of the frequency range where the energy is extracted
-               - orderFilter           (int)           : Order of the pass-band filter extracting the frequency range
-               - normalizeEnergy       (boolean)       : Whether the energy must be normalized or not
-               - nFFT                          (int)           : Number of points for the FFT. Better if 512 <= nFFT <= 2048
-               - nbFilters                     (int)           : Length of the Mel Filter bank
-               - melFilter             (numpy array)   : Mel Filter bank
-               - modulLen                      (float)         : Length (in second) of the modulation computation window
+        - energy4hz         (list)        : List of the 4Hz energy by frame for the modulation computation
+        - threshold         (float)       : Threshold for the classification Speech/NonSpeech
+        - frequency_center  (float)       : Center of the frequency range where the energy is extracted
+        - frequency_width   (float)       : Width of the frequency range where the energy is extracted
+        - orderFilter       (int)         : Order of the pass-band filter extracting the frequency range
+        - normalizeEnergy   (boolean)     : Whether the energy must be normalized or not
+        - nFFT              (int)         : Number of points for the FFT. Better if 512 <= nFFT <= 2048
+        - nbFilters         (int)         : Length of the Mel Filter bank
+        - melFilter         (numpy array) : Mel Filter bank
+        - modulLen          (float)       : Length (in seconds) of the modulation computation window
     '''
 
     @interfacedoc
     def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
-        super(IRITSpeech4Hz, self).setup(channels, samplerate, blocksize, totalframes)
+        super(IRITSpeech4Hz, self).setup(
+            channels, samplerate, blocksize, totalframes)
         self.energy4hz = []
-        print "top"
         # Classification
         self.threshold = 2.0
 
         # Pass-band Filter
         self.frequency_center = 4.0
         self.frequency_width = 0.5
-        self.orderFilter=100
-
+        self.orderFilter = 100
 
         self.normalizeEnergy = True
-        self.nFFT=2048
-        self.nbFilters =30
+        self.nFFT = 2048
+        self.nbFilters = 30
         self.modulLen = 2.0
-        self.melFilter = melFilterBank(self.nbFilters,self.nFFT,samplerate);
+        self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate)
 
     @staticmethod
     @interfacedoc
@@ -73,7 +74,7 @@ class IRITSpeech4Hz(Processor):
     @staticmethod
     @interfacedoc
     def name():
-        return "Speech entropy (IRIT)"
+        return "IRIT Speech 4Hz Modulation"
 
     @staticmethod
     @interfacedoc
@@ -84,61 +85,78 @@ class IRITSpeech4Hz(Processor):
         return "Speech confidences indexes"
 
     def process(self, frames, eod=False):
-               '''
-                               
-               '''
-               
-               frames = frames.T[0]
-               # windowing of the frame (could be a changeable property)
-               w = frames * hamming(len(frames));
-               
-               # Mel scale spectrum extraction
-               f = abs(rfft(w,n=2*self.nFFT)[0:self.nFFT])
-               e = dot(f**2,self.melFilter)
-               
-               self.energy4hz.append(e)
-               
-               return frames, eod
-
-    def results(self):
-       '''
-               
-       '''     
-       print "Results"
-       # Creation of the pass-band filter      
-       Wo = self.frequency_center/self.samplerate()  ;
-       Wn = [ Wo-(self.frequency_width/2)/self.samplerate() , Wo+(self.frequency_width/2)/self.samplerate()];
-       num = firwin(self.orderFilter, Wn,pass_zero=False);
-               
-               
-       # Energy on the frequency range
-       self.energy4hz=numpy.array(self.energy4hz)              
-       energy = lfilter(num,1,self.energy4hz.T,0)
-       energy = sum(energy)
-               
-       # Normalization
-       if self.normalizeEnergy :
-               energy =energy/mean(energy)
-                       
-       # Energy Modulation
-       frameLenModulation = int(self.modulLen*self.samplerate()/self.blocksize())
-       modEnergyValue =computeModulation(energy,frameLenModulation,True)
-               
-       # Confidence Index      
-       conf = array(modEnergyValue-self.threshold)/self.threshold
-       conf[conf>1] = 1
-
-       modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
-       modEnergy.value = conf
-       convert = {False:'NonSpeech',True:'Speech'}
-               
-       segList = segmentFromValues(modEnergyValue>self.threshold)
-       segmentsEntropy =[]
-        for s in segList :
-            segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
-                                    numpy.float(s[1])*self.blocksize()/self.samplerate(),
-                                    convert[s[2]]))
-
-        segs = AnalyzerResult(id="irit_4hzenergy_segments", name="seg 4Hz (IRIT)", unit="s")
-        segs.value = segmentsEntropy
-        return AnalyzerResultContainer([modEnergy,segs])
+        '''
+        Store the Mel-band energies of the Hamming-windowed first channel.
+        '''
+
+        frames = frames.T[0]
+        # windowing of the frame (could be a changeable property)
+        w = frames * hamming(len(frames))
+
+        # Mel scale spectrum extraction
+        f = abs(rfft(w, n=2 * self.nFFT)[0:self.nFFT])
+        e = dot(f ** 2, self.melFilter)
+
+        self.energy4hz.append(e)
+
+        return frames, eod
+
+    def release(self):
+        '''
+        Add the modulation confidence and Speech/nonSpeech segment results.
+        '''
+        # Creation of the pass-band filter
+        Wo = self.frequency_center / self.samplerate()
+        Wn = [Wo - (self.frequency_width / 2) / self.samplerate(),
+              Wo + (self.frequency_width / 2) / self.samplerate()]
+        num = firwin(self.orderFilter, Wn, pass_zero=False)
+
+        # Energy on the frequency range
+        self.energy4hz = array(self.energy4hz)
+        energy = lfilter(num, 1, self.energy4hz.T, 0)
+        energy = sum(energy)
+
+        # Normalization
+        if self.normalizeEnergy:
+            energy = energy / mean(energy)
+
+        # Energy Modulation
+        frameLenModulation = int(
+            self.modulLen * self.samplerate() / self.blocksize())
+        modEnergyValue = computeModulation(energy, frameLenModulation, True)
+
+        # Confidence Index
+        conf = array(modEnergyValue - self.threshold) / self.threshold
+        conf[conf > 1] = 1
+
+        modEnergy = self.new_result(data_mode='value', time_mode='framewise')
+        modEnergy.id_metadata.id += '.' + 'energy_confidence'
+        modEnergy.id_metadata.name += ' ' + 'Energy Confidence'
+
+        modEnergy.data_object.value = conf
+
+        self._results.add(modEnergy)
+
+        # Segment
+        convert = {False: 0, True: 1}
+        label = {0: 'nonSpeech', 1: 'Speech'}
+
+        segList = segmentFromValues(modEnergyValue > self.threshold)
+
+        segs = self.new_result(data_mode='label', time_mode='segment')
+        segs.id_metadata.id += '.' + 'segments'
+        segs.id_metadata.name += ' ' + 'Segments'
+
+        segs.label_metadata.label = label
+
+        segs.data_object.label = [convert[s[2]] for s in segList]
+        segs.data_object.time = [(float(s[0]) * self.blocksize() /
+                                  self.samplerate())
+                                  for s in segList]
+        segs.data_object.duration = [(float(s[1]-s[0]) * self.blocksize() /
+                                  self.samplerate())
+                                  for s in segList]
+
+        self._results.add(segs)
+
+        return
index 73ff62b61ffd7b68df0d4ff8c24cecf83f25600f..bc034b45a1509863694d8eca234e550ae1beebd7 100644 (file)
 # Author: Maxime Le Coz <lecoz@irit.fr>
 
 from timeside.core import Processor, implements, interfacedoc
-from timeside.analyzer.core import *
+from timeside.analyzer.core import Analyzer
+from timeside.analyzer.utils import entropy, computeModulation
+from timeside.analyzer.utils import segmentFromValues
 from timeside.api import IAnalyzer
 from numpy import array
 from scipy.ndimage.morphology import binary_opening
 
 
-class IRITSpeechEntropy(Processor):
+class IRITSpeechEntropy(Analyzer):
     implements(IAnalyzer)
 
     @interfacedoc
     def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
-        super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
+        super(IRITSpeechEntropy, self).setup(
+            channels, samplerate, blocksize, totalframes)
         self.entropyValue = []
         self.threshold = 0.4
         self.smoothLen = 5
@@ -45,7 +48,7 @@ class IRITSpeechEntropy(Processor):
     @staticmethod
     @interfacedoc
     def name():
-        return "Speech entropy (IRIT)"
+        return "IRIT Speech entropy"
 
     @staticmethod
     @interfacedoc
@@ -59,30 +62,47 @@ class IRITSpeechEntropy(Processor):
         self.entropyValue.append(entropy(frames))
         return frames, eod
 
-    def results(self):
+    def release(self):
 
-        entropyValue = numpy.array(self.entropyValue)
-        w = self.modulLen*self.samplerate()/self.blocksize()
-        modulentropy = computeModulation(entropyValue,w,False)
-        confEntropy=  array(modulentropy-self.threshold)/self.threshold
-        confEntropy[confEntropy>1] = 1
+        entropyValue = array(self.entropyValue)
+        w = self.modulLen * self.samplerate() / self.blocksize()
+        modulentropy = computeModulation(entropyValue, w, False)
+        confEntropy = array(modulentropy - self.threshold) / self.threshold
+        confEntropy[confEntropy > 1] = 1
 
-        conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
-        conf.value = confEntropy
+        conf = self.new_result(data_mode='value', time_mode='framewise')
 
+        conf.id_metadata.id += '.' + 'confidence'
+        conf.id_metadata.name += ' ' + 'Confidence'
+
+        conf.data_object.value = confEntropy
+        self._results.add(conf)
+
+        # Binary Entropy
         binaryEntropy = modulentropy > self.threshold
-        binaryEntropy = binary_opening(binaryEntropy,[1]*(self.smoothLen*2))
+        binaryEntropy = binary_opening(
+            binaryEntropy, [1] * (self.smoothLen * 2))
 
-        convert = {False:'NonSpeech',True:'Speech'}
+        convert = {False: 0, True: 1}
+        label = {0: 'NonSpeech', 1: 'Speech'}
         segList = segmentFromValues(binaryEntropy)
 
-        segmentsEntropy =[]
-        for s in segList :
-            segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
-                                    numpy.float(s[1])*self.blocksize()/self.samplerate(),
-                                    convert[s[2]]))
 
-        segs = AnalyzerResult(id="irit_entropy_segments", name="seg entropy (IRIT)", unit="s")
-        segs.value = segmentsEntropy
 
-        return AnalyzerResultContainer([conf, segs])
+        segs = self.new_result(data_mode='label', time_mode='segment')
+        segs.id_metadata.id += '.' + 'segments'
+        segs.id_metadata.name += ' ' + 'Segments'
+
+        segs.data_object.label = segList
+
+        segs.data_object.label = [convert[s[2]] for s in segList]
+        segs.data_object.time = [(float(s[0]) * self.blocksize() /
+                                  self.samplerate())
+                                  for s in segList]
+        segs.data_object.duration = [(float(s[1]-s[0]) * self.blocksize() /
+                                  self.samplerate())
+                                  for s in segList]
+
+        self._results.add(segs)
+
+        return