From: LeCoz <Maxime.lecoz@Gmail.com>
Date: Thu, 11 Jul 2013 11:48:48 +0000 (+0100)
Subject: Correction of the bug on the segmentation from value
X-Git-Tag: 0.5.0~31^2
X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=36a4f9e4f466617c5f7382f69a043748520ed047;p=timeside.git

Correction of the bug on the segmentation from value
Comments on the 4Hz Irit Speech segmentor
---

diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py
index 9605e33..5658916 100644
--- a/timeside/analyzer/irit_speech_4hz.py
+++ b/timeside/analyzer/irit_speech_4hz.py
@@ -31,21 +31,42 @@ from matplotlib import pylab
 
 class IRITSpeech4Hz(Processor):
     implements(IValueAnalyzer)
+    '''
+    Segmentor based on the analysis of the 4Hz energy modulation.
+
+    Properties:
+		- energy4hz 		(list) 		: List of the 4Hz energy by frame for the modulation computation
+		- threshold 		(float) 	: Threshold for the classification Speech/NonSpeech
+		- frequency_center	(float)		: Center of the frequency range where the energy is extracted
+		- frequency_width	(float)		: Width of the frequency range where the energy is extracted
+		- orderFilter		(int)		: Order of the pass-band filter extracting the frequency range
+		- normalizeEnergy	(boolean)	: Whether the energy must be normalized or not 
+		- nFFT 				(int)		: Number of points for the FFT. Better if 512 <= nFFT <= 2048
+		- nbFilters			(int)		: Length of the Mel Filter bank 
+		- melFilter		(numpy array)	: Mel Filter bank 
+		- modulLen			(float)		: Length (in second) of the modulation computation window 
+    '''
 
     @interfacedoc
     def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
         super(IRITSpeech4Hz, self).setup(channels, samplerate, blocksize, totalframes)
         self.energy4hz = []
+        print "top"
+        # Classification
         self.threshold = 2.0
-        self.smoothLen = 5
-        self.fCenter = 4.0
+        
+        # Pass-band Filter 
+        self.frequency_center = 4.0
+        self.frequency_width = 0.5
+        self.orderFilter=100
+
+
         self.normalizeEnergy = True
         self.nFFT=2048
-        self.orderFilter=100
         self.nbFilters =30
-        self.modulLen = 2
-        self.fwidth = 0.5
+        self.modulLen = 2.0
         self.melFilter = melFilterBank(self.nbFilters,self.nFFT,samplerate);
+
     @staticmethod
     @interfacedoc
     def id():
@@ -65,45 +86,56 @@ class IRITSpeech4Hz(Processor):
         return "Speech confidences indexes"
 
     def process(self, frames, eod=False):
-        '''
-
-        '''
+		'''
+				
+		'''
+		
+		frames = frames.T[0]
+		# windowing of the frame (could be a changeable property)
+		w = frames * hamming(len(frames));
+		
+		# Mel scale spectrum extraction
+		f = abs(rfft(w,n=2*self.nFFT)[0:self.nFFT])
+		e = dot(f**2,self.melFilter)
+		
+		self.energy4hz.append(e)
+		
+		return frames, eod
         
-        frames = frames.T[0]
-        w = frames * hamming(len(frames))
-        f = abs(rfft(w, n=2*self.nFFT)[0:self.nFFT])
-        e = dot(f**2, self.melFilter)
-        self.energy4hz.append(e)
-        return frames, eod
-
     def results(self):
-        '''
-
-        '''
-        #wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16))
-
-        Wo = self.fCenter/self.samplerate()
-        Wn = [ Wo-(self.fwidth/2)/self.samplerate() , Wo+(self.fwidth/2)/self.samplerate()]
-        num = firwin(self.orderFilter, Wn, pass_zero=False)
-        self.energy4hz=numpy.array(self.energy4hz)
-        energy = lfilter(num, 1, self.energy4hz.T, 0)
-        energy = sum(energy)
-
-        if self.normalizeEnergy:
-            energy = energy / mean(energy)
-
-        w = int(float(self.modulLen) * self.samplerate() / self.blocksize())
-        modEnergyValue = computeModulation(energy, w, True)
-
-        conf = array(modEnergyValue-self.threshold)/self.threshold
-        conf[conf>1] = 1
-
-        modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
-        modEnergy.value = conf
-        convert = {False:'NonSpeech',True:'Speech'}
-
-        segList = segmentFromValues(modEnergyValue>self.threshold)
-        segmentsEntropy =[]
+	'''
+		
+	'''	
+	print "Results"
+	# Creation of the pass-band filter	
+	Wo = self.frequency_center/self.samplerate()  ;
+	Wn = [ Wo-(self.frequency_width/2)/self.samplerate() , Wo+(self.frequency_width/2)/self.samplerate()];
+	num = firwin(self.orderFilter, Wn,pass_zero=False);
+		
+		
+	# Energy on the frequency range 
+	self.energy4hz=numpy.array(self.energy4hz)		
+	energy = lfilter(num,1,self.energy4hz.T,0)
+	energy = sum(energy)
+		
+	# Normalization
+	if self.normalizeEnergy :
+		energy =energy/mean(energy)
+			
+	# Energy Modulation
+	frameLenModulation = int(self.modulLen*self.samplerate()/self.blocksize())
+	modEnergyValue =computeModulation(energy,frameLenModulation,True)
+		
+	# Confidence Index	
+	conf = array(modEnergyValue-self.threshold)/self.threshold
+	conf[conf>1] = 1
+
+	modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
+	modEnergy.value = conf
+	convert = {False:'NonSpeech',True:'Speech'}
+		
+	segList = segmentFromValues(modEnergyValue>self.threshold)
+	segmentsEntropy =[]
         for s in segList : 
             segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
                                     numpy.float(s[1])*self.blocksize()/self.samplerate(),
diff --git a/timeside/analyzer/utils.py b/timeside/analyzer/utils.py
index 0192ca7..27a1c35 100644
--- a/timeside/analyzer/utils.py
+++ b/timeside/analyzer/utils.py
@@ -74,11 +74,11 @@ def segmentFromValues(values,offset=0):
     for i,v in enumerate(values) :
 
         if not (v == seg[2]) :
-            seg[1] = i+offset
+            seg[1] = i+offset-1
             segList.append(tuple(seg))
             seg = [i+offset,-1,v]
 
-    seg[1] = i+offset+1
+    seg[1] = i+offset
     segList.append(tuple(seg))
 
     return segList