From: Thomas Fillon <thomas@parisson.com>
Date: Thu, 11 Jul 2013 08:28:13 +0000 (+0200)
Subject: Fix segment issues in IRIT Analyzers
X-Git-Tag: 0.5.0~31^2~1
X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=c00843cb8744d57fd0b6195aa9a5d4c41b1a471c;p=timeside.git

Fix segment issues in IRIT Analyzers

- Fix segmentFromValues to properly handle the last segment
- Fix irit_entropy_segments and irit_4hzenergy_segments results format to have segment limits in seconds
---

diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py
index 3298be3..9605e33 100644
--- a/timeside/analyzer/irit_speech_4hz.py
+++ b/timeside/analyzer/irit_speech_4hz.py
@@ -65,48 +65,50 @@ class IRITSpeech4Hz(Processor):
         return "Speech confidences indexes"
 
     def process(self, frames, eod=False):
-		'''
-		
-		'''
-		
-		frames = frames.T[0]
-		w = frames * hamming(len(frames));
-		f = abs(rfft(w,n=2*self.nFFT)[0:self.nFFT])
-		e = dot(f**2,self.melFilter)
-		self.energy4hz.append(e)
-		return frames, eod
+        '''
+
+        '''
         
+        frames = frames.T[0]
+        w = frames * hamming(len(frames))
+        f = abs(rfft(w, n=2*self.nFFT)[0:self.nFFT])
+        e = dot(f**2, self.melFilter)
+        self.energy4hz.append(e)
+        return frames, eod
+
     def results(self):
-		'''
-		
-		'''		
-		#wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16))
-		
-		Wo = self.fCenter/self.samplerate()  ;
-		Wn = [ Wo-(self.fwidth/2)/self.samplerate() , Wo+(self.fwidth/2)/self.samplerate()];
-		num = firwin(self.orderFilter, Wn,pass_zero=False);
-		self.energy4hz=numpy.array(self.energy4hz)
-		energy = lfilter(num,1,self.energy4hz.T,0)
-		energy = sum(energy)
-		
-		if self.normalizeEnergy :
-			energy =energy/mean(energy)
-			
-		
-		w= int(float(self.modulLen)*self.samplerate()/self.blocksize())
-		modEnergyValue =computeModulation(energy,w,True)
-				
-		conf = array(modEnergyValue-self.threshold)/self.threshold
-		conf[conf>1] = 1
-
-		modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
-		modEnergy.value = conf
-		convert = {False:'NonSpeech',True:'Speech'}
-		
-		segList = segmentFromValues(modEnergyValue>self.threshold)
-		segmentsEntropy =[]
-		for s in segList : 
-			segmentsEntropy.append((s[0],s[1],convert[s[2]]))
-		segs = AnalyzerResult(id = "irit_4hzenergy_segments", name = "seg 4Hz (IRIT)", unit = "s")
-		segs.value = segmentsEntropy
-		return AnalyzerResultContainer([modEnergy,segs])
+        '''
+
+        '''
+        #wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16))
+
+        Wo = self.fCenter/self.samplerate()
+        Wn = [ Wo-(self.fwidth/2)/self.samplerate() , Wo+(self.fwidth/2)/self.samplerate()]
+        num = firwin(self.orderFilter, Wn, pass_zero=False)
+        self.energy4hz=numpy.array(self.energy4hz)
+        energy = lfilter(num, 1, self.energy4hz.T, 0)
+        energy = sum(energy)
+
+        if self.normalizeEnergy:
+            energy = energy / mean(energy)
+
+        w = int(float(self.modulLen) * self.samplerate() / self.blocksize())
+        modEnergyValue = computeModulation(energy, w, True)
+
+        conf = array(modEnergyValue-self.threshold)/self.threshold
+        conf[conf>1] = 1
+
+        modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
+        modEnergy.value = conf
+        convert = {False:'NonSpeech',True:'Speech'}
+
+        segList = segmentFromValues(modEnergyValue>self.threshold)
+        segmentsEntropy =[]
+        for s in segList : 
+            segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
+                                    numpy.float(s[1])*self.blocksize()/self.samplerate(),
+                                    convert[s[2]])) 
+       
+        segs = AnalyzerResult(id="irit_4hzenergy_segments", name="seg 4Hz (IRIT)", unit="s")
+        segs.value = segmentsEntropy
+        return AnalyzerResultContainer([modEnergy,segs])
diff --git a/timeside/analyzer/irit_speech_entropy.py b/timeside/analyzer/irit_speech_entropy.py
index 7d72921..c84f76d 100644
--- a/timeside/analyzer/irit_speech_entropy.py
+++ b/timeside/analyzer/irit_speech_entropy.py
@@ -56,9 +56,9 @@ class IRITSpeechEntropy(Processor):
         return "Speech confidences indexes"
 
     def process(self, frames, eod=False):
-		self.entropyValue.append(entropy(frames))
-		return frames, eod        
-      
+        self.entropyValue.append(entropy(frames))
+        return frames, eod
+
     def results(self):
 
         entropyValue = numpy.array(self.entropyValue)
@@ -66,21 +66,23 @@ class IRITSpeechEntropy(Processor):
         modulentropy = computeModulation(entropyValue,w,False)
         confEntropy=  array(modulentropy-self.threshold)/self.threshold
         confEntropy[confEntropy>1] = 1
-        
+
         conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
         conf.value = confEntropy
-        
+
         binaryEntropy = modulentropy > self.threshold
         binaryEntropy = binary_opening(binaryEntropy,[1]*(self.smoothLen*2))
-        
+
         convert = {False:'NonSpeech',True:'Speech'}
         segList = segmentFromValues(binaryEntropy)
+
         segmentsEntropy =[]
-        for s in segList : 
-            segmentsEntropy.append((s[0],s[1],convert[s[2]])) 
-        
-        segs = AnalyzerResult(id = "irit_entropy_segments", name = "seg entropy (IRIT)", unit = "s")
+        for s in segList :
+            segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
+                                    numpy.float(s[1])*self.blocksize()/self.samplerate(),
+                                    convert[s[2]]))
+
+        segs = AnalyzerResult(id="irit_entropy_segments", name="seg entropy (IRIT)", unit="s")
         segs.value = segmentsEntropy
 
-      
         return AnalyzerResultContainer([conf, segs])
diff --git a/timeside/analyzer/utils.py b/timeside/analyzer/utils.py
index 43b52e5..0192ca7 100644
--- a/timeside/analyzer/utils.py
+++ b/timeside/analyzer/utils.py
@@ -37,38 +37,38 @@ def downsample_blocking(frames, hop_s, dtype='float32'):
 def computeModulation(serie,wLen,withLog=True):
         '''
         Compute the modulation of a parameter centered. Extremums are set to zero.
-        
+
         Args :
             - serie       : list or numpy array containing the serie.
             - wLen        : Length of the analyzis window in samples.
-            - withLog     : Whether compute the var() or log(var()) .    
-        
+            - withLog     : Whether compute the var() or log(var()) .
+
         Returns :
             - modul       : Modulation of the serie.
-        
+
         '''
-        
+
         modul = numpy.zeros((1,len(serie)))[0];
         w = int(wLen/2)
-        
+
         for i in range(w,len(serie)-w):
-            
+
             d = serie[i-w:i+w]
             if withLog:
                 d = numpy.log(d)
             modul[i] = numpy.var(d)
-        
+
         modul[:w] = modul[w]
-        
+
         modul[-w:] = modul[-w-1]
-    
+
         return modul;
 
 def segmentFromValues(values,offset=0):
     '''
-    
+
     '''
-    
+
     seg = [offset,-1,values[0]]
     segList = []
     for i,v in enumerate(values) :
@@ -77,111 +77,111 @@ def segmentFromValues(values,offset=0):
             seg[1] = i+offset
             segList.append(tuple(seg))
             seg = [i+offset,-1,v]
-            
-    seg[1] = i+offset
+
+    seg[1] = i+offset+1
     segList.append(tuple(seg))
 
     return segList
 
 
-# Attention 
+# Attention
 # ---------
 #
 # Double emploi avec le calcul mfcc d'aubio. Voir pour la fusion...
-# 						Maxime
+#                         Maxime
 
 def melFilterBank(nbFilters,fftLen,sr) :
     '''
     Grenerate a Mel Filter-Bank
-        
+
     Args :
         - nbFilters  : Number of filters.
         - fftLen     : Length of the frequency range.
-        - sr         : Sampling rate of the signal to filter. 
+        - sr         : Sampling rate of the signal to filter.
     Returns :
         - filterbank : fftLen x nbFilters matrix containing one filter by column.
-                        The filter bank can be applied by matrix multiplication 
-                        (Use numpy *dot* function).      
+                        The filter bank can be applied by matrix multiplication
+                        (Use numpy *dot* function).
     '''
-        
-    fh = float(sr)/2.0    
+
+    fh = float(sr)/2.0
     mh = 2595*numpy.log10(1+fh/700)
-        
+
     step = mh/nbFilters;
-        
+
     mcenter = numpy.arange(step,mh,step)
-        
+
     fcenter = 700*(10**(mcenter/2595)-1)
-            
-    filterbank = numpy.zeros((fftLen,nbFilters));    
-        
+
+    filterbank = numpy.zeros((fftLen,nbFilters));
+
     for i,_ in enumerate(fcenter) :
-            
+
         if i == 0 :
             fmin = 0.0
         else :
             fmin = fcenter[i-1]
-                
+
         if i == len(fcenter)-1 :
             fmax = fh
         else :
-            fmax = fcenter[i+1]    
-            
+            fmax = fcenter[i+1]
+
         imin = numpy.ceil(fmin/fh*fftLen)
         imax = numpy.ceil(fmax/fh*fftLen)
-            
+
         filterbank[imin:imax,i] = triangle(imax-imin)
-    
+
     return filterbank
 
 
 def triangle(length):
     '''
     Generate a triangle filter.
-        
+
     Args :
          - length  : length of the filter.
     returns :
-        - triangle : triangle filter.    
-            
+        - triangle : triangle filter.
+
     '''
     triangle = numpy.zeros((1,length))[0]
     climax= numpy.ceil(length/2)
-        
+
     triangle[0:climax] = numpy.linspace(0,1,climax)
     triangle[climax:length] = numpy.linspace(1,0,length-climax)
     return triangle
-    
+
 
 def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'):
         '''
         Compute entropy of a serie using the histogram method.
-        
+
         Args :
             - serie     : Serie on witch compute the entropy
             - nbins     : Number of bins of the histogram
             - base      : Base used for normalisation
             - approach  : String in the following set : {unbiased,mmse}
                           for un-biasing value.
-    
+
         Returns :
             - estimate  : Entropy value
             - nbias     : N-bias of the estimate
             - sigma     : Estimated standard error
-            
-        Raises : 
-            A warning in case of unknown 'approach' value. 
-            No un-biasing is then performed 
-        
+
+        Raises :
+            A warning in case of unknown 'approach' value.
+            No un-biasing is then performed
+
         '''
-               
+
         estimate = 0
         sigma = 0
         bins,edges = numpy.histogram(serie,nbins);
         ncell = len(bins)
         norm = (numpy.max(edges)-numpy.min(edges))/len(bins)
-    
-    
+
+
         for b in bins :
             if b == 0 :
                 logf = 0
@@ -189,17 +189,17 @@ def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'):
                 logf = numpy.log(b)
             estimate = estimate - b*logf
             sigma = sigma + b * logf**2
-            
+
         count = numpy.sum(bins)
         estimate=estimate/count;
         sigma=numpy.sqrt( (sigma/count-estimate**2)/float(count-1) );
         estimate=estimate+numpy.log(count)+numpy.log(norm);
         nbias=-(ncell-1)/(2*count);
-    
+
         if approach =='unbiased' :
             estimate=estimate-nbias;
             nbias=0;
-            
+
         elif approach =='mmse' :
             estimate=estimate-nbias;
             nbias=0;
@@ -209,7 +209,7 @@ def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'):
             sigma   =lambda_value*sigma;
         else :
             return 0
-                
+
         estimate=estimate/numpy.log(base);
         nbias   =nbias   /numpy.log(base);
         sigma   =sigma   /numpy.log(base);