Fix segment issues in IRIT Analyzers

author Thomas Fillon <thomas@parisson.com>
Thu, 11 Jul 2013 08:28:13 +0000 (10:28 +0200)

committer Thomas Fillon <thomas@parisson.com>
Thu, 11 Jul 2013 08:28:13 +0000 (10:28 +0200)

diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py

index 3298be39747978c9e91680ffb24b37827748a61d..9605e33825485950cc25b79c9b84cc5e50fe10d0 100644 (file)
--- a/timeside/analyzer/irit_speech_4hz.py
+++ b/timeside/analyzer/irit_speech_4hz.py
@@ -65,48 +65,50 @@ class IRITSpeech4Hz(Processor):
          return "Speech confidences indexes"
  
      def process(self, frames, eod=False):
-               '''
-               
-               '''
-               
-               frames = frames.T[0]
-               w = frames * hamming(len(frames));
-               f = abs(rfft(w,n=2*self.nFFT)[0:self.nFFT])
-               e = dot(f**2,self.melFilter)
-               self.energy4hz.append(e)
-               return frames, eod
+        '''
+
+        '''
          
+        frames = frames.T[0]
+        w = frames * hamming(len(frames))
+        f = abs(rfft(w, n=2*self.nFFT)[0:self.nFFT])
+        e = dot(f**2, self.melFilter)
+        self.energy4hz.append(e)
+        return frames, eod
+
      def results(self):
-               '''
-               
-               '''             
-               #wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16))
-               
-               Wo = self.fCenter/self.samplerate()  ;
-               Wn = [ Wo-(self.fwidth/2)/self.samplerate() , Wo+(self.fwidth/2)/self.samplerate()];
-               num = firwin(self.orderFilter, Wn,pass_zero=False);
-               self.energy4hz=numpy.array(self.energy4hz)
-               energy = lfilter(num,1,self.energy4hz.T,0)
-               energy = sum(energy)
-               
-               if self.normalizeEnergy :
-                       energy =energy/mean(energy)
-                       
-               
-               w= int(float(self.modulLen)*self.samplerate()/self.blocksize())
-               modEnergyValue =computeModulation(energy,w,True)
-                               
-               conf = array(modEnergyValue-self.threshold)/self.threshold
-               conf[conf>1] = 1
-
-               modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
-               modEnergy.value = conf
-               convert = {False:'NonSpeech',True:'Speech'}
-               
-               segList = segmentFromValues(modEnergyValue>self.threshold)
-               segmentsEntropy =[]
-               for s in segList : 
-                       segmentsEntropy.append((s[0],s[1],convert[s[2]]))
-               segs = AnalyzerResult(id = "irit_4hzenergy_segments", name = "seg 4Hz (IRIT)", unit = "s")
-               segs.value = segmentsEntropy
-               return AnalyzerResultContainer([modEnergy,segs])
+        '''
+
+        '''
+        #wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16))
+
+        Wo = self.fCenter/self.samplerate()
+        Wn = [ Wo-(self.fwidth/2)/self.samplerate() , Wo+(self.fwidth/2)/self.samplerate()]
+        num = firwin(self.orderFilter, Wn, pass_zero=False)
+        self.energy4hz=numpy.array(self.energy4hz)
+        energy = lfilter(num, 1, self.energy4hz.T, 0)
+        energy = sum(energy)
+
+        if self.normalizeEnergy:
+            energy = energy / mean(energy)
+
+        w = int(float(self.modulLen) * self.samplerate() / self.blocksize())
+        modEnergyValue = computeModulation(energy, w, True)
+
+        conf = array(modEnergyValue-self.threshold)/self.threshold
+        conf[conf>1] = 1
+
+        modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
+        modEnergy.value = conf
+        convert = {False:'NonSpeech',True:'Speech'}
+
+        segList = segmentFromValues(modEnergyValue>self.threshold)
+        segmentsEntropy =[]
+        for s in segList : 
+            segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
+                                    numpy.float(s[1])*self.blocksize()/self.samplerate(),
+                                    convert[s[2]])) 
+       
+        segs = AnalyzerResult(id="irit_4hzenergy_segments", name="seg 4Hz (IRIT)", unit="s")
+        segs.value = segmentsEntropy
+        return AnalyzerResultContainer([modEnergy,segs])
diff --git a/timeside/analyzer/irit_speech_entropy.py b/timeside/analyzer/irit_speech_entropy.py

index 7d72921017aa21c9f35cb6853acb5798322c34b6..c84f76d43211a58fc079780c215b3a2e333d5841 100644 (file)
--- a/timeside/analyzer/irit_speech_entropy.py
+++ b/timeside/analyzer/irit_speech_entropy.py
@@ -56,9 +56,9 @@ class IRITSpeechEntropy(Processor):
          return "Speech confidences indexes"
  
      def process(self, frames, eod=False):
-               self.entropyValue.append(entropy(frames))
-               return frames, eod        
-      
+        self.entropyValue.append(entropy(frames))
+        return frames, eod
+
      def results(self):
  
          entropyValue = numpy.array(self.entropyValue)
@@ -66,21 +66,23 @@ class IRITSpeechEntropy(Processor):
          modulentropy = computeModulation(entropyValue,w,False)
          confEntropy=  array(modulentropy-self.threshold)/self.threshold
          confEntropy[confEntropy>1] = 1
-        
+
          conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
          conf.value = confEntropy
-        
+
          binaryEntropy = modulentropy > self.threshold
          binaryEntropy = binary_opening(binaryEntropy,[1]*(self.smoothLen*2))
-        
+
          convert = {False:'NonSpeech',True:'Speech'}
          segList = segmentFromValues(binaryEntropy)
+
          segmentsEntropy =[]
-        for s in segList : 
-            segmentsEntropy.append((s[0],s[1],convert[s[2]])) 
-        
-        segs = AnalyzerResult(id = "irit_entropy_segments", name = "seg entropy (IRIT)", unit = "s")
+        for s in segList :
+            segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(),
+                                    numpy.float(s[1])*self.blocksize()/self.samplerate(),
+                                    convert[s[2]]))
+
+        segs = AnalyzerResult(id="irit_entropy_segments", name="seg entropy (IRIT)", unit="s")
          segs.value = segmentsEntropy
  
-      
          return AnalyzerResultContainer([conf, segs])
diff --git a/timeside/analyzer/utils.py b/timeside/analyzer/utils.py

index 43b52e5d461a59b4132653a404f7dd23843ec332..0192ca7c10d4e0d0e8da5c9bc82342bbcf7c9041 100644 (file)
--- a/timeside/analyzer/utils.py
+++ b/timeside/analyzer/utils.py
@@ -37,38 +37,38 @@ def downsample_blocking(frames, hop_s, dtype='float32'):
  def computeModulation(serie,wLen,withLog=True):
          '''
          Compute the modulation of a parameter centered. Extremums are set to zero.
-        
+
          Args :
              - serie       : list or numpy array containing the serie.
              - wLen        : Length of the analyzis window in samples.
-            - withLog     : Whether compute the var() or log(var()) .    
-        
+            - withLog     : Whether compute the var() or log(var()) .
+
          Returns :
              - modul       : Modulation of the serie.
-        
+
          '''
-        
+
          modul = numpy.zeros((1,len(serie)))[0];
          w = int(wLen/2)
-        
+
          for i in range(w,len(serie)-w):
-            
+
              d = serie[i-w:i+w]
              if withLog:
                  d = numpy.log(d)
              modul[i] = numpy.var(d)
-        
+
          modul[:w] = modul[w]
-        
+
          modul[-w:] = modul[-w-1]
-    
+
          return modul;
  
  def segmentFromValues(values,offset=0):
      '''
-    
+
      '''
-    
+
      seg = [offset,-1,values[0]]
      segList = []
      for i,v in enumerate(values) :
@@ -77,111 +77,111 @@ def segmentFromValues(values,offset=0):
              seg[1] = i+offset
              segList.append(tuple(seg))
              seg = [i+offset,-1,v]
-            
-    seg[1] = i+offset
+
+    seg[1] = i+offset+1
      segList.append(tuple(seg))
  
      return segList
  
  
-# Attention 
+# Attention
  # ---------
  #
  # Double emploi avec le calcul mfcc d'aubio. Voir pour la fusion...
-#                                              Maxime
+#                         Maxime
  
  def melFilterBank(nbFilters,fftLen,sr) :
      '''
      Grenerate a Mel Filter-Bank
-        
+
      Args :
          - nbFilters  : Number of filters.
          - fftLen     : Length of the frequency range.
-        - sr         : Sampling rate of the signal to filter. 
+        - sr         : Sampling rate of the signal to filter.
      Returns :
          - filterbank : fftLen x nbFilters matrix containing one filter by column.
-                        The filter bank can be applied by matrix multiplication 
-                        (Use numpy *dot* function).      
+                        The filter bank can be applied by matrix multiplication
+                        (Use numpy *dot* function).
      '''
-        
-    fh = float(sr)/2.0    
+
+    fh = float(sr)/2.0
      mh = 2595*numpy.log10(1+fh/700)
-        
+
      step = mh/nbFilters;
-        
+
      mcenter = numpy.arange(step,mh,step)
-        
+
      fcenter = 700*(10**(mcenter/2595)-1)
-            
-    filterbank = numpy.zeros((fftLen,nbFilters));    
-        
+
+    filterbank = numpy.zeros((fftLen,nbFilters));
+
      for i,_ in enumerate(fcenter) :
-            
+
          if i == 0 :
              fmin = 0.0
          else :
              fmin = fcenter[i-1]
-                
+
          if i == len(fcenter)-1 :
              fmax = fh
          else :
-            fmax = fcenter[i+1]    
-            
+            fmax = fcenter[i+1]
+
          imin = numpy.ceil(fmin/fh*fftLen)
          imax = numpy.ceil(fmax/fh*fftLen)
-            
+
          filterbank[imin:imax,i] = triangle(imax-imin)
-    
+
      return filterbank
  
  
  def triangle(length):
      '''
      Generate a triangle filter.
-        
+
      Args :
           - length  : length of the filter.
      returns :
-        - triangle : triangle filter.    
-            
+        - triangle : triangle filter.
+
      '''
      triangle = numpy.zeros((1,length))[0]
      climax= numpy.ceil(length/2)
-        
+
      triangle[0:climax] = numpy.linspace(0,1,climax)
      triangle[climax:length] = numpy.linspace(1,0,length-climax)
      return triangle
-    
+
  
  def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'):
          '''
          Compute entropy of a serie using the histogram method.
-        
+
          Args :
              - serie     : Serie on witch compute the entropy
              - nbins     : Number of bins of the histogram
              - base      : Base used for normalisation
              - approach  : String in the following set : {unbiased,mmse}
                            for un-biasing value.
-    
+
          Returns :
              - estimate  : Entropy value
              - nbias     : N-bias of the estimate
              - sigma     : Estimated standard error
-            
-        Raises : 
-            A warning in case of unknown 'approach' value. 
-            No un-biasing is then performed 
-        
+
+        Raises :
+            A warning in case of unknown 'approach' value.
+            No un-biasing is then performed
+
          '''
-               
+
          estimate = 0
          sigma = 0
          bins,edges = numpy.histogram(serie,nbins);
          ncell = len(bins)
          norm = (numpy.max(edges)-numpy.min(edges))/len(bins)
-    
-    
+
+
          for b in bins :
              if b == 0 :
                  logf = 0
@@ -189,17 +189,17 @@ def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'):
                  logf = numpy.log(b)
              estimate = estimate - b*logf
              sigma = sigma + b * logf**2
-            
+
          count = numpy.sum(bins)
          estimate=estimate/count;
          sigma=numpy.sqrt( (sigma/count-estimate**2)/float(count-1) );
          estimate=estimate+numpy.log(count)+numpy.log(norm);
          nbias=-(ncell-1)/(2*count);
-    
+
          if approach =='unbiased' :
              estimate=estimate-nbias;
              nbias=0;
-            
+
          elif approach =='mmse' :
              estimate=estimate-nbias;
              nbias=0;
@@ -209,7 +209,7 @@ def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'):
              sigma   =lambda_value*sigma;
          else :
              return 0
-                
+
          estimate=estimate/numpy.log(base);
          nbias   =nbias   /numpy.log(base);
          sigma   =sigma   /numpy.log(base);
author	Thomas Fillon <thomas@parisson.com>
	Thu, 11 Jul 2013 08:28:13 +0000 (10:28 +0200)
committer	Thomas Fillon <thomas@parisson.com>
	Thu, 11 Jul 2013 08:28:13 +0000 (10:28 +0200)
timeside/analyzer/irit_speech_4hz.py		patch | blob | history
timeside/analyzer/irit_speech_entropy.py		patch | blob | history
timeside/analyzer/utils.py		patch | blob | history