git.parisson.com Git - timeside-diadems.git/commitdiff
feature(irit_speech_4hz): apply a median filter at the output of irit_4hz and make...
author Thomas Fillon <thomas@parisson.com>
Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
committer Thomas Fillon <thomas@parisson.com>
Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
timeside/analyzer/irit_speech_4hz.py
timeside/analyzer/utils.py

index b6df6cf55b83af5876aa621048503aa07403b063..18ad3915d120c124e9ec2b402ce9ad248c5ca5ed 100644 (file)
@@ -24,7 +24,7 @@ from timeside.analyzer.core import Analyzer
 from timeside.analyzer.utils import melFilterBank, computeModulation
 from timeside.analyzer.utils import segmentFromValues
 from timeside.api import IAnalyzer
-from numpy import array, hamming, dot, mean, float
+from numpy import array, hamming, dot, mean, float, mod
 from numpy.fft import rfft
 from scipy.signal import firwin, lfilter
 
@@ -49,11 +49,10 @@ class IRITSpeech4Hz(Analyzer):
     implements(IAnalyzer)
 
     @interfacedoc
-    def setup(self, channels=None, samplerate=None, blocksize=None,
-              totalframes=None):
-        super(IRITSpeech4Hz, self).setup(
-            channels, samplerate, blocksize, totalframes)
+    def __init__(self, medfilt_duration=5):
+        super(IRITSpeech4Hz, self).__init__()
         self.energy4hz = []
+
         # Classification
         self.threshold = 2.0
 
@@ -63,9 +62,18 @@ class IRITSpeech4Hz(Analyzer):
         self.orderFilter = 100
 
         self.normalizeEnergy = True
+        self.modulLen = 2.0
+
+        # Median filter duration in seconds
+        self.medfilt_duration = medfilt_duration
+
+    @interfacedoc
+    def setup(self, channels=None, samplerate=None, blocksize=None,
+              totalframes=None):
+        super(IRITSpeech4Hz, self).setup(
+            channels, samplerate, blocksize, totalframes)
         self.nFFT = 2048
         self.nbFilters = 30
-        self.modulLen = 2.0
         self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate)
 
     @staticmethod
@@ -143,10 +151,15 @@ class IRITSpeech4Hz(Analyzer):
         convert = {False: 0, True: 1}
         label = {0: 'nonSpeech', 1: 'Speech'}
 
-        segList = segmentFromValues(modEnergyValue > self.threshold)
+        decision = modEnergyValue > self.threshold
+
+        segList = segmentFromValues(decision)
         # Hint : Median filtering could improve smoothness of the result
-        # from scipy.signal import medfilt
-        # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31))
+        from scipy.signal import medfilt
+        output_samplerate = float(self.samplerate()) / self.input_stepsize
+        N = self.medfilt_duration * output_samplerate
+        N += 1 - mod(N, 2)  # Make N odd
+        segList_filt = segmentFromValues(medfilt(decision, N))
 
         segs = self.new_result(data_mode='label', time_mode='segment')
         segs.id_metadata.id += '.' + 'segments'
@@ -164,4 +177,23 @@ class IRITSpeech4Hz(Analyzer):
 
         self.process_pipe.results.add(segs)
 
+        # Median filter on decision
+        segs = self.new_result(data_mode='label', time_mode='segment')
+        segs.id_metadata.id += '.' + 'segments_median'
+        segs.id_metadata.name += ' ' + 'Segments after Median filtering'
+
+        segs.data_object.label_metadata.label = label
+
+        segs.data_object.label = [convert[s[2]] for s in segList_filt]
+        segs.data_object.time = [(float(s[0]) * self.blocksize() /
+                                 self.samplerate())
+                                 for s in segList_filt]
+        segs.data_object.duration = [(float(s[1] - s[0] + 1) * self.blocksize() /
+                                     self.samplerate())
+                                     for s in segList_filt]
+
+        self.process_pipe.results.add(segs)
+
+
+
         return
index b8ad23c086f93ddce1d50e00645cabf657a77f1d..24264fc3455f4954c968d6745924189283adcd06 100644 (file)
@@ -113,7 +113,6 @@ def melFilterBank(nbFilters, fftLen, sr):
                         The filter bank can be applied by matrix multiplication
                         (Use numpy *dot* function).
     '''
-
     fh = float(sr) / 2.0
     mh = 2595 * np.log10(1 + fh / 700)