git.parisson.com Git - timeside.git/commitdiff
feature(irit_speech_4hz): apply a median filter at the output of irit_4hz and make...
author Thomas Fillon <thomas@parisson.com>
Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
committer Thomas Fillon <thomas@parisson.com>
Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
tests/test_graphers_render_analyzers.py
timeside/analyzer/irit_speech_4hz.py
timeside/analyzer/utils.py
timeside/grapher/render_analyzers.py

index 1819f64e07977ef2fae40a087a9ad8102a3ba055..a6d2d4a9f228dc2c7076fc75fc574ca9b12f9f05 100644 (file)
@@ -8,7 +8,7 @@ import numpy as np
 from tempfile import NamedTemporaryFile
 import os
 
-PLOT = False
+PLOT = True
 
 
 class Test_graphers_analyzers(unittest.TestCase):
index b6df6cf55b83af5876aa621048503aa07403b063..18ad3915d120c124e9ec2b402ce9ad248c5ca5ed 100644 (file)
@@ -24,7 +24,7 @@ from timeside.analyzer.core import Analyzer
 from timeside.analyzer.utils import melFilterBank, computeModulation
 from timeside.analyzer.utils import segmentFromValues
 from timeside.api import IAnalyzer
-from numpy import array, hamming, dot, mean, float
+from numpy import array, hamming, dot, mean, float, mod
 from numpy.fft import rfft
 from scipy.signal import firwin, lfilter
 
@@ -49,11 +49,10 @@ class IRITSpeech4Hz(Analyzer):
     implements(IAnalyzer)
 
     @interfacedoc
-    def setup(self, channels=None, samplerate=None, blocksize=None,
-              totalframes=None):
-        super(IRITSpeech4Hz, self).setup(
-            channels, samplerate, blocksize, totalframes)
+    def __init__(self, medfilt_duration=5):
+        super(IRITSpeech4Hz, self).__init__()
         self.energy4hz = []
+
         # Classification
         self.threshold = 2.0
 
@@ -63,9 +62,18 @@ class IRITSpeech4Hz(Analyzer):
         self.orderFilter = 100
 
         self.normalizeEnergy = True
+        self.modulLen = 2.0
+
+        # Median filter duration in second
+        self.medfilt_duration = medfilt_duration
+
+    @interfacedoc
+    def setup(self, channels=None, samplerate=None, blocksize=None,
+              totalframes=None):
+        super(IRITSpeech4Hz, self).setup(
+            channels, samplerate, blocksize, totalframes)
         self.nFFT = 2048
         self.nbFilters = 30
-        self.modulLen = 2.0
         self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate)
 
     @staticmethod
@@ -143,10 +151,15 @@ class IRITSpeech4Hz(Analyzer):
         convert = {False: 0, True: 1}
         label = {0: 'nonSpeech', 1: 'Speech'}
 
-        segList = segmentFromValues(modEnergyValue > self.threshold)
+        decision = modEnergyValue > self.threshold
+
+        segList = segmentFromValues(decision)
         # Hint : Median filtering could imrove smoothness of the result
-        # from scipy.signal import medfilt
-        # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31))
+        from scipy.signal import medfilt
+        output_samplerate = float(self.samplerate()) / self.input_stepsize
+        N = self.medfilt_duration * output_samplerate
+        N += 1 - mod(N, 2)  # Make N odd
+        segList_filt = segmentFromValues(medfilt(decision, N))
 
         segs = self.new_result(data_mode='label', time_mode='segment')
         segs.id_metadata.id += '.' + 'segments'
@@ -164,4 +177,23 @@ class IRITSpeech4Hz(Analyzer):
 
         self.process_pipe.results.add(segs)
 
+        # Median filter on decision
+        segs = self.new_result(data_mode='label', time_mode='segment')
+        segs.id_metadata.id += '.' + 'segments_median'
+        segs.id_metadata.name += ' ' + 'Segments after Median filtering'
+
+        segs.data_object.label_metadata.label = label
+
+        segs.data_object.label = [convert[s[2]] for s in segList_filt]
+        segs.data_object.time = [(float(s[0]) * self.blocksize() /
+                                 self.samplerate())
+                                 for s in segList_filt]
+        segs.data_object.duration = [(float(s[1] - s[0] + 1) * self.blocksize() /
+                                     self.samplerate())
+                                     for s in segList_filt]
+
+        self.process_pipe.results.add(segs)
+
+
+
         return
index b8ad23c086f93ddce1d50e00645cabf657a77f1d..24264fc3455f4954c968d6745924189283adcd06 100644 (file)
@@ -113,7 +113,6 @@ def melFilterBank(nbFilters, fftLen, sr):
                         The filter bank can be applied by matrix multiplication
                         (Use numpy *dot* function).
     '''
-
     fh = float(sr) / 2.0
     mh = 2595 * np.log10(1 + fh / 700)
 
index f53a0d96523c7727076ae7c683834999336f94e2..dd7ea730e273115de003eca40c2cd42fe72ebe7a 100644 (file)
@@ -127,7 +127,7 @@ class DisplayAnalyzer(Grapher):
 #-------------------------------------------------
 
 # Aubio Pitch
-try: # because of the dependencies on the Aubio librairy
+try:  # because of the dependencies on the Aubio librairy
     aubiopitch = get_processor('aubio_pitch')()
     DisplayAubioPitch = DisplayAnalyzer.create(
         analyzer=aubiopitch,
@@ -162,8 +162,18 @@ Display4hzSpeechSegmentation = DisplayAnalyzer.create(
     grapher_name='Irit 4Hz Speech Segmentation',
     background='waveform')
 
+
+# IRIT 4Hz with median filter
+irit4hz = get_processor('irit_speech_4hz')()
+Display4hzSpeechSegmentation = DisplayAnalyzer.create(
+    analyzer=irit4hz,
+    result_id='irit_speech_4hz.segments_median',
+    grapher_id='grapher_irit_speech_4hz_segments_median',
+    grapher_name='Irit 4Hz Speech Segmentation with median filter',
+    background='waveform')
+
 # IRIT Monopoly
-try: # because of the dependencies on Aubio Pitch
+try:  # because of the dependencies on Aubio Pitch
     iritmonopoly = get_processor('irit_monopoly')()
     DisplayMonopoly = DisplayAnalyzer.create(
         analyzer=iritmonopoly,