feature(irit_speech_4hz): apply a median filter at the output of irit_4hz and make...

author Thomas Fillon <thomas@parisson.com>

Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)

committer Thomas Fillon <thomas@parisson.com>

Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
author Thomas Fillon <thomas@parisson.com>
Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
committer Thomas Fillon <thomas@parisson.com>
Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
diff --git a/tests/test_graphers_render_analyzers.py b/tests/test_graphers_render_analyzers.py

index 1819f64e07977ef2fae40a087a9ad8102a3ba055..a6d2d4a9f228dc2c7076fc75fc574ca9b12f9f05 100644 (file)
--- a/tests/test_graphers_render_analyzers.py
+++ b/tests/test_graphers_render_analyzers.py
@@ -8,7 +8,7 @@ import numpy as np
  from tempfile import NamedTemporaryFile
  import os
  
-PLOT = False
+PLOT = True
  
  
  class Test_graphers_analyzers(unittest.TestCase):
diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py

index b6df6cf55b83af5876aa621048503aa07403b063..18ad3915d120c124e9ec2b402ce9ad248c5ca5ed 100644 (file)
--- a/timeside/analyzer/irit_speech_4hz.py
+++ b/timeside/analyzer/irit_speech_4hz.py
@@ -24,7 +24,7 @@ from timeside.analyzer.core import Analyzer
  from timeside.analyzer.utils import melFilterBank, computeModulation
  from timeside.analyzer.utils import segmentFromValues
  from timeside.api import IAnalyzer
-from numpy import array, hamming, dot, mean, float
+from numpy import array, hamming, dot, mean, float, mod
  from numpy.fft import rfft
  from scipy.signal import firwin, lfilter
  
@@ -49,11 +49,10 @@ class IRITSpeech4Hz(Analyzer):
      implements(IAnalyzer)
  
      @interfacedoc
-    def setup(self, channels=None, samplerate=None, blocksize=None,
-              totalframes=None):
-        super(IRITSpeech4Hz, self).setup(
-            channels, samplerate, blocksize, totalframes)
+    def __init__(self, medfilt_duration=5):
+        super(IRITSpeech4Hz, self).__init__()
          self.energy4hz = []
+
          # Classification
          self.threshold = 2.0
  
@@ -63,9 +62,18 @@ class IRITSpeech4Hz(Analyzer):
          self.orderFilter = 100
  
          self.normalizeEnergy = True
+        self.modulLen = 2.0
+
+        # Median filter duration in second
+        self.medfilt_duration = medfilt_duration
+
+    @interfacedoc
+    def setup(self, channels=None, samplerate=None, blocksize=None,
+              totalframes=None):
+        super(IRITSpeech4Hz, self).setup(
+            channels, samplerate, blocksize, totalframes)
          self.nFFT = 2048
          self.nbFilters = 30
-        self.modulLen = 2.0
          self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate)
  
      @staticmethod
@@ -143,10 +151,15 @@ class IRITSpeech4Hz(Analyzer):
          convert = {False: 0, True: 1}
          label = {0: 'nonSpeech', 1: 'Speech'}
  
-        segList = segmentFromValues(modEnergyValue > self.threshold)
+        decision = modEnergyValue > self.threshold
+
+        segList = segmentFromValues(decision)
          # Hint : Median filtering could imrove smoothness of the result
-        # from scipy.signal import medfilt
-        # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31))
+        from scipy.signal import medfilt
+        output_samplerate = float(self.samplerate()) / self.input_stepsize
+        N = self.medfilt_duration * output_samplerate
+        N += 1 - mod(N, 2)  # Make N odd
+        segList_filt = segmentFromValues(medfilt(decision, N))
  
          segs = self.new_result(data_mode='label', time_mode='segment')
          segs.id_metadata.id += '.' + 'segments'
@@ -164,4 +177,23 @@ class IRITSpeech4Hz(Analyzer):
  
          self.process_pipe.results.add(segs)
  
+        # Median filter on decision
+        segs = self.new_result(data_mode='label', time_mode='segment')
+        segs.id_metadata.id += '.' + 'segments_median'
+        segs.id_metadata.name += ' ' + 'Segments after Median filtering'
+
+        segs.data_object.label_metadata.label = label
+
+        segs.data_object.label = [convert[s[2]] for s in segList_filt]
+        segs.data_object.time = [(float(s[0]) * self.blocksize() /
+                                 self.samplerate())
+                                 for s in segList_filt]
+        segs.data_object.duration = [(float(s[1] - s[0] + 1) * self.blocksize() /
+                                     self.samplerate())
+                                     for s in segList_filt]
+
+        self.process_pipe.results.add(segs)
+
+
+
          return
diff --git a/timeside/analyzer/utils.py b/timeside/analyzer/utils.py

index b8ad23c086f93ddce1d50e00645cabf657a77f1d..24264fc3455f4954c968d6745924189283adcd06 100644 (file)
--- a/timeside/analyzer/utils.py
+++ b/timeside/analyzer/utils.py
@@ -113,7 +113,6 @@ def melFilterBank(nbFilters, fftLen, sr):
                          The filter bank can be applied by matrix multiplication
                          (Use numpy *dot* function).
      '''
-
      fh = float(sr) / 2.0
      mh = 2595 * np.log10(1 + fh / 700)
  
diff --git a/timeside/grapher/render_analyzers.py b/timeside/grapher/render_analyzers.py

index f53a0d96523c7727076ae7c683834999336f94e2..dd7ea730e273115de003eca40c2cd42fe72ebe7a 100644 (file)
--- a/timeside/grapher/render_analyzers.py
+++ b/timeside/grapher/render_analyzers.py
@@ -127,7 +127,7 @@ class DisplayAnalyzer(Grapher):
  #-------------------------------------------------
  
  # Aubio Pitch
-try: # because of the dependencies on the Aubio librairy
+try:  # because of the dependencies on the Aubio library
      aubiopitch = get_processor('aubio_pitch')()
      DisplayAubioPitch = DisplayAnalyzer.create(
          analyzer=aubiopitch,
@@ -162,8 +162,18 @@ Display4hzSpeechSegmentation = DisplayAnalyzer.create(
      grapher_name='Irit 4Hz Speech Segmentation',
      background='waveform')
  
+
+# IRIT 4Hz with median filter
+irit4hz = get_processor('irit_speech_4hz')()
+Display4hzSpeechSegmentation = DisplayAnalyzer.create(
+    analyzer=irit4hz,
+    result_id='irit_speech_4hz.segments_median',
+    grapher_id='grapher_irit_speech_4hz_segments_median',
+    grapher_name='Irit 4Hz Speech Segmentation with median filter',
+    background='waveform')
+
  # IRIT Monopoly
-try: # because of the dependencies on Aubio Pitch
+try:  # because of the dependencies on Aubio Pitch
      iritmonopoly = get_processor('irit_monopoly')()
      DisplayMonopoly = DisplayAnalyzer.create(
          analyzer=iritmonopoly,
author	Thomas Fillon <thomas@parisson.com>
	Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
committer	Thomas Fillon <thomas@parisson.com>
	Mon, 30 Jun 2014 13:38:45 +0000 (15:38 +0200)
tests/test_graphers_render_analyzers.py		patch \| blob \| history
timeside/analyzer/irit_speech_4hz.py		patch \| blob \| history
timeside/analyzer/utils.py		patch \| blob \| history
timeside/grapher/render_analyzers.py		patch \| blob \| history