From: Thomas Fillon Date: Mon, 30 Jun 2014 13:38:45 +0000 (+0200) Subject: feature(irit_speech_4hz): apply a median filter at the output of irit_4hz and make... X-Git-Tag: 0.6~4^2~75 X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=e8d3f64159fef86892474a322baf78d15e01f1ef;p=timeside.git feature(irit_speech_4hz): apply a median filter at the output of irit_4hz and make it a new result --- diff --git a/tests/test_graphers_render_analyzers.py b/tests/test_graphers_render_analyzers.py index 1819f64..a6d2d4a 100644 --- a/tests/test_graphers_render_analyzers.py +++ b/tests/test_graphers_render_analyzers.py @@ -8,7 +8,7 @@ import numpy as np from tempfile import NamedTemporaryFile import os -PLOT = False +PLOT = True class Test_graphers_analyzers(unittest.TestCase): diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py index b6df6cf..18ad391 100644 --- a/timeside/analyzer/irit_speech_4hz.py +++ b/timeside/analyzer/irit_speech_4hz.py @@ -24,7 +24,7 @@ from timeside.analyzer.core import Analyzer from timeside.analyzer.utils import melFilterBank, computeModulation from timeside.analyzer.utils import segmentFromValues from timeside.api import IAnalyzer -from numpy import array, hamming, dot, mean, float +from numpy import array, hamming, dot, mean, float, mod from numpy.fft import rfft from scipy.signal import firwin, lfilter @@ -49,11 +49,10 @@ class IRITSpeech4Hz(Analyzer): implements(IAnalyzer) @interfacedoc - def setup(self, channels=None, samplerate=None, blocksize=None, - totalframes=None): - super(IRITSpeech4Hz, self).setup( - channels, samplerate, blocksize, totalframes) + def __init__(self, medfilt_duration=5): + super(IRITSpeech4Hz, self).__init__() self.energy4hz = [] + # Classification self.threshold = 2.0 @@ -63,9 +62,18 @@ class IRITSpeech4Hz(Analyzer): self.orderFilter = 100 self.normalizeEnergy = True + self.modulLen = 2.0 + + # Median filter duration in second + self.medfilt_duration = medfilt_duration + + @interfacedoc + def setup(self, channels=None, samplerate=None, blocksize=None, + totalframes=None): + super(IRITSpeech4Hz, self).setup( + channels, samplerate, blocksize, totalframes) self.nFFT = 2048 self.nbFilters = 30 - self.modulLen = 2.0 self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate) @staticmethod @@ -143,10 +151,15 @@ class IRITSpeech4Hz(Analyzer): convert = {False: 0, True: 1} label = {0: 'nonSpeech', 1: 'Speech'} - segList = segmentFromValues(modEnergyValue > self.threshold) + decision = modEnergyValue > self.threshold + + segList = segmentFromValues(decision) # Hint : Median filtering could imrove smoothness of the result - # from scipy.signal import medfilt - # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31)) + from scipy.signal import medfilt + output_samplerate = float(self.samplerate()) / self.input_stepsize + N = self.medfilt_duration * output_samplerate + N += 1 - mod(N, 2) # Make N odd + segList_filt = segmentFromValues(medfilt(decision, N)) segs = self.new_result(data_mode='label', time_mode='segment') segs.id_metadata.id += '.' + 'segments' @@ -164,4 +177,23 @@ class IRITSpeech4Hz(Analyzer): self.process_pipe.results.add(segs) + # Median filter on decision + segs = self.new_result(data_mode='label', time_mode='segment') + segs.id_metadata.id += '.' + 'segments_median' + segs.id_metadata.name += ' ' + 'Segments after Median filtering' + + segs.data_object.label_metadata.label = label + + segs.data_object.label = [convert[s[2]] for s in segList_filt] + segs.data_object.time = [(float(s[0]) * self.blocksize() / + self.samplerate()) + for s in segList_filt] + segs.data_object.duration = [(float(s[1] - s[0] + 1) * self.blocksize() / + self.samplerate()) + for s in segList_filt] + + self.process_pipe.results.add(segs) + + + return diff --git a/timeside/analyzer/utils.py b/timeside/analyzer/utils.py index b8ad23c..24264fc 100644 --- a/timeside/analyzer/utils.py +++ b/timeside/analyzer/utils.py @@ -113,7 +113,6 @@ def melFilterBank(nbFilters, fftLen, sr): The filter bank can be applied by matrix multiplication (Use numpy *dot* function). ''' - fh = float(sr) / 2.0 mh = 2595 * np.log10(1 + fh / 700) diff --git a/timeside/grapher/render_analyzers.py b/timeside/grapher/render_analyzers.py index f53a0d9..dd7ea73 100644 --- a/timeside/grapher/render_analyzers.py +++ b/timeside/grapher/render_analyzers.py @@ -127,7 +127,7 @@ class DisplayAnalyzer(Grapher): #------------------------------------------------- # Aubio Pitch -try: # because of the dependencies on the Aubio librairy +try: # because of the dependencies on the Aubio librairy aubiopitch = get_processor('aubio_pitch')() DisplayAubioPitch = DisplayAnalyzer.create( analyzer=aubiopitch, @@ -162,8 +162,18 @@ Display4hzSpeechSegmentation = DisplayAnalyzer.create( grapher_name='Irit 4Hz Speech Segmentation', background='waveform') + +# IRIT 4Hz with median filter +irit4hz = get_processor('irit_speech_4hz')() +Display4hzSpeechSegmentation = DisplayAnalyzer.create( + analyzer=irit4hz, + result_id='irit_speech_4hz.segments_median', + grapher_id='grapher_irit_speech_4hz_segments_median', + grapher_name='Irit 4Hz Speech Segmentation with median filter', + background='waveform') + # IRIT Monopoly -try: # because of the dependencies on Aubio Pitch +try: # because of the dependencies on Aubio Pitch iritmonopoly = get_processor('irit_monopoly')() DisplayMonopoly = DisplayAnalyzer.create( analyzer=iritmonopoly,