From d12a66b106bba09d14fa296599a370554df637e6 Mon Sep 17 00:00:00 2001 From: Maxime LE COZ Date: Wed, 12 Mar 2014 13:41:07 +0100 Subject: [PATCH] .orig files deletion --- timeside/analyzer/irit_speech_4hz.py.orig | 178 ------------------ timeside/analyzer/irit_speech_entropy.py.orig | 118 ------------ 2 files changed, 296 deletions(-) delete mode 100644 timeside/analyzer/irit_speech_4hz.py.orig delete mode 100644 timeside/analyzer/irit_speech_entropy.py.orig diff --git a/timeside/analyzer/irit_speech_4hz.py.orig b/timeside/analyzer/irit_speech_4hz.py.orig deleted file mode 100644 index 6acac1c..0000000 --- a/timeside/analyzer/irit_speech_4hz.py.orig +++ /dev/null @@ -1,178 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2013 Maxime Le Coz - -# This file is part of TimeSide. - -# TimeSide is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 2 of the License, or -# (at your option) any later version. - -# TimeSide is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with TimeSide. If not, see . - -# Author: Maxime Le Coz - -from timeside.core import implements, interfacedoc -from timeside.analyzer.core import Analyzer -from timeside.analyzer.utils import melFilterBank, computeModulation -from timeside.analyzer.utils import segmentFromValues -from timeside.api import IAnalyzer -from numpy import array, hamming, dot, mean, float -from numpy.fft import rfft -from scipy.signal import firwin, lfilter -from timeside.analyzer.preprocessors import frames_adapter - - -class IRITSpeech4Hz(Analyzer): - '''Speech Segmentor based on the 4Hz energy modulation analysis. - - Properties: - - energy4hz (list) : List of the 4Hz energy by frame for the modulation computation - - threshold (float) : Threshold for the classification Speech/NonSpeech - - frequency_center (float) : Center of the frequency range where the energy is extracted - - frequency_width (float) : Width of the frequency range where the energy is extracted - - orderFilter (int) : Order of the pass-band filter extracting the frequency range - - normalizeEnergy (boolean) : Whether the energy must be normalized or not - - nFFT (int) : Number of points for the FFT. Better if 512 <= nFFT <= 2048 - - nbFilters (int) : Length of the Mel Filter bank - - melFilter (numpy array) : Mel Filter bank - - modulLen (float) : Length (in second) of the modulation computation window - ''' - -<<<<<<< HEAD - @interfacedoc -======= - implements(IAnalyzer) - - @interfacedoc ->>>>>>> 7c3ccb1c5b87c4639fee32df595cca1991265657 - def setup(self, channels=None, samplerate=None, blocksize=None, - totalframes=None): - super(IRITSpeech4Hz, self).setup( - channels, samplerate, blocksize, totalframes) - - self.energy4hz = [] - # Classification - self.threshold = 2.0 - - self.wLen = 1.0 - self.wStep = 0.1 - self.input_blocksize = int(self.wLen * samplerate) - self.input_stepsize = int(self.wStep * samplerate) - - # Pass-band Filter - self.frequency_center = 4.0 - self.frequency_width = 0.5 - self.orderFilter = 100 - - self.normalizeEnergy = True - self.nFFT = 2048 - self.nbFilters = 30 - self.modulLen = 2.0 - self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate) - - @staticmethod - @interfacedoc - def id(): - return "irit_speech_4hz" - - @staticmethod - @interfacedoc - def name(): - return "IRIT Speech 4Hz Modulation" - - @staticmethod - @interfacedoc - def unit(): - return "" - - def __str__(self): - return "Speech confidences indexes" - - @frames_adapter - def process(self, frames, eod=False): - frames = frames.T[0] - # windowing of the frame (could be a changeable property) - w = frames * hamming(len(frames)) - - # Mel scale spectrum extraction - f = abs(rfft(w, n=2 * self.nFFT)[0:self.nFFT]) - e = dot(f ** 2, self.melFilter) - - self.energy4hz.append(e) - - return frames, eod - - def post_process(self): - ''' - - ''' - # Creation of the pass-band filter - Wo = self.frequency_center / self.samplerate() - Wn = [Wo - (self.frequency_width / 2) / self.samplerate(), - Wo + (self.frequency_width / 2) / self.samplerate()] - num = firwin(self.orderFilter, Wn, pass_zero=False) - - # Energy on the frequency range - self.energy4hz = array(self.energy4hz) - energy = lfilter(num, 1, self.energy4hz.T, 0) - energy = sum(energy) - - # Normalization - if self.normalizeEnergy: - energy = energy / mean(energy) - - # Energy Modulation - frameLenModulation = int( - self.modulLen * self.samplerate() / self.input_blocksize) - modEnergyValue = computeModulation(energy, frameLenModulation, True) - - # Confidence Index - conf = array(modEnergyValue - self.threshold) / self.threshold - conf[conf > 1] = 1 - - modEnergy = self.new_result(data_mode='value', time_mode='framewise') - modEnergy.id_metadata.id += '.' + 'energy_confidence' - modEnergy.id_metadata.name += ' ' + 'Energy Confidence' - - modEnergy.data_object.value = conf - - self.process_pipe.results.add(modEnergy) - - # Segment - convert = {False: 0, True: 1} - label = {0: 'nonSpeech', 1: 'Speech'} - - segList = segmentFromValues(modEnergyValue > self.threshold) - # Hint : Median filtering could imrove smoothness of the result - # from scipy.signal import medfilt - # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31)) - - segs = self.new_result(data_mode='label', time_mode='segment') - segs.id_metadata.id += '.' + 'segments' - segs.id_metadata.name += ' ' + 'Segments' - - segs.label_metadata.label = label - - segs.data_object.label = [convert[s[2]] for s in segList] - segs.data_object.time = [(float(s[0]) * self.input_blocksize / - self.samplerate()) - for s in segList] -<<<<<<< HEAD - segs.data_object.duration = [(float(s[1]-s[0]) * self.input_blocksize / -======= - segs.data_object.duration = [(float(s[1]-s[0]+1) * self.blocksize() / ->>>>>>> 7c3ccb1c5b87c4639fee32df595cca1991265657 - self.samplerate()) - for s in segList] - - self.process_pipe.results.add(segs) - - return diff --git a/timeside/analyzer/irit_speech_entropy.py.orig b/timeside/analyzer/irit_speech_entropy.py.orig deleted file mode 100644 index 45be006..0000000 --- a/timeside/analyzer/irit_speech_entropy.py.orig +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2013 Maxime Le Coz - -# This file is part of TimeSide. - -# TimeSide is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 2 of the License, or -# (at your option) any later version. - -# TimeSide is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with TimeSide. If not, see . - -# Author: Maxime Le Coz - -from timeside.core import implements, interfacedoc -from timeside.analyzer.core import Analyzer -from timeside.analyzer.utils import entropy, computeModulation -from timeside.analyzer.utils import segmentFromValues -from timeside.api import IAnalyzer -from numpy import array -from scipy.ndimage.morphology import binary_opening -from timeside.analyzer.preprocessors import frames_adapter - - -class IRITSpeechEntropy(Analyzer): - """Speech Segmentor based on Entropy analysis.""" - - implements(IAnalyzer) - - @interfacedoc - def setup(self, channels=None, samplerate=None, blocksize=None, - totalframes=None): - super(IRITSpeechEntropy, self).setup( - channels, samplerate, blocksize, totalframes) - self.entropyValue = [] - self.threshold = 0.4 - self.smoothLen = 5 - self.modulLen = 2 - self.wLen = 1.0 - self.wStep = 0.1 - self.input_blocksize = int(self.wLen * samplerate) - self.input_stepsize = int(self.wStep * samplerate) - - @staticmethod - @interfacedoc - def id(): - return "irit_speech_entropy" - - @staticmethod - @interfacedoc - def name(): - return "IRIT Speech entropy" - - @staticmethod - @interfacedoc - def unit(): - return "" - - def __str__(self): - return "Speech confidences indexes" - - @frames_adapter - def process(self, frames, eod=False): - self.entropyValue.append(entropy(frames)) - return frames, eod - - def post_process(self): - entropyValue = array(self.entropyValue) - w = self.modulLen * self.samplerate() / self.input_blocksize - modulentropy = computeModulation(entropyValue, w, False) - confEntropy = array(modulentropy - self.threshold) / self.threshold - confEntropy[confEntropy > 1] = 1 - - conf = self.new_result(data_mode='value', time_mode='framewise') - - conf.id_metadata.id += '.' + 'confidence' - conf.id_metadata.name += ' ' + 'Confidence' - - conf.data_object.value = confEntropy - self.process_pipe.results.add(conf) - - # Binary Entropy - binaryEntropy = modulentropy > self.threshold - binaryEntropy = binary_opening( - binaryEntropy, [1] * (self.smoothLen * 2)) - - convert = {False: 0, True: 1} - label = {0: 'NonSpeech', 1: 'Speech'} - segList = segmentFromValues(binaryEntropy) - - segs = self.new_result(data_mode='label', time_mode='segment') - segs.id_metadata.id += '.' + 'segments' - segs.id_metadata.name += ' ' + 'Segments' - - segs.label_metadata.label = label - - segs.data_object.label = [convert[s[2]] for s in segList] - segs.data_object.time = [(float(s[0]) * self.input_blocksize / - self.samplerate()) - for s in segList] -<<<<<<< HEAD - segs.data_object.duration = [(float(s[1]-s[0]) * self.input_blocksize / -======= - segs.data_object.duration = [(float(s[1]-s[0]+1) * self.blocksize() / ->>>>>>> 7c3ccb1c5b87c4639fee32df595cca1991265657 - self.samplerate()) - for s in segList] - - self.process_pipe.results.add(segs) - - return -- 2.39.5