From: Maxime LE COZ Date: Mon, 17 Jun 2013 14:12:25 +0000 (+0200) Subject: Irit analysers X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=739417167adec52a823f3539fe74ec8a51f95a5c;p=timeside-diadems.git Irit analysers --- diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py new file mode 100644 index 0000000..3298be3 --- /dev/null +++ b/timeside/analyzer/irit_speech_4hz.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2013 Maxime Le Coz + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see <http://www.gnu.org/licenses/>. 
# Author: Maxime Le Coz

from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
from timeside.analyzer.core import *
from timeside.api import IValueAnalyzer
from numpy import array,hamming,dot,mean
from numpy.fft import rfft
from scipy.ndimage.morphology import binary_opening
from scipy.signal import firwin,lfilter
from scipy.io.wavfile import write as wavwrite
from matplotlib import pylab


class IRITSpeech4Hz(Processor):
    """Speech / non-speech analyzer based on filter-bank energy modulation.

    For every processed block the energy of the first channel is measured
    through the filter bank built by ``melFilterBank``.  ``results()`` then
    band-pass filters those energies around ``fCenter``, measures their
    modulation and thresholds it to label segments 'Speech' or 'NonSpeech'.
    """
    implements(IValueAnalyzer)

    # NOTE: methods decorated with @interfacedoc carry no docstring of their
    # own on purpose; they are documented with comments instead.
    @interfacedoc
    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
        super(IRITSpeech4Hz, self).setup(channels, samplerate, blocksize, totalframes)
        # One filter-bank energy vector is appended per process() call.
        self.energy4hz = []
        # Modulation level above which a value is labelled 'Speech'.
        self.threshold = 2.0
        # Not read anywhere in this class.
        self.smoothLen = 5
        # Centre and width of the band-pass filter designed in results().
        self.fCenter = 4.0
        self.fwidth = 0.5
        # Number of taps passed to firwin().
        self.orderFilter = 100
        # When true, the summed energy is divided by its mean.
        self.normalizeEnergy = True
        # process() keeps the first nFFT bins of a 2 * nFFT point real FFT.
        self.nFFT = 2048
        self.nbFilters = 30
        # Modulation window, converted to a number of blocks in results()
        # (presumably expressed in seconds -- TODO confirm).
        self.modulLen = 2
        self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate)

    @staticmethod
    @interfacedoc
    def id():
        return "irit_speech_4hz"

    @staticmethod
    @interfacedoc
    def name():
        # Previously returned "Speech entropy (IRIT)", the name of the
        # sibling entropy analyzer.
        return "Speech 4Hz modulation (IRIT)"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Speech confidences indexes"

    def process(self, frames, eod=False):
        """Accumulate the filter-bank energies of one block of audio.

        Only the first channel (``frames.T[0]``) is analysed: it is
        Hamming-windowed, transformed with a ``2 * nFFT`` point real FFT and
        the squared magnitude of the first ``nFFT`` bins is projected onto
        ``self.melFilter``.  The resulting vector is appended to
        ``self.energy4hz``.

        Returns the incoming ``frames`` untouched together with ``eod``, so
        that processors chained after this one still receive every channel.
        """
        # Use a separate name: rebinding ``frames`` would make this method
        # hand a single-channel 1-D slice to the next processor.
        channel = frames.T[0]
        windowed = channel * hamming(len(channel))
        spectrum = abs(rfft(windowed, n=2 * self.nFFT)[0:self.nFFT])
        self.energy4hz.append(dot(spectrum ** 2, self.melFilter))
        return frames, eod

    def results(self):
        """Turn the accumulated energies into confidence and segment results.

        Steps, in order:

        1. design a band-pass FIR filter (``firwin`` with two cut-offs and
           ``pass_zero=False``), apply it to the transposed energy matrix
           and sum the filtered rows;
        2. if ``normalizeEnergy`` is set, divide that series by its mean;
        3. compute its modulation with ``computeModulation`` over a window
           of ``modulLen * samplerate / blocksize`` values;
        4. derive a confidence ``(modulation - threshold) / threshold``
           capped at 1, and a 'Speech' / 'NonSpeech' segmentation from
           ``modulation > threshold``.

        Returns an ``AnalyzerResultContainer`` holding two results,
        ``irit_4hzenergy_confidence`` and ``irit_4hzenergy_segments``.
        """
        # NOTE(review): the band edges are normalised by the audio sample
        # rate, while the filtered series holds one value per processed
        # block -- confirm this is the intended normalisation.
        Wo = self.fCenter / self.samplerate()
        half_width = (self.fwidth / 2) / self.samplerate()
        Wn = [Wo - half_width, Wo + half_width]
        num = firwin(self.orderFilter, Wn, pass_zero=False)

        # ``array`` is the name this module imports from numpy; the previous
        # ``numpy.array`` relied on ``numpy`` leaking through a star import.
        self.energy4hz = array(self.energy4hz)
        # NOTE(review): filtering runs along axis 0 of the transposed
        # matrix -- confirm that this is the intended axis.
        energy = lfilter(num, 1, self.energy4hz.T, 0)
        # Adds the rows of the filtered matrix together.
        energy = sum(energy)

        if self.normalizeEnergy:
            energy = energy / mean(energy)

        # Window length expressed as a whole number of blocks.
        w = int(float(self.modulLen) * self.samplerate() / self.blocksize())
        modEnergyValue = computeModulation(energy, w, True)

        conf = array(modEnergyValue - self.threshold) / self.threshold
        conf[conf > 1] = 1

        modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
        modEnergy.value = conf

        convert = {False: 'NonSpeech', True: 'Speech'}
        segList = segmentFromValues(modEnergyValue > self.threshold)
        # Each item is indexed as s[0], s[1], s[2]; s[2] is the boolean
        # mapped to a label (presumably s[0]/s[1] are the segment bounds).
        segments = [(s[0], s[1], convert[s[2]]) for s in segList]

        segs = AnalyzerResult(id = "irit_4hzenergy_segments", name = "seg 4Hz (IRIT)", unit = "s")
        segs.value = segments
        return AnalyzerResultContainer([modEnergy, segs])
# Author: Maxime Le Coz

from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
from timeside.analyzer.core import *
from timeside.api import IValueAnalyzer
from numpy import array
from scipy.ndimage.morphology import binary_opening
from matplotlib import pylab


class IRITSpeechEntropy(Processor):
    """Speech / non-speech analyzer based on the modulation of entropy.

    One ``entropy(frames)`` value is stored per processed block.
    ``results()`` measures the modulation of that series, thresholds it,
    smooths the decision with a binary opening and labels the resulting
    segments 'Speech' or 'NonSpeech'.
    """
    implements(IValueAnalyzer)

    # NOTE: methods decorated with @interfacedoc carry no docstring of their
    # own on purpose; they are documented with comments instead.
    @interfacedoc
    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
        super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
        # One entropy value is appended per process() call.
        self.entropyValue = []
        # Modulation level above which a value is labelled 'Speech'.
        self.threshold = 0.4
        # The binary opening in results() uses 2 * smoothLen ones.
        self.smoothLen = 5
        # Modulation window, converted to a number of blocks in results()
        # (presumably expressed in seconds -- TODO confirm).
        self.modulLen = 2

    @staticmethod
    @interfacedoc
    def id():
        return "irit_speech_entropy"

    @staticmethod
    @interfacedoc
    def name():
        return "Speech entropy (IRIT)"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Speech confidences indexes"

    def process(self, frames, eod=False):
        """Store the entropy of one block and pass the block through.

        ``entropy`` is not defined in this module; presumably it is provided
        by the star import from ``timeside.analyzer.core``.

        Returns ``frames`` and ``eod`` unchanged.
        """
        self.entropyValue.append(entropy(frames))
        return frames, eod

    def results(self):
        """Turn the accumulated entropies into confidence and segments.

        The modulation of the entropy series is computed with
        ``computeModulation`` over ``modulLen * samplerate / blocksize``
        values.  The confidence is ``(modulation - threshold) / threshold``
        capped at 1.  The decision ``modulation > threshold`` is smoothed
        with ``binary_opening`` before being cut into labelled segments.

        Returns an ``AnalyzerResultContainer`` holding two results,
        ``irit_entropy_confidence`` and ``irit_entropy_segments``.
        """
        # ``array`` is the name this module imports from numpy; the previous
        # ``numpy.array`` relied on ``numpy`` leaking through a star import.
        entropyValue = array(self.entropyValue)
        # Whole number of blocks, computed like the sibling 4 Hz analyzer,
        # so the window stays an integer under true division as well.
        w = int(float(self.modulLen) * self.samplerate() / self.blocksize())
        modulentropy = computeModulation(entropyValue, w, False)

        confEntropy = array(modulentropy - self.threshold) / self.threshold
        confEntropy[confEntropy > 1] = 1

        conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
        conf.value = confEntropy

        # The opening drops runs of True shorter than the structuring element.
        binaryEntropy = modulentropy > self.threshold
        binaryEntropy = binary_opening(binaryEntropy, [1] * (self.smoothLen * 2))

        convert = {False: 'NonSpeech', True: 'Speech'}
        segList = segmentFromValues(binaryEntropy)
        # Each item is indexed as s[0], s[1], s[2]; s[2] is the boolean
        # mapped to a label (presumably s[0]/s[1] are the segment bounds).
        segmentsEntropy = [(s[0], s[1], convert[s[2]]) for s in segList]

        segs = AnalyzerResult(id = "irit_entropy_segments", name = "seg entropy (IRIT)", unit = "s")
        segs.value = segmentsEntropy

        return AnalyzerResultContainer([conf, segs])