From: Thomas Fillon Date: Thu, 11 Jul 2013 08:28:13 +0000 (+0200) Subject: Fix segment issues in IRIT Analyzers X-Git-Tag: 0.5.0~31^2~1 X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=c00843cb8744d57fd0b6195aa9a5d4c41b1a471c;p=timeside.git Fix segment issues in IRIT Analyzers - Fix segmentFromValues to properly handle the last segment - Fix irit_entropy_segments and irit_4hzenergy_segments results format to have segment limits in seconds --- diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py index 3298be3..9605e33 100644 --- a/timeside/analyzer/irit_speech_4hz.py +++ b/timeside/analyzer/irit_speech_4hz.py @@ -65,48 +65,50 @@ class IRITSpeech4Hz(Processor): return "Speech confidences indexes" def process(self, frames, eod=False): - ''' - - ''' - - frames = frames.T[0] - w = frames * hamming(len(frames)); - f = abs(rfft(w,n=2*self.nFFT)[0:self.nFFT]) - e = dot(f**2,self.melFilter) - self.energy4hz.append(e) - return frames, eod + ''' + + ''' + frames = frames.T[0] + w = frames * hamming(len(frames)) + f = abs(rfft(w, n=2*self.nFFT)[0:self.nFFT]) + e = dot(f**2, self.melFilter) + self.energy4hz.append(e) + return frames, eod + def results(self): - ''' - - ''' - #wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16)) - - Wo = self.fCenter/self.samplerate() ; - Wn = [ Wo-(self.fwidth/2)/self.samplerate() , Wo+(self.fwidth/2)/self.samplerate()]; - num = firwin(self.orderFilter, Wn,pass_zero=False); - self.energy4hz=numpy.array(self.energy4hz) - energy = lfilter(num,1,self.energy4hz.T,0) - energy = sum(energy) - - if self.normalizeEnergy : - energy =energy/mean(energy) - - - w= int(float(self.modulLen)*self.samplerate()/self.blocksize()) - modEnergyValue =computeModulation(energy,w,True) - - conf = array(modEnergyValue-self.threshold)/self.threshold - conf[conf>1] = 1 - - modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?") -
modEnergy.value = conf - convert = {False:'NonSpeech',True:'Speech'} - - segList = segmentFromValues(modEnergyValue>self.threshold) - segmentsEntropy =[] - for s in segList : - segmentsEntropy.append((s[0],s[1],convert[s[2]])) - segs = AnalyzerResult(id = "irit_4hzenergy_segments", name = "seg 4Hz (IRIT)", unit = "s") - segs.value = segmentsEntropy - return AnalyzerResultContainer([modEnergy,segs]) + ''' + + ''' + #wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16)) + + Wo = self.fCenter/self.samplerate() + Wn = [ Wo-(self.fwidth/2)/self.samplerate() , Wo+(self.fwidth/2)/self.samplerate()] + num = firwin(self.orderFilter, Wn, pass_zero=False) + self.energy4hz=numpy.array(self.energy4hz) + energy = lfilter(num, 1, self.energy4hz.T, 0) + energy = sum(energy) + + if self.normalizeEnergy: + energy = energy / mean(energy) + + w = int(float(self.modulLen) * self.samplerate() / self.blocksize()) + modEnergyValue = computeModulation(energy, w, True) + + conf = array(modEnergyValue-self.threshold)/self.threshold + conf[conf>1] = 1 + + modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?") + modEnergy.value = conf + convert = {False:'NonSpeech',True:'Speech'} + + segList = segmentFromValues(modEnergyValue>self.threshold) + segmentsEntropy =[] + for s in segList : + segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(), + numpy.float(s[1])*self.blocksize()/self.samplerate(), + convert[s[2]])) + + segs = AnalyzerResult(id="irit_4hzenergy_segments", name="seg 4Hz (IRIT)", unit="s") + segs.value = segmentsEntropy + return AnalyzerResultContainer([modEnergy,segs]) diff --git a/timeside/analyzer/irit_speech_entropy.py b/timeside/analyzer/irit_speech_entropy.py index 7d72921..c84f76d 100644 --- a/timeside/analyzer/irit_speech_entropy.py +++ b/timeside/analyzer/irit_speech_entropy.py @@ -56,9 +56,9 @@ class IRITSpeechEntropy(Processor): return "Speech confidences indexes" def 
process(self, frames, eod=False): - self.entropyValue.append(entropy(frames)) - return frames, eod - + self.entropyValue.append(entropy(frames)) + return frames, eod + def results(self): entropyValue = numpy.array(self.entropyValue) @@ -66,21 +66,23 @@ class IRITSpeechEntropy(Processor): modulentropy = computeModulation(entropyValue,w,False) confEntropy= array(modulentropy-self.threshold)/self.threshold confEntropy[confEntropy>1] = 1 - + conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?") conf.value = confEntropy - + binaryEntropy = modulentropy > self.threshold binaryEntropy = binary_opening(binaryEntropy,[1]*(self.smoothLen*2)) - + convert = {False:'NonSpeech',True:'Speech'} segList = segmentFromValues(binaryEntropy) + segmentsEntropy =[] - for s in segList : - segmentsEntropy.append((s[0],s[1],convert[s[2]])) - - segs = AnalyzerResult(id = "irit_entropy_segments", name = "seg entropy (IRIT)", unit = "s") + for s in segList : + segmentsEntropy.append((numpy.float(s[0])*self.blocksize()/self.samplerate(), + numpy.float(s[1])*self.blocksize()/self.samplerate(), + convert[s[2]])) + + segs = AnalyzerResult(id="irit_entropy_segments", name="seg entropy (IRIT)", unit="s") segs.value = segmentsEntropy - return AnalyzerResultContainer([conf, segs]) diff --git a/timeside/analyzer/utils.py b/timeside/analyzer/utils.py index 43b52e5..0192ca7 100644 --- a/timeside/analyzer/utils.py +++ b/timeside/analyzer/utils.py @@ -37,38 +37,38 @@ def downsample_blocking(frames, hop_s, dtype='float32'): def computeModulation(serie,wLen,withLog=True): ''' Compute the modulation of a parameter centered. Extremums are set to zero. - + Args : - serie : list or numpy array containing the serie. - wLen : Length of the analyzis window in samples. - - withLog : Whether compute the var() or log(var()) . - + - withLog : Whether compute the var() or log(var()) . + Returns : - modul : Modulation of the serie. 
- + ''' - + modul = numpy.zeros((1,len(serie)))[0]; w = int(wLen/2) - + for i in range(w,len(serie)-w): - + d = serie[i-w:i+w] if withLog: d = numpy.log(d) modul[i] = numpy.var(d) - + modul[:w] = modul[w] - + modul[-w:] = modul[-w-1] - + return modul; def segmentFromValues(values,offset=0): ''' - + ''' - + seg = [offset,-1,values[0]] segList = [] for i,v in enumerate(values) : @@ -77,111 +77,111 @@ def segmentFromValues(values,offset=0): seg[1] = i+offset segList.append(tuple(seg)) seg = [i+offset,-1,v] - - seg[1] = i+offset + + seg[1] = i+offset+1 segList.append(tuple(seg)) return segList -# Attention +# Attention # --------- # # Double emploi avec le calcul mfcc d'aubio. Voir pour la fusion... -# Maxime +# Maxime def melFilterBank(nbFilters,fftLen,sr) : ''' Grenerate a Mel Filter-Bank - + Args : - nbFilters : Number of filters. - fftLen : Length of the frequency range. - - sr : Sampling rate of the signal to filter. + - sr : Sampling rate of the signal to filter. Returns : - filterbank : fftLen x nbFilters matrix containing one filter by column. - The filter bank can be applied by matrix multiplication - (Use numpy *dot* function). + The filter bank can be applied by matrix multiplication + (Use numpy *dot* function). ''' - - fh = float(sr)/2.0 + + fh = float(sr)/2.0 mh = 2595*numpy.log10(1+fh/700) - + step = mh/nbFilters; - + mcenter = numpy.arange(step,mh,step) - + fcenter = 700*(10**(mcenter/2595)-1) - - filterbank = numpy.zeros((fftLen,nbFilters)); - + + filterbank = numpy.zeros((fftLen,nbFilters)); + for i,_ in enumerate(fcenter) : - + if i == 0 : fmin = 0.0 else : fmin = fcenter[i-1] - + if i == len(fcenter)-1 : fmax = fh else : - fmax = fcenter[i+1] - + fmax = fcenter[i+1] + imin = numpy.ceil(fmin/fh*fftLen) imax = numpy.ceil(fmax/fh*fftLen) - + filterbank[imin:imax,i] = triangle(imax-imin) - + return filterbank def triangle(length): ''' Generate a triangle filter. - + Args : - length : length of the filter. returns : - - triangle : triangle filter. 
- + - triangle : triangle filter. + ''' triangle = numpy.zeros((1,length))[0] climax= numpy.ceil(length/2) - + triangle[0:climax] = numpy.linspace(0,1,climax) triangle[climax:length] = numpy.linspace(1,0,length-climax) return triangle - + def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'): ''' Compute entropy of a serie using the histogram method. - + Args : - serie : Serie on witch compute the entropy - nbins : Number of bins of the histogram - base : Base used for normalisation - approach : String in the following set : {unbiased,mmse} for un-biasing value. - + Returns : - estimate : Entropy value - nbias : N-bias of the estimate - sigma : Estimated standard error - - Raises : - A warning in case of unknown 'approach' value. - No un-biasing is then performed - + + Raises : + A warning in case of unknown 'approach' value. + No un-biasing is then performed + ''' - + estimate = 0 sigma = 0 bins,edges = numpy.histogram(serie,nbins); ncell = len(bins) norm = (numpy.max(edges)-numpy.min(edges))/len(bins) - - + + for b in bins : if b == 0 : logf = 0 @@ -189,17 +189,17 @@ def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'): logf = numpy.log(b) estimate = estimate - b*logf sigma = sigma + b * logf**2 - + count = numpy.sum(bins) estimate=estimate/count; sigma=numpy.sqrt( (sigma/count-estimate**2)/float(count-1) ); estimate=estimate+numpy.log(count)+numpy.log(norm); nbias=-(ncell-1)/(2*count); - + if approach =='unbiased' : estimate=estimate-nbias; nbias=0; - + elif approach =='mmse' : estimate=estimate-nbias; nbias=0; @@ -209,7 +209,7 @@ def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'): sigma =lambda_value*sigma; else : return 0 - + estimate=estimate/numpy.log(base); nbias =nbias /numpy.log(base); sigma =sigma /numpy.log(base);