return "Speech confidences indexes"
def process(self, frames, eod=False):
    '''
    Compute the mel-band energy of one frame and accumulate it in
    self.energy4hz for the 4 Hz modulation analysis done in results().

    Args :
        - frames : multi-channel frame block; only the first channel is used.
        - eod    : end-of-data flag, passed through unchanged.

    Returns :
        - the mono (first-channel) samples and the eod flag.
    '''
    # Keep only the first channel.
    samples = frames.T[0]
    # Window the frame, then take the magnitude spectrum on 2*nFFT points.
    windowed = samples * hamming(len(samples))
    spectrum = abs(rfft(windowed, n=2 * self.nFFT)[0:self.nFFT])
    # Project the power spectrum onto the mel filter bank.
    melEnergy = dot(spectrum ** 2, self.melFilter)
    self.energy4hz.append(melEnergy)
    return samples, eod
def results(self):
    '''
    Compute the 4 Hz modulation energy over all accumulated frames and
    derive a speech confidence plus Speech/NonSpeech segments.

    Returns :
        AnalyzerResultContainer holding
          - irit_4hzenergy_confidence : confidence values, capped at 1
          - irit_4hzenergy_segments   : (start s, stop s, label) tuples
    '''
    #wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16))

    sr = self.samplerate()  # hoisted: used several times below

    # Band-pass FIR filter around self.fCenter (normalized frequencies).
    Wo = self.fCenter / sr
    Wn = [Wo - (self.fwidth / 2) / sr, Wo + (self.fwidth / 2) / sr]
    num = firwin(self.orderFilter, Wn, pass_zero=False)

    # Filter each mel band along time, then sum the bands together.
    self.energy4hz = numpy.array(self.energy4hz)
    energy = lfilter(num, 1, self.energy4hz.T, 0)
    energy = sum(energy)

    if self.normalizeEnergy:
        energy = energy / mean(energy)

    # Modulation window: modulLen seconds expressed in frames.
    w = int(float(self.modulLen) * sr / self.blocksize())
    modEnergyValue = computeModulation(energy, w, True)

    conf = array(modEnergyValue - self.threshold) / self.threshold
    conf[conf > 1] = 1

    modEnergy = AnalyzerResult(id="irit_4hzenergy_confidence", name="modulation energie (IRIT)", unit="?")
    modEnergy.value = conf
    convert = {False: 'NonSpeech', True: 'Speech'}

    segList = segmentFromValues(modEnergyValue > self.threshold)
    segmentsEntropy = []
    frameDuration = float(self.blocksize()) / sr
    for s in segList:
        # numpy.float was removed in NumPy >= 1.24; the builtin float is
        # equivalent here (frame index -> seconds conversion).
        segmentsEntropy.append((float(s[0]) * frameDuration,
                                float(s[1]) * frameDuration,
                                convert[s[2]]))

    segs = AnalyzerResult(id="irit_4hzenergy_segments", name="seg 4Hz (IRIT)", unit="s")
    segs.value = segmentsEntropy
    return AnalyzerResultContainer([modEnergy, segs])
return "Speech confidences indexes"
def process(self, frames, eod=False):
    '''
    Accumulate the entropy of the incoming frame in self.entropyValue
    for the modulation analysis performed in results(); the frames are
    passed through unchanged.
    '''
    frameEntropy = entropy(frames)
    self.entropyValue.append(frameEntropy)
    return frames, eod
def results(self):
    '''
    Derive a speech confidence and Speech/NonSpeech segments from the
    per-frame entropy accumulated in process().

    Returns :
        AnalyzerResultContainer holding
          - irit_entropy_confidence : confidence values, capped at 1
          - irit_entropy_segments   : (start s, stop s, label) tuples
    '''
    entropyValue = numpy.array(self.entropyValue)
    # NOTE(review): 'w' is not defined anywhere in this method; it likely
    # should be the modulation window length in frames, as computed in the
    # 4 Hz analyzer's results() — confirm and define before use.
    modulentropy = computeModulation(entropyValue, w, False)
    confEntropy = array(modulentropy - self.threshold) / self.threshold
    confEntropy[confEntropy > 1] = 1

    conf = AnalyzerResult(id="irit_entropy_confidence", name="entropy (IRIT)", unit="?")
    conf.value = confEntropy

    # Binarize the decision, then smooth it with a morphological opening.
    binaryEntropy = modulentropy > self.threshold
    binaryEntropy = binary_opening(binaryEntropy, [1] * (self.smoothLen * 2))

    convert = {False: 'NonSpeech', True: 'Speech'}
    segList = segmentFromValues(binaryEntropy)

    segmentsEntropy = []
    frameDuration = float(self.blocksize()) / self.samplerate()
    for s in segList:
        # numpy.float was removed in NumPy >= 1.24; the builtin float is
        # equivalent here (frame index -> seconds conversion).
        segmentsEntropy.append((float(s[0]) * frameDuration,
                                float(s[1]) * frameDuration,
                                convert[s[2]]))

    segs = AnalyzerResult(id="irit_entropy_segments", name="seg entropy (IRIT)", unit="s")
    segs.value = segmentsEntropy

    return AnalyzerResultContainer([conf, segs])
def computeModulation(serie, wLen, withLog=True):
    '''
    Compute the modulation of a parameter as a centered sliding-window
    variance.  Border positions (closer than half a window to either end)
    are padded with the nearest computed value.

    Args :
        - serie   : list or numpy array containing the serie.
        - wLen    : length of the analysis window, in samples.
        - withLog : whether to compute var(log(serie)) instead of var(serie).

    Returns :
        - modul : numpy array with the modulation of the serie.
    '''
    half = int(wLen / 2)
    modul = numpy.zeros(len(serie))

    for center in range(half, len(serie) - half):
        window = serie[center - half:center + half]
        if withLog:
            window = numpy.log(window)
        modul[center] = numpy.var(window)

    # Propagate the first/last computed values to the borders.
    modul[:half] = modul[half]
    modul[-half:] = modul[-half - 1]

    return modul
def segmentFromValues(values, offset=0):
    '''
    Build a list of (start, stop, value) tuples from *values*, with all
    indices shifted by *offset*; the final segment's stop index is one
    past the last element.

    NOTE(review): one tuple is emitted per element (there is no test for a
    change of value inside the loop) — confirm this segmentation is intended.
    '''
    segments = []
    current = [offset, -1, values[0]]
    last = 0
    for idx, val in enumerate(values):
        current[1] = idx + offset
        segments.append(tuple(current))
        current = [idx + offset, -1, val]
        last = idx
    current[1] = last + offset + 1
    segments.append(tuple(current))
    return segments
# Attention
# ---------
#
# Double emploi avec le calcul mfcc d'aubio. Voir pour la fusion...
#    Maxime
def melFilterBank(nbFilters, fftLen, sr):
    '''
    Generate a Mel filter bank.

    Args :
        - nbFilters : number of filters.
        - fftLen    : length of the frequency range (number of bins).
        - sr        : sampling rate of the signal to filter.

    Returns :
        - filterbank : fftLen x nbFilters matrix containing one triangular
                       filter per column.  Apply the bank by matrix
                       multiplication (use numpy *dot*).
    '''
    fh = float(sr) / 2.0
    # Nyquist frequency converted to the mel scale.
    mh = 2595 * numpy.log10(1 + fh / 700)

    step = mh / nbFilters
    # Filter centers, evenly spaced on the mel scale...
    mcenter = numpy.arange(step, mh, step)
    # ...converted back to Hz.
    fcenter = 700 * (10 ** (mcenter / 2595) - 1)

    filterbank = numpy.zeros((fftLen, nbFilters))

    for i, _ in enumerate(fcenter):
        # Each filter spans from the previous center to the next one,
        # clamped to [0, Nyquist] at the edges.
        fmin = 0.0 if i == 0 else fcenter[i - 1]
        fmax = fh if i == len(fcenter) - 1 else fcenter[i + 1]

        # numpy.ceil returns a float; slice indices must be ints under
        # Python 3 / modern NumPy, so cast explicitly.
        imin = int(numpy.ceil(fmin / fh * fftLen))
        imax = int(numpy.ceil(fmax / fh * fftLen))

        filterbank[imin:imax, i] = triangle(imax - imin)

    return filterbank
def triangle(length):
    '''
    Generate a triangular filter: a linear ramp from 0 up to 1 at the
    middle of the window, then back down to 0.

    Args :
        - length : length of the filter, in samples (floats are accepted
                   and truncated to int for backward compatibility).
    Returns :
        - numpy array of *length* samples.
    '''
    length = int(length)
    tri = numpy.zeros(length)
    # numpy.ceil returns a float; cast so it can be used as a slice index
    # and as the sample count for numpy.linspace (Python 3 / modern NumPy).
    climax = int(numpy.ceil(length / 2))

    tri[0:climax] = numpy.linspace(0, 1, climax)
    tri[climax:length] = numpy.linspace(1, 0, length - climax)
    return tri
-
+
def entropy(serie,nbins=10,base=numpy.exp(1),approach='unbiased'):
'''
Compute entropy of a serie using the histogram method.
-
+
Args :
- serie : Serie on witch compute the entropy
- nbins : Number of bins of the histogram
- base : Base used for normalisation
- approach : String in the following set : {unbiased,mmse}
for un-biasing value.
-
+
Returns :
- estimate : Entropy value
- nbias : N-bias of the estimate
- sigma : Estimated standard error
-
- Raises :
- A warning in case of unknown 'approach' value.
- No un-biasing is then performed
-
+
+ Raises :
+ A warning in case of unknown 'approach' value.
+ No un-biasing is then performed
+
'''
-
+
estimate = 0
sigma = 0
bins,edges = numpy.histogram(serie,nbins);
ncell = len(bins)
norm = (numpy.max(edges)-numpy.min(edges))/len(bins)
-
-
+
+
for b in bins :
if b == 0 :
logf = 0
logf = numpy.log(b)
estimate = estimate - b*logf
sigma = sigma + b * logf**2
-
+
count = numpy.sum(bins)
estimate=estimate/count;
sigma=numpy.sqrt( (sigma/count-estimate**2)/float(count-1) );
estimate=estimate+numpy.log(count)+numpy.log(norm);
nbias=-(ncell-1)/(2*count);
-
+
if approach =='unbiased' :
estimate=estimate-nbias;
nbias=0;
-
+
elif approach =='mmse' :
estimate=estimate-nbias;
nbias=0;
sigma =lambda_value*sigma;
else :
return 0
-
+
estimate=estimate/numpy.log(base);
nbias =nbias /numpy.log(base);
sigma =sigma /numpy.log(base);