- 2 * N.dot(x, (self.means / self.vars).T)
+ N.dot(x ** 2, (1.0 / self.vars).T))
+ N.log(self.weights)
- m = N.amax(llh,1)
+ m = N.amax(llh, 1)
dif = llh - N.atleast_2d(m).T
- return m + N.log(N.sum(N.exp(dif),1))
+ return m + N.log(N.sum(N.exp(dif), 1))
class LimsiSad(Analyzer):
+
"""
Limsi Speech Activity Detection Systems
LimsiSad performs frame level speech activity detection based on GMM models
"""
implements(IAnalyzer)
-
def __init__(self, sad_model='etape'):
"""
Parameters:
# feature extraction definition
spec = yaafelib.FeaturePlan(sample_rate=16000)
- spec.addFeature('mfcc: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256')
- spec.addFeature('mfccd1: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=1')
- spec.addFeature('mfccd2: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=2')
+ spec.addFeature(
+ 'mfcc: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256')
+ spec.addFeature(
+ 'mfccd1: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=1')
+ spec.addFeature(
+ 'mfccd2: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=2')
spec.addFeature('zcr: ZCR blockSize=1024 stepSize=256')
parent_analyzer = Yaafe(spec)
self.parents.append(parent_analyzer)
# load gmm model
if sad_model not in ['etape', 'maya']:
- raise ValueError("argument sad_model %s not supported. Supported values are 'etape' or 'maya'" % sad_model)
- picfname = os.path.join(timeside.__path__[0], 'trained_models', 'limsi_sad_%s.pkl' % sad_model)
+ raise ValueError(
+ "argument sad_model %s not supported. Supported values are 'etape' or 'maya'" % sad_model)
+ picfname = os.path.join(
+ timeside.__path__[0], 'trained_models', 'limsi_sad_%s.pkl' % sad_model)
self.gmms = pickle.load(open(picfname, 'rb'))
-
@staticmethod
@interfacedoc
def id():
def process(self, frames, eod=False):
    """
    Pass the incoming frames through untouched after validating the rate.

    Parameters:
        frames: block of audio frames handed over by the pipeline
        eod: end-of-data flag, forwarded unchanged

    Returns:
        the ``(frames, eod)`` pair exactly as received

    Raises:
        Exception: when ``self.input_samplerate`` is not 16000
    """
    # Guard clause: the happy path returns immediately.
    if self.input_samplerate == 16000:
        return frames, eod

    details = (self.__class__.__name__, self.input_samplerate)
    raise Exception(
        '%s requires 16000 input sample rate: %d provided' % details)
def post_process(self):
    """
    Build the framewise log-likelihood-difference result.

    The four Yaafe feature matrices (mfcc, mfccd1, mfccd2, zcr) are joined
    along axis 1, scored by the two models stored in ``self.gmms``, and the
    value ``0.5 + 0.5 * (llh of gmms[0] - llh of gmms[1])`` is stored in a
    new framewise result that is added to the pipe results.
    """
    pipe_results = self.process_pipe.results

    # Same feature order as the concatenation expected by the models.
    feature_keys = ('yaafe.mfcc', 'yaafe.mfccd1', 'yaafe.mfccd2', 'yaafe.zcr')
    blocks = tuple(
        pipe_results[key]['data_object']['value'] for key in feature_keys)
    features = N.concatenate(blocks, axis=1)

    # NOTE(review): presumably gmms[0] is the speech model and gmms[1] the
    # non-speech model -- confirm against the trained model files.
    llh_first = self.gmms[0].llh(features)
    llh_second = self.gmms[1].llh(features)
    res = 0.5 + 0.5 * (llh_first - llh_second)

    sad_result = self.new_result(data_mode='value', time_mode='framewise')
    metadata = sad_result.id_metadata
    metadata.id += '.' + 'sad_lhh_diff'
    metadata.name += ' ' + \
        'Speech Activity Detection Log Likelihood Difference'
    sad_result.data_object.value = res
    pipe_results.add(sad_result)
-
MACHINE_EPSILON = np.finfo(np.float32).eps
+
def downsample_blocking(frames, hop_s, dtype='float32'):
    """
    Downmix ``frames`` to one channel and cut it into blocks of ``hop_s``.

    Parameters:
        frames: array-like of samples; when it has more than one dimension
            the last axis is averaged away (channel downmix)
        hop_s: number of samples per block
        dtype: dtype of the zeros appended as padding

    Returns:
        2-D array of shape ``(number_of_blocks, hop_s)``; the last block is
        zero padded when the number of samples is not a multiple of
        ``hop_s``
    """
    frames = np.asarray(frames)

    # downmixing to one channel
    if frames.ndim != 1:
        downsampled = frames.sum(axis=-1) / frames.shape[-1]
    else:
        downsampled = frames

    # zero padding to have a multiple of hop_s
    remainder = downsampled.shape[0] % hop_s
    if remainder != 0:
        # Integer arithmetic only: a float count is rejected by np.zeros
        padding = np.zeros(hop_s - remainder, dtype=dtype)
        downsampled = np.hstack([downsampled, padding])

    # blocking; floor division keeps the block count an int on Python 3
    return downsampled.reshape(downsampled.shape[0] // hop_s, hop_s)
'''
sLen = len(serie)
modul = np.zeros((sLen,))
- w = int(wLen/2)
+ w = int(wLen / 2)
- for i in range(w, sLen-w):
+ for i in range(w, sLen - w):
- d = serie[i-w:i+w]
+ d = serie[i - w:i + w]
if withLog:
if not (d > 0).all():
d[d <= 0] = MACHINE_EPSILON # prevent log(0)=inf
modul[i] = np.var(d)
modul[:w] = modul[w]
- modul[-w:] = modul[-w-1]
+ modul[-w:] = modul[-w - 1]
return modul
'''
- seg = [offset,-1,values[0]]
+ seg = [offset, -1, values[0]]
segList = []
- for i,v in enumerate(values) :
+ for i, v in enumerate(values):
- if not (v == seg[2]) :
- seg[1] = i+offset-1
+ if not (v == seg[2]):
+ seg[1] = i + offset - 1
segList.append(tuple(seg))
- seg = [i+offset,-1,v]
+ seg = [i + offset, -1, v]
- seg[1] = i+offset
+ seg[1] = i + offset
segList.append(tuple(seg))
return segList
# Duplicates the MFCC computation done by aubio; look into merging the two.
# Maxime
def melFilterBank(nbFilters, fftLen, sr):
    '''
    Generate a Mel Filter-Bank
    (apply it to a spectrum with the numpy *dot* function).

    Args :
        - nbFilters (int) : number of mel filters (columns of the result)
        - fftLen (int) : number of fft points (rows of the result)
        - sr (number) : sample rate; filters span 0 .. sr / 2

    Returns :
        - filterbank (numpy array) : matrix of shape (fftLen, nbFilters),
          one triangular filter per column
    '''
    fh = float(sr) / 2.0
    # Hz -> mel conversion of the upper band edge
    mh = 2595 * np.log10(1 + fh / 700)

    # Filter centres evenly spaced on the mel scale, converted back to Hz
    step = mh / nbFilters
    mcenter = np.arange(step, mh, step)
    fcenter = 700 * (10 ** (mcenter / 2595) - 1)

    filterbank = np.zeros((fftLen, nbFilters))
    last = len(fcenter) - 1

    for i in range(len(fcenter)):
        # Each filter spans from the previous centre to the next one;
        # the first starts at 0 Hz and the last ends at sr / 2.
        fmin = 0.0 if i == 0 else fcenter[i - 1]
        fmax = fh if i == last else fcenter[i + 1]

        # np.ceil returns floats; slice bounds and the window length must
        # be ints (non-integer indices raise TypeError on recent NumPy).
        imin = int(np.ceil(fmin / fh * fftLen))
        imax = int(np.ceil(fmax / fh * fftLen))

        filterbank[imin:imax, i] = triangle(imax - imin)

    return filterbank
- triangle : triangle filter.
'''
- triangle = np.zeros((1,length))[0]
- climax= np.ceil(length/2)
+ triangle = np.zeros((1, length))[0]
+ climax = np.ceil(length / 2)
- triangle[0:climax] = np.linspace(0,1,climax)
- triangle[climax:length] = np.linspace(1,0,length-climax)
+ triangle[0:climax] = np.linspace(0, 1, climax)
+ triangle[climax:length] = np.linspace(1, 0, length - climax)
return triangle
def entropy(serie, nbins=10, base=np.exp(1), approach='unbiased'):
    '''
    Compute entropy of a serie using the histogram method.

    The serie is binned with ``np.histogram``; the entropy of the histogram
    is then corrected by the bias term ``-(ncell - 1) / (2 * count)``.

    Args :
        - serie (array-like) : samples on which the entropy is computed
        - nbins (int) : number of bins of the histogram
        - base (float) : logarithm base of the returned value
        - approach (str) : 'unbiased' (bias-corrected estimate) or 'mmse'
          (bias-corrected estimate scaled by est^2 / (est^2 + sigma^2))

    Returns :
        - (float) : entropy estimate expressed in ``base``;
          0 when ``approach`` is not a supported value
    '''
    bins, edges = np.histogram(serie, nbins)
    ncell = len(bins)
    count = np.sum(bins)
    norm = (np.max(edges) - np.min(edges)) / ncell

    # Empty bins contribute nothing to the sums, so drop them before the log
    occupied = bins[bins > 0].astype(float)
    logf = np.log(occupied)

    raw = -np.sum(occupied * logf) / count
    estimate = raw + np.log(count) + np.log(norm)
    # 2.0 forces true division (plain ints floor-divide under Python 2)
    nbias = -(ncell - 1) / (2.0 * count)

    if approach == 'unbiased':
        estimate = estimate - nbias

    elif approach == 'mmse':
        # Spread of the per-sample log counts; clamped at 0 so rounding
        # errors cannot push the argument of the square root below zero
        variance = np.sum(occupied * logf * logf) / count - raw * raw
        sigma = np.sqrt(max(variance, 0.0) / float(count - 1))

        estimate = estimate - nbias
        # Powers, not bitwise XOR: `^` raises TypeError on floats
        denominator = estimate ** 2 + sigma ** 2
        if denominator > 0:
            lambda_value = estimate ** 2 / denominator
            estimate = lambda_value * estimate

    else:
        return 0

    # Convert from natural log to the requested base
    return estimate / np.log(base)
-