From: Maxime LE COZ Date: Tue, 17 Dec 2013 16:10:18 +0000 (+0100) Subject: Segmentation Forward/Backward added X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=5837900262c6e8714ea60b5ac9964545025ec78f;p=timeside.git Segmentation Forward/Backward added --- diff --git a/front.lab b/front.lab new file mode 100644 index 0000000..474c0f3 --- /dev/null +++ b/front.lab @@ -0,0 +1,136 @@ +0.016938 backward +0.031313 backward +0.120000 backward +0.176687 forward +0.316500 forward +0.330688 backward +0.430437 forward +0.459937 forward +0.479938 instable +0.603000 forward +0.643938 backward +0.663500 backward +0.684375 backward +0.708250 forward +0.715562 backward +0.742437 forward +0.746938 backward +0.780813 forward +0.810875 backward +0.837437 forward +0.842688 backward +0.862938 forward +0.893500 backward +0.914813 backward +0.986563 forward +1.080000 backward +1.095500 backward +1.146688 forward +1.178250 forward +1.228938 forward +1.287313 backward +1.291125 backward +1.314000 backward +1.318437 backward +1.338437 forward +1.360313 forward +1.377438 backward +1.400375 forward +1.431375 backward +1.496500 forward +1.516500 forward +1.537688 forward +1.637063 backward +1.658750 backward +1.700750 forward +1.747250 backward +1.762812 backward +1.785562 backward +1.805562 backward +1.825562 instable +1.828625 backward +1.848625 forward +1.927500 backward +1.934250 backward +1.957125 forward +1.977250 forward +2.022375 backward +2.043563 forward +2.079938 forward +2.101250 backward +2.146313 backward +2.166313 instable +2.199937 backward +2.259937 forward +2.349188 backward +2.370750 backward +2.396125 backward +2.415125 backward +2.447312 forward +2.517375 backward +2.538750 forward +2.569812 forward +2.600750 forward +2.648625 backward +2.677062 forward +2.678750 backward +2.699125 forward +2.726313 backward +2.766812 backward +2.790750 backward +2.834563 forward +2.939687 forward +2.964938 backward +3.004812 backward +3.044375 forward +3.066250 forward +3.123937 
forward +3.239937 backward +3.279500 backward +3.300250 forward +3.308750 backward +3.328750 instable +3.348937 backward +3.368937 instable +3.373687 backward +3.394000 forward +3.417375 forward +3.425625 backward +3.463500 backward +3.472875 backward +3.492875 backward +3.531000 forward +3.560500 forward +3.565625 backward +3.587375 forward +3.599687 backward +3.629000 forward +3.684000 backward +3.710062 backward +3.758563 forward +3.820625 forward +3.843437 forward +3.869875 backward +3.892313 backward +3.973187 forward +3.998312 forward +4.048063 backward +4.058125 backward +4.078437 forward +4.129125 backward +4.140688 backward +4.200938 forward +4.226625 forward +4.257187 forward +4.356687 backward +4.379937 backward +4.400062 forward +4.466563 forward +4.609750 backward +4.616250 backward +4.636250 backward +4.665875 forward +4.669625 backward +4.690750 forward +4.713687 forward +4.796125 forward diff --git a/timeside/analyzer/__init__.py b/timeside/analyzer/__init__.py index 4c293f1..e7a0150 100644 --- a/timeside/analyzer/__init__.py +++ b/timeside/analyzer/__init__.py @@ -13,4 +13,7 @@ from waveform import Waveform from vamp_plugin import VampSimpleHost from irit_speech_entropy import IRITSpeechEntropy from irit_speech_4hz import IRITSpeech4Hz +from irit_monopoly import IRITMonopoly +from irit_diverg import IRITDiverg + from odf import OnsetDetectionFunction diff --git a/timeside/analyzer/irit_diverg.py b/timeside/analyzer/irit_diverg.py new file mode 100644 index 0000000..c52ff22 --- /dev/null +++ b/timeside/analyzer/irit_diverg.py @@ -0,0 +1,411 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2013 Maxime Le Coz + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. 
# TimeSide is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with TimeSide.  If not, see <http://www.gnu.org/licenses/>.

# Author: Maxime Le Coz

from timeside.core import implements, interfacedoc
from timeside.analyzer.core import Analyzer
from timeside.analyzer import Waveform
from timeside.api import IAnalyzer
from numpy import spacing
from collections import deque


class ModelLongTerm(object):
    '''
    Long-term adaptive lattice model: learned on every sample seen since
    the last detected rupture.  Forward/backward prediction errors and
    their variances are updated recursively, sample by sample.
    '''

    def __init__(self, ordre, echantillon):
        '''
        Initialise a lattice model of order `ordre` with the first
        sample `echantillon`.
        '''
        self.ordre = ordre
        self.ft = [0] * (ordre + 2)          # backward prediction errors at t
        self.ftm1 = [0] * (ordre + 2)        # backward prediction errors at t-1
        self.variance_f = [1] * (ordre + 2)  # forward error variances per stage
        self.variance_b = [1] * (ordre + 2)  # backward error variances per stage
        self.et = [0] * (ordre + 2)          # forward prediction errors at t
        self.cor = [0] * (ordre + 2)         # forward/backward cross-correlations
        self.length = 1
        self.erreur_residuelle = 0
        self.variance_erreur_residuelle = 0

        # forgetting factor 1/N: running-mean update of the variances
        oubli = 1.0 / float(self.length)
        self.variance_f[0] = self.variance_f[0] + oubli * (echantillon ** 2 - self.variance_f[0])
        self.variance_b[0] = self.variance_f[0]
        self.et[0] = echantillon
        self.ft[0] = echantillon

        # with a single sample, only stage 0 is meaningful
        ik = min([ordre, self.length - 1])
        self.erreur_residuelle = self.et[ik]
        self.variance_erreur_residuelle = self.variance_f[ik]

    def miseAJour(self, echantillon):
        '''
        Recursive update of the lattice with one new sample.
        '''
        self.length += 1
        self.ftm1 = self.ft[:]
        self.et[0] = echantillon

        oubli = 1.0 / float(self.length)
        self.variance_f[0] = self.variance_f[0] + oubli * (echantillon ** 2 - self.variance_f[0])
        self.variance_b[0] = self.variance_f[0]

        # effective order grows with the number of samples seen so far
        ik = min([self.ordre, self.length - 1])

        for n in range(ik + 1):
            oubli = 1.0 / float(self.length - n)
            self.cor[n] = self.cor[n] + oubli * (self.ftm1[n] * self.et[n] - self.cor[n])

            # reflection coefficient of stage n+1
            knplus1 = 2 * self.cor[n] / (self.variance_f[n] + self.variance_b[n])

            self.et[n + 1] = self.et[n] - knplus1 * self.ftm1[n]
            self.ft[n + 1] = self.ftm1[n] - knplus1 * self.et[n]

            self.variance_f[n + 1] = self.variance_f[n + 1] + oubli * (self.et[n + 1] ** 2 - self.variance_f[n + 1])
            self.variance_b[n + 1] = self.variance_b[n + 1] + oubli * (self.ft[n + 1] ** 2 - self.variance_b[n + 1])

        self.ft[0] = echantillon
        self.erreur_residuelle = self.et[ik + 1]
        self.variance_erreur_residuelle = self.variance_f[ik + 1]

    def __str__(self):
        '''
        Human-readable dump of the model state (debugging aid).
        '''
        def _vec(values):
            return '[' + ''.join('%f ' % v for v in values) + ']'

        return ('Model Long Terme\n'
                '\tOrdre\t\t%d\n' % self.ordre +
                '\tLongueur\t%d\n' % self.length +
                '\tet\t\t%s\n' % _vec(self.et) +
                '\tft\t\t%s\n' % _vec(self.ft) +
                '\tft-1\t\t%s\n' % _vec(self.ftm1) +
                '\tVarb\t\t%s\n' % _vec(self.variance_b) +
                '\tVarf\t\t%s\n' % _vec(self.variance_f) +
                '\tErreur\t\t%f\n' % self.erreur_residuelle +
                '\tVar(err)\t%f\n' % self.variance_erreur_residuelle)


class ModelCourtTrerm(object):
    '''
    Short-term AR model estimated by Levinson-Durbin on a sliding buffer
    of the last Lmin samples.
    '''

    def __init__(self, ordre, buff):
        '''
        Build the model of order `ordre` from the initial buffer `buff`
        (a deque of the last Lmin samples).
        '''
        self.N = len(buff)
        self.ordre = ordre
        self.erreur_residuelle = 0
        self.variance_erreur_residuelle = 0
        self.coef_autocorr = [0] * (self.ordre + 2)
        self.AI = [0] * (self.ordre + 2)
        self.dernier_echantillon = 0
        self.buff = buff
        for tau in range(self.ordre + 1):
            for i in range(self.N - tau):
                # FIX: was buff[i + tau - 1], which computed lag tau-1 and
                # wrapped to buff[-1] for tau == 0.  miseAJour() maintains
                # r(tau) = sum x[i]*x[i+tau], so the initialisation must
                # use the same lag convention.
                self.coef_autocorr[tau] = self.coef_autocorr[tau] + buff[i] * buff[i + tau]
        self.estimModel()

    def estimModel(self):
        '''
        Levinson-Durbin recursion: estimate the AR coefficients (AI), the
        residual error and its variance from the autocorrelation.
        '''
        coef_reflexion = [0] * self.ordre

        # guard against a degenerate (non positive) zero-lag autocorrelation
        if self.coef_autocorr[0] <= 0:
            self.coef_autocorr[0] = 1.0

        coef_reflexion[0] = -self.coef_autocorr[1] / self.coef_autocorr[0]
        self.AI[0] = 1
        self.AI[1] = coef_reflexion[0]
        self.variance_erreur_residuelle = self.coef_autocorr[0] + self.coef_autocorr[1] * coef_reflexion[0]

        if self.ordre > 1:
            i_ordre = 1
            # NOTE(review): the condition below was destroyed by HTML tag
            # stripping in the patch ("while i_ordre 0 :"); reconstructed as
            # the standard Levinson stopping rule — confirm against upstream.
            while i_ordre < self.ordre and self.variance_erreur_residuelle > 0:

                if self.variance_erreur_residuelle > 0:
                    S = 0
                    for i in range(i_ordre):
                        S = S + self.AI[i] * self.coef_autocorr[i_ordre - i + 1]

                    # reflection coefficient of the current order
                    coef_reflexion[i_ordre] = -S / self.variance_erreur_residuelle

                    # symmetric in-place update of the AR coefficients
                    MH = i_ordre // 2 + 1
                    for i in range(1, MH):
                        IB = i_ordre - i + 2
                        tmp = self.AI[i] + coef_reflexion[i_ordre] * self.AI[IB]
                        self.AI[IB] = self.AI[IB] + coef_reflexion[i_ordre] * self.AI[i]
                        self.AI[i] = tmp
                    self.AI[i_ordre + 1] = coef_reflexion[i_ordre]
                    self.variance_erreur_residuelle = self.variance_erreur_residuelle + coef_reflexion[i_ordre] * S

                i_ordre += 1

        if self.variance_erreur_residuelle > 0:
            self.variance_erreur_residuelle = self.variance_erreur_residuelle / float(self.N - 1)
            self.erreur_residuelle = 0
            # residual: filter the end of the buffer with the AR coefficients
            for i in range(self.ordre + 1):
                self.erreur_residuelle = self.erreur_residuelle + self.AI[i] * self.buff[self.N - i - 1]

    def miseAJour(self, echantillon):
        '''
        Slide the buffer by one sample and update the autocorrelation
        incrementally, then re-estimate the model.
        '''
        self.dernier_echantillon = self.buff.popleft()
        self.buff.append(echantillon)
        for tau in range(1, self.ordre + 1):
            # r_new(tau) = r_old(tau) - x_out*x[tau] + x[N-1-tau]*x_in
            self.coef_autocorr[tau] = (self.coef_autocorr[tau]
                                       - self.dernier_echantillon * self.buff[tau - 1]
                                       + self.buff[self.N - tau - 1] * self.buff[self.N - 1])
        self.coef_autocorr[0] = self.coef_autocorr[0] - self.dernier_echantillon ** 2 + self.buff[self.N - 1] ** 2
        self.estimModel()

    def __str__(self):
        '''
        Human-readable dump of the model state (debugging aid).
        '''
        def _vec(values):
            return '[' + ''.join('%f ' % v for v in values) + ']'

        return ('Model Court Terme\n'
                '\tOrdre\t%d\n' % self.ordre +
                '\tAI\t%s\n' % _vec(self.AI) +
                '\tErreur\t%d\n' % self.erreur_residuelle +
                '\tVar(err)\t%d\n' % self.variance_erreur_residuelle +
                '\tAutocor\t %s\n' % _vec(self.coef_autocorr))


def calculDistance(modeleLong, modeleCourt):
    '''
    Distance between the long-term and short-term models (likelihood-ratio
    style statistic used to accumulate Wn).

    args :
        - modeleLong (ModelLongTerm)  : model learned on every sample since
          the last rupture
        - modeleCourt (ModelCourtTrerm): model learned on the last Lmin samples
    '''
    if modeleCourt.variance_erreur_residuelle == 0:
        # machine epsilon for the dtype of the short-term variance,
        # to avoid a division by zero below
        numerateur = spacing(modeleCourt.variance_erreur_residuelle)
    else:
        numerateur = modeleCourt.variance_erreur_residuelle

    QV = numerateur / modeleLong.variance_erreur_residuelle
    return ((2 * modeleCourt.erreur_residuelle * modeleLong.erreur_residuelle
             / modeleLong.variance_erreur_residuelle
             - (1.0 + QV) * modeleLong.erreur_residuelle ** 2
             / modeleLong.variance_erreur_residuelle
             + QV - 1.0) / (2.0 * QV))


def segment(data, fe, ordre=2, Lmin=0.02, lamb=40.0, biais=-0.2,
            with_backward=True, seuil_vois=None, withTrace=False):
    '''
    Main segmentation function (forward/backward divergence).

    args :
        - data (list of float): signal samples
        - fe (float)          : sampling frequency
        - ordre (int)         : model order (default 2)
        - Lmin (float)        : minimal segment length in seconds, also the
                                short-term buffer size (default 0.02)
        - lamb (float)        : lambda threshold on the drop of Wn (default 40.0)
        - biais (float)       : (negative) bias applied to the distance (default -0.2)
        - with_backward (bool): also refine each rupture backward (default True)
        - seuil_vois (float)  : forwarded to the backward pass only; the
                                voicing-dependent lambda/bias selection it
                                documents is not implemented here
        - withTrace (bool)    : record the Wn dynamics of each rupture search

    returns : list of (sample_index, method) with method in
              {'forward', 'backward', 'instable'}
    '''
    # Initialisation
    frontieres = []
    # FIX: trace containers were referenced but never initialised, so
    # withTrace=True raised NameError in the original code.
    trace = []
    t = 0
    rupt_last = t
    long_signal = len(data)
    # minimal length converted to samples
    Lmin = int(Lmin * fe)

    while t < long_signal - 1:
        # Search for the next rupture

        rupture = False   # stop criterion: a rupture was found
        Wn = 0.           # cumulated likelihood statistic
        maxi = (0, -1)    # max value of Wn and its location

        audio_buffer = deque([], Lmin)
        if withTrace:
            dynaWn = []
            tWn = []

        # initialise the long-term model on the first sample
        echantillon = data[t]
        longTerme = ModelLongTerm(ordre, echantillon)

        while (not rupture) and t < long_signal - 1:

            t += 1

            # update the long-term model
            echantillon = data[t]
            longTerme.miseAJour(echantillon)

            # once far enough from the last rupture, the short-term
            # model can be used
            if t - rupt_last >= Lmin:

                # initialise the short-term model on the filled buffer
                if t - rupt_last == Lmin:
                    courtTerme = ModelCourtTrerm(ordre, audio_buffer)

                # then update it sample by sample
                if t - rupt_last > Lmin:
                    courtTerme.miseAJour(echantillon)

                # update the criterion
                Wn = Wn + calculDistance(longTerme, courtTerme) - biais

                # track the running maximum
                if Wn > maxi[0]:
                    maxi = (Wn, t)

                if withTrace:
                    dynaWn += [Wn]
                    tWn += [t]

                # rupture when Wn dropped by more than lambda from its max
                if (maxi[0] - Wn) > lamb:
                    rupture = True
            else:
                # still filling the short-term buffer
                audio_buffer.append(echantillon)

        # place the rupture at the last maximum of Wn
        t_rupt = maxi[1]

        # a rupture location exists (Wn grew at least once)
        if t_rupt > -1:

            m = 'forward'
            if with_backward:

                # refine backward on the reversed segment
                bdata = data[t_rupt:rupt_last:-1]

                if len(bdata) > 0:

                    front = segment(bdata, fe, ordre, float(Lmin) / fe, lamb,
                                    biais, with_backward=False,
                                    seuil_vois=seuil_vois, withTrace=withTrace)
                    t_bs = [t_rupt - tr for tr, _ in front]

                    if len(t_bs) > 0:
                        t_rupt = t_bs[-1]
                        m = 'backward'

        # otherwise emit a segment of minimal length
        else:
            t_rupt = rupt_last + Lmin
            m = 'instable'

        if withTrace:
            # FIX: evnt was never built in the original code
            evnt = {'Wn': dynaWn, 'tWn': tWn, 'selected': t_rupt, 'comment': m}
            trace += [evnt]

        # NOTE(review): when the signal ends without a rupture, t is reset
        # to the last maximum, which may re-scan the same region — confirm
        # termination on real data.
        t = t_rupt
        rupt_last = t_rupt

        if rupture:
            frontieres.append((t_rupt, m))

    return frontieres


class IRITDiverg(Analyzer):
    implements(IAnalyzer)
    '''
    Stationary-segment analyzer based on forward/backward divergence.
    '''

    def __init__(self, blocksize=1024, stepsize=None):
        super(IRITDiverg, self).__init__()
        self.parents.append(Waveform())
        self.ordre = 2  # AR model order used by segment()

    @interfacedoc
    def setup(self, channels=None, samplerate=None,
              blocksize=None, totalframes=None):
        super(IRITDiverg, self).setup(channels,
                                      samplerate,
                                      blocksize,
                                      totalframes)
        # FIX: the Waveform parent is registered once in __init__;
        # the original appended a duplicate here on every setup() call.

    @staticmethod
    @interfacedoc
    def id():
        return "irit_diverg"

    @staticmethod
    @interfacedoc
    def name():
        return "IRIT Forward/Backward Divergence Segmentation"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Stationnary Segments"

    def process(self, frames, eod=False):
        '''
        Pass-through: the actual data comes from the Waveform parent.
        '''
        return frames, eod

    def post_process(self):
        '''
        Run the segmentation on the decoded waveform and dump the
        boundaries to 'front.lab' (debug output).
        '''
        data = self.process_pipe.results['waveform_analyzer'].data
        frontieres = segment(data, self.samplerate(), self.ordre)
        # FIX: use a context manager so the file handle is always closed.
        # NOTE(review): consider storing results via new_result() instead
        # of a hard-coded debug file and print.
        with open('front.lab', 'w') as f:
            for t, m in frontieres:
                f.write('%f\t%s\n' % (float(t) / self.samplerate(), m))
        print(frontieres)


# ---------------------------------------------------------------------------
# timeside/analyzer/irit_monopoly.py (new file) — same GPL v2+ header.
# ---------------------------------------------------------------------------
# Author: Maxime Le Coz

from timeside.analyzer.utils import segmentFromValues
from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
from timeside.analyzer.core import Analyzer
from timeside.api import IAnalyzer
from aubio import pitch
import numpy


class IRITMonopoly(Analyzer):
    implements(IAnalyzer)
    '''
    Monophony/polyphony segmenter based on the analysis of the aubio
    yin pitch confidence: windows of confidence statistics are classified
    with two Weibull-based likelihood models.
    '''

    @interfacedoc
    def setup(self, channels=None, samplerate=None,
              blocksize=None, totalframes=None):
        super(IRITMonopoly, self).setup(channels,
                                        samplerate,
                                        blocksize,
                                        totalframes)
        self.aubio_pitch = pitch("default", self.input_blocksize,
                                 self.input_stepsize, samplerate)
        self.aubio_pitch.set_unit("freq")
        self.block_read = 0
        self.pitches = []
        self.pitch_confidences = []
        # decision window length in seconds.
        # FIX: was (re)assigned on every process() call; defining it here
        # guarantees post_process() can use it even with zero frames.
        self.decisionLen = 1.0

    @staticmethod
    @interfacedoc
    def id():
        return "irit_monopoly"

    @staticmethod
    @interfacedoc
    def name():
        return "IRIT Monophony / Polyphony classification"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Labeled Monophonic/Polyphonic segments"

    def process(self, frames, eod=False):
        '''
        Accumulate the yin pitch and confidence of each audio block.
        '''
        pf = self.aubio_pitch(frames.T[0])
        self.pitches += [pf[0]]
        self.pitch_confidences += [self.aubio_pitch.get_confidence()]
        self.block_read += 1
        return frames, eod

    def post_process(self):
        '''
        Classify sliding windows of yin confidence as mono or poly and
        store the confidence curve plus the labelled segments.
        '''
        # FIX: guard against an empty run (the original indexed [0] blindly)
        if not self.pitch_confidences:
            return

        frameLenModulation = int(self.decisionLen * self.samplerate()
                                 / self.blocksize())
        # epsilon avoids a null variance on constant windows
        epsilon = numpy.spacing(self.pitch_confidences[0])

        w = int(self.decisionLen * self.samplerate() / (self.blocksize() * 2))
        is_mono = []
        for i in range(w, len(self.pitch_confidences) - w, frameLenModulation):
            d = self.pitch_confidences[i - w:i + w]
            conf_mean = numpy.mean(d)
            conf_var = numpy.var(d + epsilon)
            is_mono += [self.monoLikelihood(conf_mean, conf_var)
                        > self.polyLikelihood(conf_mean, conf_var)]

        # FIX: the original created this result twice on consecutive lines
        conf = self.new_result(data_mode='value', time_mode='framewise')
        conf.id_metadata.id += '.' + 'yin_confidence'
        conf.id_metadata.name += ' ' + 'Yin Confidence'
        conf.data_object.value = self.pitch_confidences
        self.process_pipe.results.add(conf)

        convert = {False: 0, True: 1}
        label = {0: 'Poly', 1: 'Mono'}
        segList = segmentFromValues(is_mono)

        segs = self.new_result(data_mode='label', time_mode='segment')
        segs.id_metadata.id += '.' + 'segments'
        segs.id_metadata.name += ' ' + 'Segments'
        segs.label_metadata.label = label
        segs.data_object.label = [convert[s[2]] for s in segList]
        segs.data_object.time = [(float(s[0]) * self.blocksize() /
                                  self.samplerate())
                                 for s in segList]
        segs.data_object.duration = [(float(s[1] - s[0]) * self.blocksize() /
                                      self.samplerate())
                                     for s in segList]
        self.process_pipe.results.add(segs)

    def monoLikelihood(self, m, v):
        '''
        Log-likelihood of (mean, variance) under the monophonic model
        (bivariate Weibull, parameters fitted offline).
        '''
        theta1 = 0.1007
        theta2 = 0.0029
        beta1 = 0.5955
        beta2 = 0.2821
        delta = 0.848
        return self.weibullLikelihood(m, v, theta1, theta2, beta1, beta2, delta)

    def polyLikelihood(self, m, v):
        '''
        Log-likelihood of (mean, variance) under the polyphonic model
        (bivariate Weibull, parameters fitted offline).
        '''
        theta1 = 0.3224
        theta2 = 0.0121
        beta1 = 1.889
        beta2 = 0.8705
        delta = 0.644
        return self.weibullLikelihood(m, v, theta1, theta2, beta1, beta2, delta)

    def weibullLikelihood(self, m, v, theta1, theta2, beta1, beta2, delta):
        '''
        Mean log-likelihood of (m, v) under a bivariate Weibull density
        with scales theta1/theta2, shapes beta1/beta2 and dependence delta.
        '''
        m = numpy.array(m)
        v = numpy.array(v)

        c0 = numpy.log(beta1 * beta2 / (theta1 * theta2))
        a1 = m / theta1
        b1 = a1 ** (beta1 / delta)
        c1 = numpy.log(a1)
        a2 = v / theta2
        b2 = a2 ** (beta2 / delta)
        c2 = numpy.log(a2)
        somme1 = (b1 + b2) ** delta
        Pxy = (c0 + (beta1 / delta - 1) * c1 + (beta2 / delta - 1) * c2
               + (delta - 2) * numpy.log(b1 + b2)
               + numpy.log(somme1 + 1 / delta - 1) - somme1)

        return numpy.mean(Pxy)