From 2a782f6bff2ec2df06ff9c535c265b349b4ac083 Mon Sep 17 00:00:00 2001 From: Maxime LE COZ Date: Mon, 17 Jun 2013 16:12:25 +0200 Subject: [PATCH] Irit analysers --- timeside/analyzer/irit_speech.py~ | 83 ++++++++++++++++ timeside/analyzer/irit_speech_4hz.py | 112 ++++++++++++++++++++++ timeside/analyzer/irit_speech_4hz.pyc | Bin 0 -> 4014 bytes timeside/analyzer/irit_speech_entropy.py | 86 +++++++++++++++++ timeside/analyzer/irit_speech_entropy.pyc | Bin 0 -> 3117 bytes timeside/analyzer/irit_speech_entropy.py~ | 88 +++++++++++++++++ 6 files changed, 369 insertions(+) create mode 100644 timeside/analyzer/irit_speech.py~ create mode 100644 timeside/analyzer/irit_speech_4hz.py create mode 100644 timeside/analyzer/irit_speech_4hz.pyc create mode 100644 timeside/analyzer/irit_speech_entropy.py create mode 100644 timeside/analyzer/irit_speech_entropy.pyc create mode 100644 timeside/analyzer/irit_speech_entropy.py~ diff --git a/timeside/analyzer/irit_speech.py~ b/timeside/analyzer/irit_speech.py~ new file mode 100644 index 0000000..8f0edc4 --- /dev/null +++ b/timeside/analyzer/irit_speech.py~ @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2013 Maxime Le Coz + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
# Author: Maxime Le Coz

from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
from timeside.analyzer.core import *
from timeside.api import IValueAnalyzer


class IRITSpeech(Processor):
    """Analyzer that tracks a pitch (f0) estimate per hop of input audio.

    Each hop-sized block yielded by ``downsample_blocking`` is handed to the
    ``pitch`` detector created in ``setup``; ``results`` reports the whole
    track together with its mean and median.

    NOTE(review): the ids and names returned below ("aubio_pitch_analyzer",
    "f0 (aubio)", ...) mention neither IRIT nor speech -- this class looks
    like an unfinished copy of an aubio pitch analyzer; confirm before
    relying on it.
    NOTE(review): ``pitch``, ``downsample_blocking``, ``numpy``,
    ``AnalyzerResult`` and ``AnalyzerResultContainer`` are not imported by
    name in this module; presumably they are expected to arrive through the
    star import -- verify (``pitch`` in particular).
    """
    implements(IValueAnalyzer)

    @interfacedoc
    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
        # Fixed: the original called super(AubioPitch, self), a class that is
        # not defined in this module, so setup() raised NameError.
        super(IRITSpeech, self).setup(channels, samplerate, blocksize, totalframes)
        self.win_s = 2048
        # Floor division keeps the hop an integer under true division as well.
        self.hop_s = self.win_s // 2
        self.p = pitch("default", self.win_s, self.hop_s, samplerate)
        self.p.set_unit("freq")
        self.block_read = 0   # number of hop-sized blocks analysed so far
        self.pitches = []     # one detector output per analysed block

    @staticmethod
    @interfacedoc
    def id():
        return "aubio_pitch_analyzer"

    @staticmethod
    @interfacedoc
    def name():
        return "f0 (aubio)"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "pitch values"

    def process(self, frames, eod=False):
        """Run the pitch detector on every hop of ``frames``.

        The first element of each detector output is stored. ``frames`` and
        ``eod`` are returned untouched.
        """
        for samples in downsample_blocking(frames, self.hop_s):
            self.pitches.append(self.p(samples)[0])
            self.block_read += 1
        return frames, eod

    def results(self):
        """Return the pitch track plus its mean and median.

        The accumulated list is converted into a local array only:
        ``self.pitches`` stays a list, so ``process`` keeps appending
        correctly if it is called again after ``results``.
        """
        pitches = numpy.array(self.pitches)

        pitch_track = AnalyzerResult(id="aubio_pitch", name="f0 (aubio)", unit="Hz")
        pitch_track.value = pitches

        pitch_mean = AnalyzerResult(id="aubio_pitch_mean", name="f0 mean (aubio)", unit="Hz")
        pitch_mean.value = numpy.mean(pitches)

        pitch_median = AnalyzerResult(id="aubio_pitch_median", name="f0 median (aubio)", unit="Hz")
        pitch_median.value = numpy.median(pitches)

        return AnalyzerResultContainer([pitch_track, pitch_mean, pitch_median])


# ---- patch text that shared these collapsed source lines (kept as comments) ----
# diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py
# new file mode 100644
# index 0000000..3298be3
# --- /dev/null
# +++ b/timeside/analyzer/irit_speech_4hz.py
# @@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 Maxime Le Coz

# This file is part of TimeSide.

# TimeSide is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.

# TimeSide is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
# Author: Maxime Le Coz

from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
from timeside.analyzer.core import *
from timeside.api import IValueAnalyzer
from numpy import array,hamming,dot,mean
from numpy.fft import rfft
from scipy.ndimage.morphology import binary_opening
from scipy.signal import firwin,lfilter
from scipy.io.wavfile import write as wavwrite
from matplotlib import pylab


class IRITSpeech4Hz(Processor):
    """Speech / non-speech analyzer based on band-passed filter-bank energy.

    ``process`` stores one vector of filter-bank energies per input block.
    ``results`` band-pass filters those energies around ``fCenter``, sums
    them, computes a modulation value with ``computeModulation`` and
    compares it with ``threshold`` to produce a confidence track and a list
    of labelled segments.

    NOTE(review): ``melFilterBank``, ``computeModulation``,
    ``segmentFromValues``, ``numpy``, ``AnalyzerResult`` and
    ``AnalyzerResultContainer`` are not imported by name; presumably they
    come from the star import -- verify.
    """
    implements(IValueAnalyzer)

    @interfacedoc
    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
        super(IRITSpeech4Hz, self).setup(channels, samplerate, blocksize, totalframes)
        self.energy4hz = []          # one filter-bank energy vector per block
        self.threshold = 2.0         # decision level applied to the modulation
        self.smoothLen = 5           # not used by this class
        self.fCenter = 4.0           # centre of the band-pass filter
        self.normalizeEnergy = True  # divide the summed energy by its mean
        self.nFFT = 2048             # number of spectrum bins kept per block
        self.orderFilter = 100       # number of FIR taps given to firwin
        self.nbFilters = 30          # number of filters in the bank
        self.modulLen = 2            # presumably seconds -- TODO confirm
        self.fwidth = 0.5            # width of the band-pass filter
        self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate)

    @staticmethod
    @interfacedoc
    def id():
        return "irit_speech_4hz"

    @staticmethod
    @interfacedoc
    def name():
        # Fixed: the original returned "Speech entropy (IRIT)", the name of
        # the entropy analyzer (copy-paste).
        return "Speech 4Hz modulation (IRIT)"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Speech confidences indexes"

    def process(self, frames, eod=False):
        """Store the filter-bank energies of one block.

        ``frames.T[0]`` is Hamming-windowed, transformed with a zero-padded
        real FFT of size ``2 * nFFT`` (first ``nFFT`` bins kept), and the
        power spectrum is projected onto ``self.melFilter``.

        The input ``frames`` is returned unchanged. The original rebound
        ``frames`` to ``frames.T[0]`` and returned that, so whatever
        consumed the return value no longer received the original block.
        """
        # assumes frames is (samples, channels), so this is channel 0 -- TODO confirm
        mono = frames.T[0]
        windowed = mono * hamming(len(mono))
        spectrum = abs(rfft(windowed, n=2 * self.nFFT)[0:self.nFFT])
        self.energy4hz.append(dot(spectrum ** 2, self.melFilter))
        return frames, eod

    def results(self):
        """Return the confidence track and the Speech/NonSpeech segments.

        The accumulated energies are converted into a local array only, so
        ``self.energy4hz`` remains a list and ``process`` can still append
        to it after ``results`` has been called.
        """
        samplerate = self.samplerate()

        # NOTE(review): firwin expects cutoffs normalised to the Nyquist rate
        # of the filtered sequence, and one energy vector is produced per
        # block, not per audio sample; dividing by the audio sample rate
        # looks suspicious. Left as in the original -- confirm.
        Wo = self.fCenter / samplerate
        half_band = (self.fwidth / 2) / samplerate
        taps = firwin(self.orderFilter, [Wo - half_band, Wo + half_band], pass_zero=False)

        band_energy = numpy.array(self.energy4hz)
        # NOTE(review): after the transpose, axis 0 appears to index the
        # filters rather than time -- confirm this is the intended axis.
        filtered = lfilter(taps, 1, band_energy.T, 0)
        # Explicit axis: collapse the first axis, as the builtin sum() did,
        # without depending on which ``sum`` the star import leaves in scope.
        energy = filtered.sum(axis=0)

        if self.normalizeEnergy:
            mean_energy = mean(energy)
            # Skip the division for all-zero (silent) input instead of
            # producing NaNs through 0/0.
            if mean_energy != 0:
                energy = energy / mean_energy

        w = int(float(self.modulLen) * samplerate / self.blocksize())
        modEnergyValue = computeModulation(energy, w, True)

        # Relative distance to the threshold, capped at 1.
        conf = array(modEnergyValue - self.threshold) / self.threshold
        conf[conf > 1] = 1

        modEnergy = AnalyzerResult(id="irit_4hzenergy_confidence", name="modulation energie (IRIT)", unit="?")
        modEnergy.value = conf

        convert = {False: 'NonSpeech', True: 'Speech'}
        segList = segmentFromValues(modEnergyValue > self.threshold)
        segments = [(s[0], s[1], convert[s[2]]) for s in segList]

        segs = AnalyzerResult(id="irit_4hzenergy_segments", name="seg 4Hz (IRIT)", unit="s")
        segs.value = segments
        return AnalyzerResultContainer([modEnergy, segs])


# ---- patch text that shared these collapsed source lines (kept as comments) ----
# diff --git a/timeside/analyzer/irit_speech_4hz.pyc b/timeside/analyzer/irit_speech_4hz.pyc
# new file mode 100644
# index 0000000000000000000000000000000000000000..eed34b4836e69c995224ad9bc33f2978102275d9
# GIT binary patch
# literal 4014
# (base85 payload of the compiled bytecode file not reproduced here)
zHOwFY&RFN8dLijqWll6gwkIenhP59)1%hgb&&DJWRCUBW3K8m!uQVqMvhmg!Ux={y z1Un~nmT=T6Y*}YHS!5lXX!$u=k`vUG1xhr&t_^F5N&2j!I<00Cp9Gk1KDpMBF58$k zql1ul()N-fXn2$8unNf!w+WUAJ|UNfo8J7S!pcTAiFwWzE%qMz%_G>i zMyrqu58*{DVda{k+Zyx%SxdA#5cCDqB|6f-0Ye7UXK`X<)CFyUo1z}o(86IIy!qJZ= zO>wv8CqNGMYq3pDmgV(BId93Y|9D>}Z8=|-liPBR-dLAyeg*h~$$%Nz-W~?3`Jet` zUQ9JXZ-*u;s>wCg1lEEfJJiH|M3sQUQ8;p6+u`MSXT{V`F^a0HkJ+k77%Q3w;g88= zaBICr7x1mJKd%|#Pr$9XMSEG~k|IYCkg??q{470bsNI+$i6(=+7M7T)K+ z0H7LYA)8vrIYTKJ0rkCMT0}1Vh`dk9yACiYGK{PCH|*Yeqd1iNCaIjZ?wMP@@{KcmOwn3J z?a_x!6?y)3W5-5q(=<%S z+{2EhdEQmyvjq41Y{C7$PDA~^h9#CE12E+96l%y*V??6Wp+i30P`}f*p*wOPM|vwpi@)>R|?ElCopUE+0zW#?dM&MVMgg}G}|2% zr42z4e-~aOWbitsz{RfF0H@paVsq9g@fP@uuWBx^ANbCtkx!tRvWwqJ+S^cMpz0tQ zo$cmvl0`>$H!I2!2IS)CEaZsmji)=R + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see . 
# Author: Maxime Le Coz

from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
from timeside.analyzer.core import *
from timeside.api import IValueAnalyzer
from numpy import array
from scipy.ndimage.morphology import binary_opening
from matplotlib import pylab


class IRITSpeechEntropy(Processor):
    """Speech / non-speech analyzer based on the entropy of each block.

    ``process`` stores ``entropy(frames)`` for every block. ``results``
    computes a modulation value over those entropies with
    ``computeModulation``, compares it with ``threshold`` and reports a
    confidence track plus labelled segments.

    NOTE(review): ``entropy``, ``computeModulation``, ``segmentFromValues``,
    ``AnalyzerResult`` and ``AnalyzerResultContainer`` are not imported by
    name; presumably they come from the star import -- verify.
    """
    implements(IValueAnalyzer)

    @interfacedoc
    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
        super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
        self.entropyValue = []   # one entropy value per processed block
        self.threshold = 0.4     # decision level applied to the modulation
        self.smoothLen = 5       # the opening structure has 2 * smoothLen ones
        self.modulLen = 2        # presumably seconds -- TODO confirm

    @staticmethod
    @interfacedoc
    def id():
        return "irit_speech_entropy"

    @staticmethod
    @interfacedoc
    def name():
        return "Speech entropy (IRIT)"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Speech confidences indexes"

    def process(self, frames, eod=False):
        """Store the entropy of ``frames``; return the input unchanged."""
        self.entropyValue.append(entropy(frames))
        return frames, eod

    def results(self):
        """Return the confidence track and the Speech/NonSpeech segments."""
        # ``array`` is imported explicitly above; the original used
        # ``numpy.array`` although this module never imports ``numpy``.
        entropy_track = array(self.entropyValue)

        # Force an integer window length, as IRITSpeech4Hz does; the bare
        # division yields a float whenever true division applies.
        w = int(float(self.modulLen) * self.samplerate() / self.blocksize())
        modulentropy = computeModulation(entropy_track, w, False)

        # Relative distance to the threshold, capped at 1.
        confEntropy = array(modulentropy - self.threshold) / self.threshold
        confEntropy[confEntropy > 1] = 1

        conf = AnalyzerResult(id="irit_entropy_confidence", name="entropy (IRIT)", unit="?")
        conf.value = confEntropy

        # Morphological opening removes runs of above-threshold values that
        # are shorter than the structuring element.
        above = modulentropy > self.threshold
        opened = binary_opening(above, [1] * (self.smoothLen * 2))

        convert = {False: 'NonSpeech', True: 'Speech'}
        segments = [(s[0], s[1], convert[s[2]]) for s in segmentFromValues(opened)]

        segs = AnalyzerResult(id="irit_entropy_segments", name="seg entropy (IRIT)", unit="s")
        segs.value = segments

        return AnalyzerResultContainer([conf, segs])


# ---- patch text that shared this collapsed source line (kept as a comment) ----
# diff --git a/timeside/analyzer/irit_speech_entropy.pyc
b/timeside/analyzer/irit_speech_entropy.pyc new file mode 100644 index 0000000000000000000000000000000000000000..387de1a3687b0277e9cd8c260f6a5b3fb5fe8fa0 GIT binary patch literal 3117 zcmbtW>uwuG6h5=-yW=#3v^1@%Mo1u2K`J3s1gcs}X;7sS!MXxr39UA}V|&xRu`_P# z%Knv#AG`s)1CPNQ@Q3H%0m^sI+I5R6L5l4ipP4x`b1vU}XWhSgo!|fX?VFI69~b|h z;W0m-tdU|3hmtzoXUV0cL1~kQ&1$AbNsH1p4ck?} zPDzK-E)Ba?zd^|cr9B$<&~K7fG*T3`=qb?&(f7kmirO^jcVJ5V44-}tl5L~;y39ju zOkP@mvyIp1a5Sb&k3hEQiju+U&64J3Pq<6nQ_&!0DAJ|}k2^qYJLcg?iWrN;@by4p?) zNhxy6L6qzW0PLjHW|Ak7^h}!Pc5)9|>_wVK(*&J9Z)q7bMkgbg3MWC9>BI7@)* zrDJ=2hW2}F6V}q{jA5rUg!cpd|3`BG#APDZhl+Iilc!m1KYZb2&c*6^MNZ5G%r65G z=Gh48&p<$XaTe*vD}?ZRZwt-Zy3-lP_rUj!Eq(uEh*eZn=t5pSV%r`DaIcQ>Q-rMI zMuH=lKdQSqa)2ea=CIG}JSIiUyb}~)tjO{?%XC%>Y_up;fVaF#=Yo{i0KV*BR1Ezo zb~2o5Qs-32HnA~mfOx_q$ST}me;oE*V*=TVcVXL3WeoP*XInHa^33Yi9=xiAk^;3K#6V7InyO(X~aBP!+I%{cF*fXwjy} zAJM!;U;OYsEjl!B)A2UV!P8yZY zHWm~Dt09jiq=Wt$1pF`-fS!YB-$El4vxI@a2DQg?hKu3LH{RRjg}lX0VQt~e!#t~Q zg`lSrW2lhS-Iq=NpqFmUB_Hoipl^v!gat1EW`X#61+mNy+tnW_jP2lI#1G^SMDTRJ5eTD}A9hk2 + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see . 
# Author: Maxime Le Coz

from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
from timeside.analyzer.core import *
from timeside.api import IValueAnalyzer
from numpy import histogram,log,sqrt,min,max,sum,exp,array,zeros,var
from scipy.ndimage.morphology import binary_opening
from matplotlib import pylab


class IRITSpeechEntropy(Processor):
    """Speech / non-speech analyzer based on the entropy of fixed-size hops.

    ``process`` cuts the input into ``w_hop``-sample blocks with
    ``downsample_blocking`` and stores ``entropy(samples)`` for each one.
    ``results`` computes a modulation value over those entropies with
    ``computeModulation``, compares it with ``threshold`` and reports a
    confidence track plus labelled segments whose bounds are scaled by
    ``w_hop / samplerate``.

    NOTE(review): ``entropy``, ``downsample_blocking``, ``computeModulation``,
    ``segmentFromValues``, ``AnalyzerResult`` and ``AnalyzerResultContainer``
    are not imported by name; presumably they come from the star import --
    verify.
    """
    implements(IValueAnalyzer)

    @interfacedoc
    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
        super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
        self.entropyValue = []   # one entropy value per analysed hop
        self.threshold = 0.4     # decision level applied to the modulation
        self.smoothLen = 5       # the opening structure has 2 * smoothLen ones
        self.w_hop = 1024        # hop size, in samples, used to cut the input
        self.modulLen = 1        # presumably seconds -- TODO confirm

    @staticmethod
    @interfacedoc
    def id():
        return "irit_speech_entropy"

    @staticmethod
    @interfacedoc
    def name():
        return "Speech entropy (IRIT)"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        return "Speech confidences indexes"

    def process(self, frames, eod=False):
        """Store the entropy of every hop of ``frames``; return the input unchanged."""
        for samples in downsample_blocking(frames, self.w_hop):
            self.entropyValue.append(entropy(samples))
        return frames, eod

    def results(self):
        """Return the confidence track and the Speech/NonSpeech segments."""
        # ``array`` is imported explicitly above; the original used
        # ``numpy.array`` although this module never imports ``numpy``.
        entropy_track = array(self.entropyValue)

        # Force an integer window length; the bare division yields a float
        # whenever true division applies.
        w = int(float(self.modulLen) * self.samplerate() / self.w_hop)

        # Fixed: the original passed ``entr``, a name that is never defined,
        # so results() raised NameError.
        modulentropy = computeModulation(entropy_track, w, withLog=False)

        # Relative distance to the threshold, capped at 1.
        confEntropy = array(modulentropy - self.threshold) / self.threshold
        confEntropy[confEntropy > 1] = 1

        conf = AnalyzerResult(id="irit_entropy_confidence", name="entropy (IRIT)", unit="?")
        conf.value = confEntropy

        # Morphological opening removes runs of above-threshold values that
        # are shorter than the structuring element.
        above = modulentropy > self.threshold
        opened = binary_opening(above, [1] * (self.smoothLen * 2))

        convert = {False: 'NonSpeech', True: 'Speech'}
        samplerate = float(self.samplerate())
        # Segment bounds are hop indices; scale them by w_hop / samplerate.
        segments = [
            (s[0] * self.w_hop / samplerate, s[1] * self.w_hop / samplerate, convert[s[2]])
            for s in segmentFromValues(opened)
        ]

        segs = AnalyzerResult(id="irit_entropy_segments", name="seg entropy (IRIT)", unit="s")
        segs.value = segments

        return AnalyzerResultContainer([conf, segs])


# ---- patch text that shared these collapsed source lines (kept as comments) ----
# --
# 2.39.5