--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
+
+# Author: Maxime Le Coz <lecoz@irit.fr>
+
+from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
+from timeside.analyzer.core import *
+from timeside.api import IValueAnalyzer
+
+class IRITSpeech(Processor):
+ implements(IValueAnalyzer)
+
+ @interfacedoc
+ def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+ super(AubioPitch, self).setup(channels, samplerate, blocksize, totalframes)
+ self.win_s = 2048
+ self.hop_s = self.win_s / 2
+ self.p = pitch("default", self.win_s, self.hop_s, samplerate)
+ self.p.set_unit("freq")
+ self.block_read = 0
+ self.pitches = []
+
+ @staticmethod
+ @interfacedoc
+ def id():
+ return "aubio_pitch_analyzer"
+
+ @staticmethod
+ @interfacedoc
+ def name():
+ return "f0 (aubio)"
+
+ @staticmethod
+ @interfacedoc
+ def unit():
+ return ""
+
+ def __str__(self):
+ return "pitch values"
+
+ def process(self, frames, eod=False):
+ for samples in downsample_blocking(frames, self.hop_s):
+ #time = self.block_read * self.hop_s * 1. / self.samplerate()
+ self.pitches += [self.p(samples)[0]]
+ self.block_read += 1
+ return frames, eod
+
+ def results(self):
+
+ #container = AnalyzerResultContainer()
+
+ self.pitches = numpy.array(self.pitches)
+
+ pitch = AnalyzerResult(id = "aubio_pitch", name = "f0 (aubio)", unit = "Hz")
+ pitch.value = self.pitches
+ #container.add_result(pitch)
+
+ pitch_mean = AnalyzerResult(id = "aubio_pitch_mean", name = "f0 mean (aubio)", unit = "Hz")
+ pitch_mean.value = numpy.mean(self.pitches)
+ #container.add_result(pitch_mean)
+
+ pitch_median = AnalyzerResult(id = "aubio_pitch_median", name = "f0 median (aubio)", unit = "Hz")
+ pitch_median.value = numpy.median(self.pitches)
+ #container.add_result(pitch_median)
+
+ #return container
+ return AnalyzerResultContainer([pitch, pitch_mean, pitch_median])
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
+
+# Author: Maxime Le Coz <lecoz@irit.fr>
+
+from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
+from timeside.analyzer.core import *
+from timeside.api import IValueAnalyzer
+from numpy import array,hamming,dot,mean
+from numpy.fft import rfft
+from scipy.ndimage.morphology import binary_opening
+from scipy.signal import firwin,lfilter
+from scipy.io.wavfile import write as wavwrite
+from matplotlib import pylab
+
+class IRITSpeech4Hz(Processor):
+ implements(IValueAnalyzer)
+
+ @interfacedoc
+ def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+ super(IRITSpeech4Hz, self).setup(channels, samplerate, blocksize, totalframes)
+ self.energy4hz = []
+ self.threshold = 2.0
+ self.smoothLen = 5
+ self.fCenter = 4.0
+ self.normalizeEnergy = True
+ self.nFFT=2048
+ self.orderFilter=100
+ self.nbFilters =30
+ self.modulLen = 2
+ self.fwidth = 0.5
+ self.melFilter = melFilterBank(self.nbFilters,self.nFFT,samplerate);
+ @staticmethod
+ @interfacedoc
+ def id():
+ return "irit_speech_4hz"
+
+ @staticmethod
+ @interfacedoc
+ def name():
+ return "Speech entropy (IRIT)"
+
+ @staticmethod
+ @interfacedoc
+ def unit():
+ return ""
+
+ def __str__(self):
+ return "Speech confidences indexes"
+
+ def process(self, frames, eod=False):
+ '''
+
+ '''
+
+ frames = frames.T[0]
+ w = frames * hamming(len(frames));
+ f = abs(rfft(w,n=2*self.nFFT)[0:self.nFFT])
+ e = dot(f**2,self.melFilter)
+ self.energy4hz.append(e)
+ return frames, eod
+
+ def results(self):
+ '''
+
+ '''
+ #wavwrite('out.wav',self.fe,(numpy.array(self.data)*2**15).astype(numpy.int16))
+
+ Wo = self.fCenter/self.samplerate() ;
+ Wn = [ Wo-(self.fwidth/2)/self.samplerate() , Wo+(self.fwidth/2)/self.samplerate()];
+ num = firwin(self.orderFilter, Wn,pass_zero=False);
+ self.energy4hz=numpy.array(self.energy4hz)
+ energy = lfilter(num,1,self.energy4hz.T,0)
+ energy = sum(energy)
+
+ if self.normalizeEnergy :
+ energy =energy/mean(energy)
+
+
+ w= int(float(self.modulLen)*self.samplerate()/self.blocksize())
+ modEnergyValue =computeModulation(energy,w,True)
+
+ conf = array(modEnergyValue-self.threshold)/self.threshold
+ conf[conf>1] = 1
+
+ modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
+ modEnergy.value = conf
+ convert = {False:'NonSpeech',True:'Speech'}
+
+ segList = segmentFromValues(modEnergyValue>self.threshold)
+ segmentsEntropy =[]
+ for s in segList :
+ segmentsEntropy.append((s[0],s[1],convert[s[2]]))
+ segs = AnalyzerResult(id = "irit_4hzenergy_segments", name = "seg 4Hz (IRIT)", unit = "s")
+ segs.value = segmentsEntropy
+ return AnalyzerResultContainer([modEnergy,segs])
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
+
+# Author: Maxime Le Coz <lecoz@irit.fr>
+
+from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
+from timeside.analyzer.core import *
+from timeside.api import IValueAnalyzer
+from numpy import array
+from scipy.ndimage.morphology import binary_opening
+from matplotlib import pylab
+
+class IRITSpeechEntropy(Processor):
+ implements(IValueAnalyzer)
+
+ @interfacedoc
+ def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+ super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
+ self.entropyValue = []
+ self.threshold = 0.4
+ self.smoothLen = 5
+ self.modulLen = 2
+
+ @staticmethod
+ @interfacedoc
+ def id():
+ return "irit_speech_entropy"
+
+ @staticmethod
+ @interfacedoc
+ def name():
+ return "Speech entropy (IRIT)"
+
+ @staticmethod
+ @interfacedoc
+ def unit():
+ return ""
+
+ def __str__(self):
+ return "Speech confidences indexes"
+
+ def process(self, frames, eod=False):
+ self.entropyValue.append(entropy(frames))
+ return frames, eod
+
+ def results(self):
+
+ entropyValue = numpy.array(self.entropyValue)
+ w = self.modulLen*self.samplerate()/self.blocksize()
+ modulentropy = computeModulation(entropyValue,w,False)
+ confEntropy= array(modulentropy-self.threshold)/self.threshold
+ confEntropy[confEntropy>1] = 1
+
+ conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
+ conf.value = confEntropy
+
+ binaryEntropy = modulentropy > self.threshold
+ binaryEntropy = binary_opening(binaryEntropy,[1]*(self.smoothLen*2))
+
+ convert = {False:'NonSpeech',True:'Speech'}
+ segList = segmentFromValues(binaryEntropy)
+ segmentsEntropy =[]
+ for s in segList :
+ segmentsEntropy.append((s[0],s[1],convert[s[2]]))
+
+ segs = AnalyzerResult(id = "irit_entropy_segments", name = "seg entropy (IRIT)", unit = "s")
+ segs.value = segmentsEntropy
+
+
+ return AnalyzerResultContainer([conf, segs])
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
+
+# Author: Maxime Le Coz <lecoz@irit.fr>
+
+from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
+from timeside.analyzer.core import *
+from timeside.api import IValueAnalyzer
+from numpy import histogram,log,sqrt,min,max,sum,exp,array,zeros,var
+from scipy.ndimage.morphology import binary_opening
+from matplotlib import pylab
+
+class IRITSpeechEntropy(Processor):
+ implements(IValueAnalyzer)
+
+ @interfacedoc
+ def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+ super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
+ self.entropyValue = []
+ self.threshold = 0.4
+ self.smoothLen = 5
+ self.w_hop = 1024
+ self.modulLen = 1
+ @staticmethod
+ @interfacedoc
+ def id():
+ return "irit_speech_entropy"
+
+ @staticmethod
+ @interfacedoc
+ def name():
+ return "Speech entropy (IRIT)"
+
+ @staticmethod
+ @interfacedoc
+ def unit():
+ return ""
+
+ def __str__(self):
+ return "Speech confidences indexes"
+
+ def process(self, frames, eod=False):
+
+ for samples in downsample_blocking(frames, self.w_hop):
+ self.entropyValue.append(entropy(samples))
+ return frames, eod
+
+ def results(self):
+
+ entropyValue = numpy.array(self.entropyValue)
+ w = self.modulLen*self.samplerate()/self.w_hop
+ modulentropy = computeModulation(entr,w,withLog=False)
+ confEntropy= array(modulentropy-self.threshold)/self.threshold
+ confEntropy[confEntropy>1] = 1
+
+ conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
+ conf.value = confEntropy
+
+ binaryEntropy = modulentropy > self.threshold
+ binaryEntropy = binary_opening(binaryEntropy,[1]*(self.smoothLen*2))
+
+ convert = {False:'NonSpeech',True:'Speech'}
+ segList = segmentFromValues(binaryEntropy)
+ segmentsEntropy =[]
+ for s in segList :
+ segmentsEntropy.append((s[0]*self.w_hop/float(self.samplerate()),s[1]*self.w_hop/float(self.samplerate()),convert[s[2]]))
+
+ segs = AnalyzerResult(id = "irit_entropy_segments", name = "seg entropy (IRIT)", unit = "s")
+ segs.value = segmentsEntropy
+
+
+ return AnalyzerResultContainer([conf, segs])