Irit analysers

author Maxime LE COZ <lecoz@irit.fr>

Mon, 17 Jun 2013 14:12:25 +0000 (16:12 +0200)

committer Maxime LE COZ <lecoz@irit.fr>

Mon, 17 Jun 2013 14:12:25 +0000 (16:12 +0200)
author Maxime LE COZ <lecoz@irit.fr>
Mon, 17 Jun 2013 14:12:25 +0000 (16:12 +0200)
committer Maxime LE COZ <lecoz@irit.fr>
Mon, 17 Jun 2013 14:12:25 +0000 (16:12 +0200)
diff --git a/timeside/analyzer/irit_speech.py~ b/timeside/analyzer/irit_speech.py~

new file mode 100644 (file)

index 0000000..8f0edc4
--- /dev/null
+++ b/timeside/analyzer/irit_speech.py~
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide.  If not, see <http://www.gnu.org/licenses/>.
+
+# Author: Maxime Le Coz <lecoz@irit.fr>
+
+from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
+from timeside.analyzer.core import *
+from timeside.api import IValueAnalyzer
+
+class IRITSpeech(Processor):
+    """Block-wise f0 analyzer reporting the pitch track, its mean and median.
+
+    NOTE(review): the identifiers and labels returned by this class still
+    read "aubio"; they are left untouched so existing lookups keep working.
+    """
+    implements(IValueAnalyzer)
+
+    @interfacedoc
+    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+        # The original passed ``AubioPitch`` (not defined in this file) to
+        # super(); the class has to name itself here.
+        super(IRITSpeech, self).setup(channels, samplerate, blocksize, totalframes)
+        self.win_s = 2048
+        # Floor division keeps the hop size an integer on every Python version.
+        self.hop_s = self.win_s // 2
+        # NOTE(review): ``pitch`` is not imported explicitly in this file;
+        # presumably it is meant to come from aubio -- confirm.
+        self.p = pitch("default", self.win_s, self.hop_s, samplerate)
+        self.p.set_unit("freq")
+        self.block_read = 0
+        self.pitches = []
+
+    @staticmethod
+    @interfacedoc
+    def id():
+        return "aubio_pitch_analyzer"
+
+    @staticmethod
+    @interfacedoc
+    def name():
+        return "f0 (aubio)"
+
+    @staticmethod
+    @interfacedoc
+    def unit():
+        return ""
+
+    def __str__(self):
+        return "pitch values"
+
+    def process(self, frames, eod=False):
+        """Append one detector output per block yielded for ``frames``.
+
+        Returns ``frames`` and ``eod`` unchanged.
+        """
+        for samples in downsample_blocking(frames, self.hop_s):
+            self.pitches.append(self.p(samples)[0])
+            self.block_read += 1
+        return frames, eod
+
+    def results(self):
+        """Return the pitch track, its mean and its median as analyzer results.
+
+        The collected values are converted into a local array so that
+        ``self.pitches`` stays a list and ``process`` can keep appending to it.
+        """
+        pitches = numpy.array(self.pitches)
+
+        pitch_track = AnalyzerResult(id = "aubio_pitch", name = "f0 (aubio)", unit = "Hz")
+        pitch_track.value = pitches
+
+        pitch_mean = AnalyzerResult(id = "aubio_pitch_mean", name = "f0 mean (aubio)", unit = "Hz")
+        pitch_mean.value = numpy.mean(pitches)
+
+        pitch_median = AnalyzerResult(id = "aubio_pitch_median", name = "f0 median (aubio)", unit = "Hz")
+        pitch_median.value = numpy.median(pitches)
+
+        return AnalyzerResultContainer([pitch_track, pitch_mean, pitch_median])
diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py

new file mode 100644 (file)

index 0000000..3298be3
--- /dev/null
+++ b/timeside/analyzer/irit_speech_4hz.py
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide.  If not, see <http://www.gnu.org/licenses/>.
+
+# Author: Maxime Le Coz <lecoz@irit.fr>
+
+from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
+from timeside.analyzer.core import *
+from timeside.api import IValueAnalyzer
+from numpy import array,hamming,dot,mean
+from numpy.fft import rfft
+from scipy.ndimage.morphology import binary_opening
+from scipy.signal import firwin,lfilter
+from scipy.io.wavfile import write as wavwrite
+from matplotlib import pylab
+
+class IRITSpeech4Hz(Processor):
+    """Speech detector thresholding the energy modulation around ``fCenter`` Hz."""
+    implements(IValueAnalyzer)
+
+    @interfacedoc
+    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+        super(IRITSpeech4Hz, self).setup(channels, samplerate, blocksize, totalframes)
+        self.energy4hz = []
+        self.threshold = 2.0
+        self.smoothLen = 5
+        self.fCenter = 4.0
+        self.normalizeEnergy = True
+        self.nFFT = 2048
+        self.orderFilter = 100
+        self.nbFilters = 30
+        self.modulLen = 2
+        self.fwidth = 0.5
+        self.melFilter = melFilterBank(self.nbFilters, self.nFFT, samplerate)
+
+    @staticmethod
+    @interfacedoc
+    def id():
+        return "irit_speech_4hz"
+
+    @staticmethod
+    @interfacedoc
+    def name():
+        # The original returned the entropy analyzer's label here.
+        return "Speech 4Hz modulation (IRIT)"
+
+    @staticmethod
+    @interfacedoc
+    def unit():
+        return ""
+
+    def __str__(self):
+        return "Speech confidences indexes"
+
+    def process(self, frames, eod=False):
+        """Store the filter-bank energies of the first channel of ``frames``.
+
+        ``frames`` is handed back untouched; the original rebound it to the
+        first channel and therefore returned a different array than it received.
+        """
+        channel = frames.T[0]
+        windowed = channel * hamming(len(channel))
+        spectrum = abs(rfft(windowed, n=2 * self.nFFT)[0:self.nFFT])
+        self.energy4hz.append(dot(spectrum ** 2, self.melFilter))
+        return frames, eod
+
+    def results(self):
+        """Return the modulation-energy confidence and the speech segments.
+
+        NOTE(review): the band edges are normalised by the audio sample rate
+        although one energy vector is stored per processed block, and the
+        filter runs along axis 0 of the transposed matrix -- confirm both.
+        """
+        sr = float(self.samplerate())
+        center = self.fCenter / sr
+        half_band = (self.fwidth / 2) / sr
+        taps = firwin(self.orderFilter, [center - half_band, center + half_band], pass_zero=False)
+        # Local array: ``self.energy4hz`` stays a list that ``process`` can append to.
+        bands = array(self.energy4hz)
+        energy = lfilter(taps, 1, bands.T, 0)
+        # Add the rows together, as the builtin ``sum`` does on a 2-D array.
+        energy = energy.sum(axis=0)
+
+        if self.normalizeEnergy:
+            energy = energy / mean(energy)
+
+        w = int(float(self.modulLen) * self.samplerate() / self.blocksize())
+        modEnergyValue = computeModulation(energy, w, True)
+
+        conf = array(modEnergyValue - self.threshold) / self.threshold
+        conf[conf > 1] = 1
+
+        modEnergy = AnalyzerResult(id = "irit_4hzenergy_confidence", name = "modulation energie (IRIT)", unit = "?")
+        modEnergy.value = conf
+
+        convert = {False: 'NonSpeech', True: 'Speech'}
+        segList = segmentFromValues(modEnergyValue > self.threshold)
+        segments = [(s[0], s[1], convert[s[2]]) for s in segList]
+
+        segs = AnalyzerResult(id = "irit_4hzenergy_segments", name = "seg 4Hz (IRIT)", unit = "s")
+        segs.value = segments
+        return AnalyzerResultContainer([modEnergy, segs])
diff --git a/timeside/analyzer/irit_speech_4hz.pyc b/timeside/analyzer/irit_speech_4hz.pyc

new file mode 100644 (file)

index 0000000..eed34b4

Binary files /dev/null and b/timeside/analyzer/irit_speech_4hz.pyc differ
diff --git a/timeside/analyzer/irit_speech_entropy.py b/timeside/analyzer/irit_speech_entropy.py

new file mode 100644 (file)

index 0000000..7d72921
--- /dev/null
+++ b/timeside/analyzer/irit_speech_entropy.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide.  If not, see <http://www.gnu.org/licenses/>.
+
+# Author: Maxime Le Coz <lecoz@irit.fr>
+
+from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
+from timeside.analyzer.core import *
+from timeside.api import IValueAnalyzer
+from numpy import array
+from scipy.ndimage.morphology import binary_opening
+from matplotlib import pylab
+
+class IRITSpeechEntropy(Processor):
+    """Speech detector thresholding the modulation of the per-block entropy."""
+    implements(IValueAnalyzer)
+
+    @interfacedoc
+    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+        super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
+        self.entropyValue = []
+        self.threshold = 0.4
+        self.smoothLen = 5
+        self.modulLen = 2
+
+    @staticmethod
+    @interfacedoc
+    def id():
+        return "irit_speech_entropy"
+
+    @staticmethod
+    @interfacedoc
+    def name():
+        return "Speech entropy (IRIT)"
+
+    @staticmethod
+    @interfacedoc
+    def unit():
+        return ""
+
+    def __str__(self):
+        return "Speech confidences indexes"
+
+    def process(self, frames, eod=False):
+        """Append the entropy of ``frames`` and pass the block through unchanged."""
+        self.entropyValue.append(entropy(frames))
+        return frames, eod
+
+    def results(self):
+        """Return the entropy confidence curve and the speech segments."""
+        # ``array`` is imported explicitly above; the bare ``numpy`` name is not.
+        entropyValue = array(self.entropyValue)
+        # Explicit integer window length, independent of the division semantics.
+        w = int(float(self.modulLen) * self.samplerate() / self.blocksize())
+        modulentropy = computeModulation(entropyValue, w, False)
+        confEntropy = array(modulentropy - self.threshold) / self.threshold
+        confEntropy[confEntropy > 1] = 1
+
+        conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
+        conf.value = confEntropy
+
+        binaryEntropy = modulentropy > self.threshold
+        binaryEntropy = binary_opening(binaryEntropy, [1] * (self.smoothLen * 2))
+
+        convert = {False: 'NonSpeech', True: 'Speech'}
+        segList = segmentFromValues(binaryEntropy)
+        segmentsEntropy = [(s[0], s[1], convert[s[2]]) for s in segList]
+
+        segs = AnalyzerResult(id = "irit_entropy_segments", name = "seg entropy (IRIT)", unit = "s")
+        segs.value = segmentsEntropy
+
+        return AnalyzerResultContainer([conf, segs])
diff --git a/timeside/analyzer/irit_speech_entropy.pyc b/timeside/analyzer/irit_speech_entropy.pyc

new file mode 100644 (file)

index 0000000..387de1a

Binary files /dev/null and b/timeside/analyzer/irit_speech_entropy.pyc differ
diff --git a/timeside/analyzer/irit_speech_entropy.py~ b/timeside/analyzer/irit_speech_entropy.py~

new file mode 100644 (file)

index 0000000..a37fadf
--- /dev/null
+++ b/timeside/analyzer/irit_speech_entropy.py~
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
+
+# This file is part of TimeSide.
+
+# TimeSide is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# TimeSide is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with TimeSide.  If not, see <http://www.gnu.org/licenses/>.
+
+# Author: Maxime Le Coz <lecoz@irit.fr>
+
+from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter
+from timeside.analyzer.core import *
+from timeside.api import IValueAnalyzer
+from numpy import histogram,log,sqrt,min,max,sum,exp,array,zeros,var
+from scipy.ndimage.morphology import binary_opening
+from matplotlib import pylab
+
+class IRITSpeechEntropy(Processor):
+    """Speech detector thresholding the modulation of block-wise entropy values."""
+    implements(IValueAnalyzer)
+
+    @interfacedoc
+    def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
+        super(IRITSpeechEntropy, self).setup(channels, samplerate, blocksize, totalframes)
+        self.entropyValue = []
+        self.threshold = 0.4
+        self.smoothLen = 5
+        self.w_hop = 1024
+        self.modulLen = 1
+
+    @staticmethod
+    @interfacedoc
+    def id():
+        return "irit_speech_entropy"
+
+    @staticmethod
+    @interfacedoc
+    def name():
+        return "Speech entropy (IRIT)"
+
+    @staticmethod
+    @interfacedoc
+    def unit():
+        return ""
+
+    def __str__(self):
+        return "Speech confidences indexes"
+
+    def process(self, frames, eod=False):
+        """Append one entropy value per block yielded by ``downsample_blocking``."""
+        for samples in downsample_blocking(frames, self.w_hop):
+            self.entropyValue.append(entropy(samples))
+        return frames, eod
+
+    def results(self):
+        """Return the entropy confidence curve and the speech segments in seconds."""
+        # ``array`` is imported explicitly above; the bare ``numpy`` name is not.
+        entropyValue = array(self.entropyValue)
+        # Explicit integer window length, independent of the division semantics.
+        w = int(float(self.modulLen) * self.samplerate() / self.w_hop)
+        # The original passed the undefined name ``entr`` here.
+        modulentropy = computeModulation(entropyValue, w, withLog=False)
+        confEntropy = array(modulentropy - self.threshold) / self.threshold
+        confEntropy[confEntropy > 1] = 1
+
+        conf = AnalyzerResult(id = "irit_entropy_confidence", name = "entropy (IRIT)", unit = "?")
+        conf.value = confEntropy
+
+        binaryEntropy = modulentropy > self.threshold
+        binaryEntropy = binary_opening(binaryEntropy, [1] * (self.smoothLen * 2))
+
+        convert = {False: 'NonSpeech', True: 'Speech'}
+        segList = segmentFromValues(binaryEntropy)
+        # Scale segment bounds by w_hop / samplerate (the reported unit is seconds).
+        sr = float(self.samplerate())
+        segmentsEntropy = [(s[0] * self.w_hop / sr, s[1] * self.w_hop / sr, convert[s[2]])
+                           for s in segList]
+
+        segs = AnalyzerResult(id = "irit_entropy_segments", name = "seg entropy (IRIT)", unit = "s")
+        segs.value = segmentsEntropy
+
+        return AnalyzerResultContainer([conf, segs])
author	Maxime LE COZ <lecoz@irit.fr>
	Mon, 17 Jun 2013 14:12:25 +0000 (16:12 +0200)
committer	Maxime LE COZ <lecoz@irit.fr>
	Mon, 17 Jun 2013 14:12:25 +0000 (16:12 +0200)
timeside/analyzer/irit_speech.py~	[new file with mode: 0644]	patch | blob
timeside/analyzer/irit_speech_4hz.py	[new file with mode: 0644]	patch | blob
timeside/analyzer/irit_speech_4hz.pyc	[new file with mode: 0644]	patch | blob
timeside/analyzer/irit_speech_entropy.py	[new file with mode: 0644]	patch | blob
timeside/analyzer/irit_speech_entropy.pyc	[new file with mode: 0644]	patch | blob
timeside/analyzer/irit_speech_entropy.py~	[new file with mode: 0644]	patch | blob