From: Maxime LE COZ Date: Tue, 25 Feb 2014 12:14:52 +0000 (+0100) Subject: use of frame_adapter for IRIT methods X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=0391a5b82e8c09511a5ec8c664a884d4c9782658;p=timeside.git use of frame_adapter for IRIT methods --- diff --git a/timeside/analyzer/__init__.py b/timeside/analyzer/__init__.py index 6c8a2fc..75e8a6f 100644 --- a/timeside/analyzer/__init__.py +++ b/timeside/analyzer/__init__.py @@ -14,6 +14,7 @@ from vamp_plugin import VampSimpleHost from irit_speech_entropy import IRITSpeechEntropy from irit_speech_4hz import IRITSpeech4Hz from irit_diverg import IRITDiverg +from irit_noise_startSilences import IRITStartSeg from irit_music_SLN import IRITMusicSLN from irit_music_SNB import IRITMusicSNB #~ from irit_monopoly import IRITMonopoly diff --git a/timeside/analyzer/irit_music_SLN.py b/timeside/analyzer/irit_music_SLN.py index 3a7a638..216c2bb 100644 --- a/timeside/analyzer/irit_music_SLN.py +++ b/timeside/analyzer/irit_music_SLN.py @@ -28,18 +28,29 @@ from timeside.api import IAnalyzer from numpy import logical_and,array, hamming, dot, mean, float, arange, nonzero from numpy.fft import rfft from scipy.signal import firwin, lfilter -from pylab import plot,show +from timeside.analyzer.preprocessors import frames_adapter class IRITMusicSLN(Analyzer): implements(IAnalyzer) - def __init__(self, blocksize=1024, stepsize=None) : + def __init__(self, blocksize=None, stepsize=None) : super(IRITMusicSLN, self).__init__(); + self.parents.append(IRITDiverg()) self.wLen = 1.0 self.wStep = 0.1 self.threshold = 20 - + self.input_blocksize = 0; + self.input_stepsize = 0; + + @interfacedoc + def setup(self, channels=None, samplerate=None, blocksize=None, + totalframes=None): + super(IRITMusicSLN, self).setup( + channels, samplerate, blocksize, totalframes) + self.input_blocksize = int(self.wLen * samplerate) + self.input_stepsize = int(self.wStep * samplerate) + @staticmethod @interfacedoc def id(): @@ -57,7 +68,8 @@ class IRITMusicSLN(Analyzer): def __str__(self): return "Music confidence indexes" - + + @frames_adapter def process(self, frames, eod=False): return frames,eod @@ -79,9 +91,6 @@ class IRITMusicSLN(Analyzer): idx = nonzero(logical_and(segList>(t-w) ,segList<(t+w)))[0] segLen[i]= len(idx) - - plot(tLine,segLen) - show() # Confidence Index conf = array(segLen - self.threshold) / self.threshold conf[conf > 1] = 1 diff --git a/timeside/analyzer/irit_music_SNB.py b/timeside/analyzer/irit_music_SNB.py index 15dce01..f1ed556 100644 --- a/timeside/analyzer/irit_music_SNB.py +++ b/timeside/analyzer/irit_music_SNB.py @@ -28,17 +28,27 @@ from timeside.api import IAnalyzer from numpy import logical_and,array, hamming, dot, mean, float, arange, nonzero from numpy.fft import rfft from scipy.signal import firwin, lfilter -from pylab import plot,show +from timeside.analyzer.preprocessors import frames_adapter class IRITMusicSNB(Analyzer): implements(IAnalyzer) - def __init__(self, blocksize=1024, stepsize=None) : + def __init__(self, blocksize=1024, stepsize=None, samplerate=None) : super(IRITMusicSNB, self).__init__(); self.parents.append(IRITDiverg()) self.wLen = 1.0 self.wStep = 0.1 + self.input_blocksize = 0; + self.input_stepsize = 0; self.threshold = 20 + + @interfacedoc + def setup(self, channels=None, samplerate=None, blocksize=None, + totalframes=None): + super(IRITMusicSNB, self).setup( + channels, samplerate, blocksize, totalframes) + self.input_blocksize = int(self.wLen * samplerate) + self.input_stepsize = int(self.wStep * samplerate) @staticmethod @interfacedoc @@ -57,7 +67,8 @@ class IRITMusicSNB(Analyzer): def __str__(self): return "Music confidence indexes" - + + @frames_adapter def process(self, frames, eod=False): return frames,eod @@ -80,10 +91,7 @@ class IRITMusicSNB(Analyzer): l = [tLine[t1]-tLine[t2] for t1,t2 in zip()] segLen[i]= len(idx) - - plot(tLine,segLen) - show() - # Confidence Index + # Confidence Index conf = array(segLen - self.threshold) / self.threshold conf[conf > 1] = 1 diff --git a/timeside/analyzer/irit_speech_4hz.py b/timeside/analyzer/irit_speech_4hz.py index ea86677..5771891 100644 --- a/timeside/analyzer/irit_speech_4hz.py +++ b/timeside/analyzer/irit_speech_4hz.py @@ -27,6 +27,7 @@ from timeside.api import IAnalyzer from numpy import array, hamming, dot, mean, float from numpy.fft import rfft from scipy.signal import firwin, lfilter +from timeside.analyzer.preprocessors import frames_adapter class IRITSpeech4Hz(Analyzer): @@ -47,14 +48,20 @@ class IRITSpeech4Hz(Analyzer): - modulLen (float) : Length (in second) of the modulation computation window ''' - @interfacedoc + @interfacedoc def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None): super(IRITSpeech4Hz, self).setup( channels, samplerate, blocksize, totalframes) + self.energy4hz = [] # Classification self.threshold = 2.0 + + self.wLen = 1.0 + self.wStep = 0.1 + self.input_blocksize = int(self.wLen * samplerate) + self.input_stepsize = int(self.wStep * samplerate) # Pass-band Filter self.frequency_center = 4.0 @@ -85,6 +92,7 @@ class IRITSpeech4Hz(Analyzer): def __str__(self): return "Speech confidences indexes" + @frames_adapter def process(self, frames, eod=False): ''' @@ -123,7 +131,7 @@ class IRITSpeech4Hz(Analyzer): # Energy Modulation frameLenModulation = int( - self.modulLen * self.samplerate() / self.blocksize()) + self.modulLen * self.samplerate() / self.input_blocksize) modEnergyValue = computeModulation(energy, frameLenModulation, True) # Confidence Index @@ -154,10 +162,10 @@ class IRITSpeech4Hz(Analyzer): segs.label_metadata.label = label segs.data_object.label = [convert[s[2]] for s in segList] - segs.data_object.time = [(float(s[0]) * self.blocksize() / + segs.data_object.time = [(float(s[0]) * self.input_blocksize / self.samplerate()) for s in segList] - segs.data_object.duration = [(float(s[1]-s[0]) * self.blocksize() / + segs.data_object.duration = [(float(s[1]-s[0]) * self.input_blocksize / self.samplerate()) for s in segList] diff --git a/timeside/analyzer/irit_speech_entropy.py b/timeside/analyzer/irit_speech_entropy.py index 8d13ab4..cdfaec5 100644 --- a/timeside/analyzer/irit_speech_entropy.py +++ b/timeside/analyzer/irit_speech_entropy.py @@ -26,6 +26,7 @@ from timeside.analyzer.utils import segmentFromValues from timeside.api import IAnalyzer from numpy import array from scipy.ndimage.morphology import binary_opening +from timeside.analyzer.preprocessors import frames_adapter class IRITSpeechEntropy(Analyzer): @@ -40,6 +41,10 @@ class IRITSpeechEntropy(Analyzer): self.threshold = 0.4 self.smoothLen = 5 self.modulLen = 2 + self.wLen = 1.0 + self.wStep = 0.1 + self.input_blocksize = int(self.wLen * samplerate) + self.input_stepsize = int(self.wStep * samplerate) @staticmethod @interfacedoc @@ -58,7 +63,8 @@ class IRITSpeechEntropy(Analyzer): def __str__(self): return "Speech confidences indexes" - + + @frames_adapter def process(self, frames, eod=False): self.entropyValue.append(entropy(frames)) return frames, eod @@ -66,7 +72,7 @@ class IRITSpeechEntropy(Analyzer): def post_process(self): entropyValue = array(self.entropyValue) - w = self.modulLen * self.samplerate() / self.blocksize() + w = self.modulLen * self.samplerate() / self.input_blocksize modulentropy = computeModulation(entropyValue, w, False) confEntropy = array(modulentropy - self.threshold) / self.threshold confEntropy[confEntropy > 1] = 1 @@ -95,10 +101,10 @@ class IRITSpeechEntropy(Analyzer): segs.label_metadata.label = label segs.data_object.label = [convert[s[2]] for s in segList] - segs.data_object.time = [(float(s[0]) * self.blocksize() / + segs.data_object.time = [(float(s[0]) * self.input_blocksize / self.samplerate()) for s in segList] - segs.data_object.duration = [(float(s[1]-s[0]) * self.blocksize() / + segs.data_object.duration = [(float(s[1]-s[0]) * self.input_blocksize / self.samplerate()) for s in segList]