From: Thomas Fillon Date: Wed, 1 Oct 2014 10:44:54 +0000 (+0200) Subject: chore(analyzers): apply new API to Diadems analyzers X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=34336ff7ee287322abd692358e6a9cf21083e979;p=timeside.git chore(analyzers): apply new API to Diadems analyzers --- diff --git a/timeside/analyzer/irit_diverg.py b/timeside/analyzer/irit_diverg.py index d15f3a9..b96392d 100644 --- a/timeside/analyzer/irit_diverg.py +++ b/timeside/analyzer/irit_diverg.py @@ -359,7 +359,7 @@ class IRITDiverg(Analyzer): def __init__(self, blocksize=1024, stepsize=None): super(IRITDiverg, self).__init__() - self.parents.append(Waveform()) + self.parents['waveform'] = Waveform() self.ordre = 2 @interfacedoc @@ -390,7 +390,8 @@ class IRITDiverg(Analyzer): return frames, eod def post_process(self): - audio_data = self.process_pipe.results.get_result_by_id('waveform_analyzer').data + + audio_data = self.parents['waveform'].results['waveform_analyzer'].data if audio_data.shape[1] > 1: data = list(audio_data.mean(axis=1)) else: @@ -407,5 +408,5 @@ class IRITDiverg(Analyzer): segs.data_object.label = [s[1] for s in frontieres] segs.data_object.time = [(float(s[0]) / self.samplerate()) for s in frontieres] - self.process_pipe.results.add(segs) + self.add_result(segs) return diff --git a/timeside/analyzer/irit_music_.py b/timeside/analyzer/irit_music_.py deleted file mode 100644 index c8dae07..0000000 --- a/timeside/analyzer/irit_music_.py +++ /dev/null @@ -1,115 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2013 Maxime Le Coz - -# This file is part of TimeSide. - -# TimeSide is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 2 of the License, or -# (at your option) any later version. - -# TimeSide is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with TimeSide. If not, see . - -# Author: Maxime Le Coz - -from timeside.core import implements, interfacedoc -from timeside.analyzer.core import Analyzer -from timeside.analyzer.utils import melFilterBank, computeModulation -from timeside.analyzer.utils import segmentFromValues -from timeside.analyzer.irit_diverg import IRITDiverg -from timeside.api import IAnalyzer -from numpy import logical_and, array, hamming, dot, mean, float, arange, nonzero -from numpy.fft import rfft -from scipy.signal import firwin, lfilter -from pylab import plot, show - - -class IRITMusicLDN(Analyzer): - implements(IAnalyzer) - - def __init__(self, blocksize=1024, stepsize=None): - super(IRITMusicLDN, self).__init__() - self.parents.append(IRITDiverg()) - self.wLen = 1.0 - self.wStep = 0.1 - self.threshold = 20 - - @staticmethod - @interfacedoc - def id(): - return "irit_music_ldn" - - @staticmethod - @interfacedoc - def name(): - return "IRIT Music Detector - Segment Length" - - @staticmethod - @interfacedoc - def unit(): - return "" - - def __str__(self): - return "Music confidence indexes" - - def process(self, frames, eod=False): - return frames, eod - - def post_process(self): - ''' - ''' - - segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time - w = self.wLen / 2 - end = segList[-1] - tLine = arange(0, end, self.wStep) - - segLen = array([0] * len(tLine)) - - for i, t in enumerate(tLine): - idx = nonzero(logical_and(segList > (t - w), segList < (t + w)))[0] - segLen[i] = len(idx) - - #plot(tLine, segLen) - #show() - # Confidence Index - conf = array(segLen - self.threshold) / self.threshold - conf[conf > 1] = 1 - - segLenRes = self.new_result(data_mode='value', time_mode='framewise') - segLenRes.id_metadata.id += '.' + 'energy_confidence' - segLenRes.id_metadata.name += ' ' + 'Energy Confidence' - - segLenRes.data_object.value = segLen - - self.process_pipe.results.add(segLenRes) - - # Segment - convert = {False: 0, True: 1} - label = {0: 'nonMusic', 1: 'Music'} - - segList = segmentFromValues(segLen > self.threshold) - # Hint : Median filtering could imrove smoothness of the result - # from scipy.signal import medfilt - # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31)) - - segs = self.new_result(data_mode='label', time_mode='segment') - segs.id_metadata.id += '.' + 'segments' - segs.id_metadata.name += ' ' + 'Segments' - - segs.data_object.label_metadata.label = label - - segs.data_object.label = [convert[s[2]] for s in segList] - segs.data_object.time = [tLine[s[0]] for s in segList] - segs.data_object.duration = [tLine[s[1]] - tLine[s[0]] - for s in segList] - - self.process_pipe.results.add(segs) - return diff --git a/timeside/analyzer/irit_music_SLN.py b/timeside/analyzer/irit_music_SLN.py index bfa5728..8ace072 100644 --- a/timeside/analyzer/irit_music_SLN.py +++ b/timeside/analyzer/irit_music_SLN.py @@ -34,7 +34,7 @@ class IRITMusicSLN(Analyzer): def __init__(self, blocksize=None, stepsize=None): super(IRITMusicSLN, self).__init__() - self.parents.append(IRITDiverg()) + self.parents['irit_diverg'] = IRITDiverg() self.wLen = 1.0 self.wStep = 0.1 self.threshold = 0.05 @@ -73,8 +73,8 @@ class IRITMusicSLN(Analyzer): ''' ''' - - segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time + res_irit_diverg = self.parents['irit_diverg'].results + segList = res_irit_diverg['irit_diverg.segments'].time w = self.wLen / 2 end = segList[-1] @@ -95,7 +95,7 @@ class IRITMusicSLN(Analyzer): segLenRes.data_object.value = segLen - self.process_pipe.results.add(segLenRes) + self.add_result(segLenRes) # Segment convert = {False: 0, True: 1} @@ -117,7 +117,7 @@ class IRITMusicSLN(Analyzer): segs.data_object.duration = [tLine[s[1]] - tLine[s[0]] for s in segList] - self.process_pipe.results.add(segs) + self.add_result(segs) return diff --git a/timeside/analyzer/irit_music_SNB.py b/timeside/analyzer/irit_music_SNB.py index f359142..bb95403 100644 --- a/timeside/analyzer/irit_music_SNB.py +++ b/timeside/analyzer/irit_music_SNB.py @@ -34,7 +34,7 @@ class IRITMusicSNB(Analyzer): def __init__(self, blocksize=1024, stepsize=None, samplerate=None): super(IRITMusicSNB, self).__init__() - self.parents.append(IRITDiverg()) + self.parents['irit_diverg'] = IRITDiverg() self.wLen = 1.0 self.wStep = 0.1 self.threshold = 20 @@ -72,8 +72,8 @@ class IRITMusicSNB(Analyzer): ''' ''' - - segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time + res_irit_diverg = self.parents['irit_diverg'].results + segList = res_irit_diverg['irit_diverg.segments'].time w = self.wLen / 2 end = segList[-1] tLine = arange(0, end, self.wStep) @@ -90,7 +90,7 @@ class IRITMusicSNB(Analyzer): segLenRes.data_object.value = conf - self.process_pipe.results.add(segLenRes) + self.add_result(segLenRes) # Segment convert = {False: 0, True: 1} @@ -112,7 +112,7 @@ class IRITMusicSNB(Analyzer): segs.data_object.duration = [tLine[s[1]] - tLine[s[0]] for s in segList] - self.process_pipe.results.add(segs) + self.add_result(segs) return diff --git a/timeside/analyzer/irit_singings.py b/timeside/analyzer/irit_singings.py index f491dd3..e8e668a 100644 --- a/timeside/analyzer/irit_singings.py +++ b/timeside/analyzer/irit_singings.py @@ -40,7 +40,7 @@ class IRITSingings(Analyzer): @interfacedoc def __init__(self): super(IRITSingings, self).__init__() - self.parents.append(IRITMonopoly()) + self.parents['irit_monopoly'] = IRITMonopoly() self.block_read = 0 self.pitches = [] @@ -103,11 +103,13 @@ class IRITSingings(Analyzer): """ """ - preproc = self.process_pipe.results.get_result_by_id('irit_monopoly.segments').data_object - labels = self.process_pipe.results.get_result_by_id('irit_monopoly.segments').data_object.label_metadata['label'] - segments_monopoly = [(start, duration, labels[label])for start, duration, label in zip(preproc.time, - preproc.duration, - preproc.label)] + monopoly_results = self.parents['irit_monopoly'].results + preproc = monopoly_results['irit_monopoly.segments'].data_object + labels = preproc.label_metadata['label'] + segments_monopoly = [(start, duration, labels[label]) + for start, duration, label + in zip(preproc.time, preproc.duration, + preproc.label)] segments_chant = [] for start, duration, label in segments_monopoly: cumulChant = 0 @@ -118,7 +120,8 @@ class IRITSingings(Analyzer): for seg in segs: if has_vibrato(seg[2], f0_frame_rate): cumulChant += seg[1]-seg[0] - segments_chant += [(start, duration, cumulChant/duration >= self.thMono)] + segments_chant += [(start, duration, + cumulChant/duration >= self.thMono)] elif label == 'Poly': pass @@ -128,7 +131,6 @@ class IRITSingings(Analyzer): return - class SinusoidalSegment(object): """ diff --git a/timeside/analyzer/limsi_diarization.py b/timeside/analyzer/limsi_diarization.py index 623e3a2..21b713b 100644 --- a/timeside/analyzer/limsi_diarization.py +++ b/timeside/analyzer/limsi_diarization.py @@ -68,7 +68,8 @@ def segment(data, minsize): class LimsiDiarization(Analyzer): implements(IAnalyzer) - def __init__(self, sad_analyzer = None, gdiff_win_size_sec=5., min_seg_size_sec=2.5, bic_penalty_coeff=0.5): + def __init__(self, sad_analyzer=None, gdiff_win_size_sec=5., + min_seg_size_sec=2.5, bic_penalty_coeff=0.5): super(LimsiDiarization, self).__init__() self.gdiff_win_size_sec = gdiff_win_size_sec @@ -78,13 +79,12 @@ class LimsiDiarization(Analyzer): if sad_analyzer is None: sad_analyzer = LimsiSad('etape') self.sad_analyzer = sad_analyzer - self.parents.append(sad_analyzer) + self.parents['sad_analyzer'] = sad_analyzer # feature extraction defition spec = yaafelib.FeaturePlan(sample_rate=16000) spec.addFeature('mfccchop: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256') - parent_analyzer = Yaafe(spec) - self.parents.append(parent_analyzer) + self.parents['yaafe'] = Yaafe(spec) # informative parameters # these are not really taken into account by the system @@ -116,9 +116,12 @@ class LimsiDiarization(Analyzer): def post_process(self): # extract mfcc with yaafe and store them to be used with pyannote - mfcc = self.process_pipe.results.get_result_by_id('yaafe.mfccchop')['data_object']['value'] + print self.parents['yaafe'].results.keys() + res_yaafe = self.parents['yaafe'].results['yaafe.mfccchop'] + mfcc = res_yaafe.data_object.value - sw = YaafeFrame(self.input_blocksize, self.input_stepsize, self.input_samplerate) + sw = YaafeFrame(self.input_blocksize, self.input_stepsize, + self.input_samplerate) pyannotefeat = SlidingWindowFeature(mfcc, sw) # gaussian divergence window size @@ -127,13 +130,16 @@ class LimsiDiarization(Analyzer): min_seg_size_frame = int(self.min_seg_size_sec / timestepsize) # speech activity detection - sadval = self.process_pipe.results.get_result_by_id(self.sad_analyzer.id() + '.sad_lhh_diff').data_object.value[:] + sad_analyzer = self.parents['sad_analyzer'] + res_sad = sad_analyzer.results['limsi_sad.sad_lhh_diff'] + sadval = res_sad.data_object.value[:] # indices of frames detected as speech speech_threshold = 0. - frameids = [i for i, val in enumerate(sadval) if val > speech_threshold] + frameids = [i for i, val in enumerate(sadval) + if val > speech_threshold] # compute gaussian divergence of speech frames only - gdiff = gauss_div(mfcc[frameids,:], gdiff_win_size_frame) + gdiff = gauss_div(mfcc[frameids, :], gdiff_win_size_frame) # initial segmentation based on gaussian divergence criterion seg = segment(gdiff, min_seg_size_frame) @@ -182,7 +188,7 @@ class LimsiDiarization(Analyzer): duration[-1] = t + d - time[-1] lastlabel = l - + # store diarisation result diar_res = self.new_result(data_mode='label', time_mode='segment') diar_res.id_metadata.id += '.' + 'speakers' # + name + 'diarisation' @@ -193,5 +199,5 @@ class LimsiDiarization(Analyzer): diar_res.label_metadata.label = dict() for lab in diar_res.data_object.label: diar_res.label_metadata.label[lab] = str(lab) - - self.process_pipe.results.add(diar_res) + + self.add_result(diar_res) diff --git a/timeside/analyzer/limsi_sad.py b/timeside/analyzer/limsi_sad.py index 5643192..a012e86 100644 --- a/timeside/analyzer/limsi_sad.py +++ b/timeside/analyzer/limsi_sad.py @@ -88,7 +88,7 @@ class LimsiSad(Analyzer): """ Limsi Speech Activity Detection Systems LimsiSad performs frame level speech activity detection based on trained GMM models - For each frame, it computes the log likelihood difference between a speech model and a non speech model. + For each frame, it computes the log likelihood difference between a speech model and a non speech model. The highest is the estimate, the largest is the probability that the frame corresponds to speech. Dilatation and erosion procedures are used in a latter stage to obtain speech and non speech segments @@ -99,9 +99,9 @@ class LimsiSad(Analyzer): * sad_segments: speech/non speech segments """ implements(IAnalyzer) - - def __init__(self, sad_model, dews=0.2, speech_threshold=1., dllh_bounds=(-10., 10.)): + + def __init__(self, sad_model='etape', dews=0.2, speech_threshold=1., dllh_bounds=(-10., 10.)): """ Parameters: ----------