git.parisson.com Git - timeside.git/commitdiff
chore(analyzers): apply new API to Diadems analyzers
author Thomas Fillon <thomas@parisson.com>
Wed, 1 Oct 2014 10:44:54 +0000 (12:44 +0200)
committer Thomas Fillon <thomas@parisson.com>
Wed, 1 Oct 2014 10:44:54 +0000 (12:44 +0200)
timeside/analyzer/irit_diverg.py
timeside/analyzer/irit_music_.py [deleted file]
timeside/analyzer/irit_music_SLN.py
timeside/analyzer/irit_music_SNB.py
timeside/analyzer/irit_singings.py
timeside/analyzer/limsi_diarization.py
timeside/analyzer/limsi_sad.py

index d15f3a96bff25bf8cfafe2e413d097534774ef7e..b96392df3b861671985fcd3327f266a191826988 100644 (file)
@@ -359,7 +359,7 @@ class IRITDiverg(Analyzer):
 
     def __init__(self, blocksize=1024, stepsize=None):
         super(IRITDiverg, self).__init__()
-        self.parents.append(Waveform())
+        self.parents['waveform'] = Waveform()
         self.ordre = 2
 
     @interfacedoc
@@ -390,7 +390,8 @@ class IRITDiverg(Analyzer):
         return frames, eod
 
     def post_process(self):
-        audio_data = self.process_pipe.results.get_result_by_id('waveform_analyzer').data
+
+        audio_data = self.parents['waveform'].results['waveform_analyzer'].data
         if audio_data.shape[1] > 1:
             data = list(audio_data.mean(axis=1))
         else:
@@ -407,5 +408,5 @@ class IRITDiverg(Analyzer):
         segs.data_object.label = [s[1] for s in frontieres]
         segs.data_object.time = [(float(s[0]) / self.samplerate())
                                  for s in frontieres]
-        self.process_pipe.results.add(segs)
+        self.add_result(segs)
         return
diff --git a/timeside/analyzer/irit_music_.py b/timeside/analyzer/irit_music_.py
deleted file mode 100644 (file)
index c8dae07..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
-
-# This file is part of TimeSide.
-
-# TimeSide is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# TimeSide is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with TimeSide.  If not, see <http://www.gnu.org/licenses/>.
-
-# Author: Maxime Le Coz <lecoz@irit.fr>
-
-from timeside.core import implements, interfacedoc
-from timeside.analyzer.core import Analyzer
-from timeside.analyzer.utils import melFilterBank, computeModulation
-from timeside.analyzer.utils import segmentFromValues
-from timeside.analyzer.irit_diverg import IRITDiverg
-from timeside.api import IAnalyzer
-from numpy import logical_and, array, hamming, dot, mean, float, arange, nonzero
-from numpy.fft import rfft
-from scipy.signal import firwin, lfilter
-from pylab import plot, show
-
-
-class IRITMusicLDN(Analyzer):
-    implements(IAnalyzer)
-
-    def __init__(self, blocksize=1024, stepsize=None):
-        super(IRITMusicLDN, self).__init__()
-        self.parents.append(IRITDiverg())
-        self.wLen = 1.0
-        self.wStep = 0.1
-        self.threshold = 20
-
-    @staticmethod
-    @interfacedoc
-    def id():
-        return "irit_music_ldn"
-
-    @staticmethod
-    @interfacedoc
-    def name():
-        return "IRIT Music Detector - Segment Length"
-
-    @staticmethod
-    @interfacedoc
-    def unit():
-        return ""
-
-    def __str__(self):
-        return "Music confidence indexes"
-
-    def process(self, frames, eod=False):
-        return frames, eod
-
-    def post_process(self):
-        '''
-        '''
-
-        segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
-        w = self.wLen / 2
-        end = segList[-1]
-        tLine = arange(0, end, self.wStep)
-
-        segLen = array([0] * len(tLine))
-
-        for i, t in enumerate(tLine):
-            idx = nonzero(logical_and(segList > (t - w), segList < (t + w)))[0]
-            segLen[i] = len(idx)
-
-        #plot(tLine, segLen)
-        #show()
-        # Confidence Index
-        conf = array(segLen - self.threshold) / self.threshold
-        conf[conf > 1] = 1
-
-        segLenRes = self.new_result(data_mode='value', time_mode='framewise')
-        segLenRes.id_metadata.id += '.' + 'energy_confidence'
-        segLenRes.id_metadata.name += ' ' + 'Energy Confidence'
-
-        segLenRes.data_object.value = segLen
-
-        self.process_pipe.results.add(segLenRes)
-
-        # Segment
-        convert = {False: 0, True: 1}
-        label = {0: 'nonMusic', 1: 'Music'}
-
-        segList = segmentFromValues(segLen > self.threshold)
-        # Hint : Median filtering could imrove smoothness of the result
-        # from scipy.signal import medfilt
-        # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31))
-
-        segs = self.new_result(data_mode='label', time_mode='segment')
-        segs.id_metadata.id += '.' + 'segments'
-        segs.id_metadata.name += ' ' + 'Segments'
-
-        segs.data_object.label_metadata.label = label
-
-        segs.data_object.label = [convert[s[2]] for s in segList]
-        segs.data_object.time = [tLine[s[0]] for s in segList]
-        segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
-                                     for s in segList]
-
-        self.process_pipe.results.add(segs)
-        return
index bfa5728b5d8269cecb8591e66090096d2d38f9c8..8ace0725401e0aa9dfa789dee9320c5035b8abd3 100644 (file)
@@ -34,7 +34,7 @@ class IRITMusicSLN(Analyzer):
     def __init__(self, blocksize=None, stepsize=None):
         super(IRITMusicSLN, self).__init__()
 
-        self.parents.append(IRITDiverg())
+        self.parents['irit_diverg'] = IRITDiverg()
         self.wLen = 1.0
         self.wStep = 0.1
         self.threshold = 0.05
@@ -73,8 +73,8 @@ class IRITMusicSLN(Analyzer):
         '''
 
         '''
-
-        segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
+        res_irit_diverg = self.parents['irit_diverg'].results
+        segList = res_irit_diverg['irit_diverg.segments'].time
 
         w = self.wLen / 2
         end = segList[-1]
@@ -95,7 +95,7 @@ class IRITMusicSLN(Analyzer):
 
         segLenRes.data_object.value = segLen
 
-        self.process_pipe.results.add(segLenRes)
+        self.add_result(segLenRes)
 
         # Segment
         convert = {False: 0, True: 1}
@@ -117,7 +117,7 @@ class IRITMusicSLN(Analyzer):
         segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
                                      for s in segList]
 
-        self.process_pipe.results.add(segs)
+        self.add_result(segs)
         return
 
 
index f359142386462989baeb96009fa87357aee1f726..bb95403f3a24b98f9565d5c930fea13fa3df8c63 100644 (file)
@@ -34,7 +34,7 @@ class IRITMusicSNB(Analyzer):
 
     def __init__(self, blocksize=1024, stepsize=None, samplerate=None):
         super(IRITMusicSNB, self).__init__()
-        self.parents.append(IRITDiverg())
+        self.parents['irit_diverg'] = IRITDiverg()
         self.wLen = 1.0
         self.wStep = 0.1
         self.threshold = 20
@@ -72,8 +72,8 @@ class IRITMusicSNB(Analyzer):
         '''
 
         '''
-
-        segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
+        res_irit_diverg = self.parents['irit_diverg'].results
+        segList = res_irit_diverg['irit_diverg.segments'].time
         w = self.wLen / 2
         end = segList[-1]
         tLine = arange(0, end, self.wStep)
@@ -90,7 +90,7 @@ class IRITMusicSNB(Analyzer):
 
         segLenRes.data_object.value = conf
 
-        self.process_pipe.results.add(segLenRes)
+        self.add_result(segLenRes)
 
         # Segment
         convert = {False: 0, True: 1}
@@ -112,7 +112,7 @@ class IRITMusicSNB(Analyzer):
         segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
                                      for s in segList]
 
-        self.process_pipe.results.add(segs)
+        self.add_result(segs)
         return
 
 
index f491dd3ab84c2ced32bfc21d27ea0ff987d1c3ab..e8e668ae298b753edcf3b3de470d33ad50d593e2 100644 (file)
@@ -40,7 +40,7 @@ class IRITSingings(Analyzer):
     @interfacedoc
     def __init__(self):
         super(IRITSingings, self).__init__()
-        self.parents.append(IRITMonopoly())
+        self.parents['irit_monopoly'] = IRITMonopoly()
 
         self.block_read = 0
         self.pitches = []
@@ -103,11 +103,13 @@ class IRITSingings(Analyzer):
         """
 
         """
-        preproc = self.process_pipe.results.get_result_by_id('irit_monopoly.segments').data_object
-        labels = self.process_pipe.results.get_result_by_id('irit_monopoly.segments').data_object.label_metadata['label']
-        segments_monopoly = [(start, duration, labels[label])for start, duration, label in zip(preproc.time,
-                                                                                               preproc.duration,
-                                                                                               preproc.label)]
+        monopoly_results = self.parents['irit_monopoly'].results
+        preproc = monopoly_results['irit_monopoly.segments'].data_object
+        labels = preproc.label_metadata['label']
+        segments_monopoly = [(start, duration, labels[label])
+                             for start, duration, label
+                             in zip(preproc.time, preproc.duration,
+                                    preproc.label)]
         segments_chant = []
         for start, duration, label in segments_monopoly:
             cumulChant = 0
@@ -118,7 +120,8 @@ class IRITSingings(Analyzer):
                 for seg in segs:
                     if has_vibrato(seg[2], f0_frame_rate):
                         cumulChant += seg[1]-seg[0]
-                segments_chant += [(start, duration, cumulChant/duration >= self.thMono)]
+                segments_chant += [(start, duration,
+                                    cumulChant/duration >= self.thMono)]
 
             elif label == 'Poly':
                 pass
@@ -128,7 +131,6 @@ class IRITSingings(Analyzer):
         return
 
 
-
 class SinusoidalSegment(object):
 
     """
index 623e3a2b87498e908f120ce29014fd19e8150fd8..21b713b4234d1ff080c34e5f518216f83e2b6eba 100644 (file)
@@ -68,7 +68,8 @@ def segment(data, minsize):
 class LimsiDiarization(Analyzer):
     implements(IAnalyzer)
 
-    def __init__(self, sad_analyzer = None, gdiff_win_size_sec=5., min_seg_size_sec=2.5, bic_penalty_coeff=0.5):
+    def __init__(self, sad_analyzer=None, gdiff_win_size_sec=5.,
+                 min_seg_size_sec=2.5, bic_penalty_coeff=0.5):
         super(LimsiDiarization, self).__init__()
 
         self.gdiff_win_size_sec = gdiff_win_size_sec
@@ -78,13 +79,12 @@ class LimsiDiarization(Analyzer):
         if sad_analyzer is None:
             sad_analyzer = LimsiSad('etape')
         self.sad_analyzer = sad_analyzer
-        self.parents.append(sad_analyzer)
+        self.parents['sad_analyzer'] = sad_analyzer
 
         # feature extraction defition
         spec = yaafelib.FeaturePlan(sample_rate=16000)
         spec.addFeature('mfccchop: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256')
-        parent_analyzer = Yaafe(spec)
-        self.parents.append(parent_analyzer)
+        self.parents['yaafe'] = Yaafe(spec)
 
         # informative parameters
         # these are not really taken into account by the system
@@ -116,9 +116,12 @@ class LimsiDiarization(Analyzer):
 
     def post_process(self):
         # extract mfcc with yaafe and store them to be used with pyannote
-        mfcc = self.process_pipe.results.get_result_by_id('yaafe.mfccchop')['data_object']['value']
+        print self.parents['yaafe'].results.keys()
+        res_yaafe = self.parents['yaafe'].results['yaafe.mfccchop']
+        mfcc = res_yaafe.data_object.value
 
-        sw = YaafeFrame(self.input_blocksize, self.input_stepsize, self.input_samplerate)
+        sw = YaafeFrame(self.input_blocksize, self.input_stepsize,
+                        self.input_samplerate)
         pyannotefeat = SlidingWindowFeature(mfcc, sw)
 
         # gaussian divergence window size
@@ -127,13 +130,16 @@ class LimsiDiarization(Analyzer):
         min_seg_size_frame = int(self.min_seg_size_sec / timestepsize)
 
         # speech activity detection
-        sadval = self.process_pipe.results.get_result_by_id(self.sad_analyzer.id() + '.sad_lhh_diff').data_object.value[:]
+        sad_analyzer = self.parents['sad_analyzer']
+        res_sad = sad_analyzer.results['limsi_sad.sad_lhh_diff']
+        sadval = res_sad.data_object.value[:]
         # indices of frames detected as speech
         speech_threshold = 0.
-        frameids = [i for i, val in enumerate(sadval) if val > speech_threshold]
+        frameids = [i for i, val in enumerate(sadval)
+                    if val > speech_threshold]
 
         # compute gaussian divergence of speech frames only
-        gdiff = gauss_div(mfcc[frameids,:], gdiff_win_size_frame)
+        gdiff = gauss_div(mfcc[frameids, :], gdiff_win_size_frame)
 
         # initial segmentation based on gaussian divergence criterion
         seg = segment(gdiff, min_seg_size_frame)
@@ -182,7 +188,7 @@ class LimsiDiarization(Analyzer):
                 duration[-1] = t + d - time[-1]
             lastlabel = l
 
-            
+
         # store diarisation result
         diar_res = self.new_result(data_mode='label', time_mode='segment')
         diar_res.id_metadata.id += '.' + 'speakers' # + name + 'diarisation'
@@ -193,5 +199,5 @@ class LimsiDiarization(Analyzer):
         diar_res.label_metadata.label = dict()
         for lab in diar_res.data_object.label:
             diar_res.label_metadata.label[lab] = str(lab)
-            
-        self.process_pipe.results.add(diar_res)
+
+        self.add_result(diar_res)
index 564319240e227aab5025c6ef2a123cce7912f964..a012e86608d25b55097f413326a5bffc14ef1005 100644 (file)
@@ -88,7 +88,7 @@ class LimsiSad(Analyzer):
     """
     Limsi Speech Activity Detection Systems
     LimsiSad performs frame level speech activity detection based on trained GMM models
-    For each frame, it computes the log likelihood difference between a speech model and a non speech model. 
+    For each frame, it computes the log likelihood difference between a speech model and a non speech model.
     The highest is the estimate, the largest is the probability that the frame corresponds to speech.
     Dilatation and erosion procedures are used in a latter stage to obtain speech and non speech segments
 
@@ -99,9 +99,9 @@ class LimsiSad(Analyzer):
     * sad_segments: speech/non speech segments
     """
     implements(IAnalyzer)
-    
 
-    def __init__(self, sad_model, dews=0.2, speech_threshold=1., dllh_bounds=(-10., 10.)):
+
+    def __init__(self, sad_model='etape', dews=0.2, speech_threshold=1., dllh_bounds=(-10., 10.)):
         """
         Parameters:
         ----------