def __init__(self, blocksize=1024, stepsize=None):
super(IRITDiverg, self).__init__()
- self.parents.append(Waveform())
+ self.parents['waveform'] = Waveform()
self.ordre = 2
@interfacedoc
return frames, eod
def post_process(self):
- audio_data = self.process_pipe.results.get_result_by_id('waveform_analyzer').data
+
+ audio_data = self.parents['waveform'].results['waveform_analyzer'].data
if audio_data.shape[1] > 1:
data = list(audio_data.mean(axis=1))
else:
segs.data_object.label = [s[1] for s in frontieres]
segs.data_object.time = [(float(s[0]) / self.samplerate())
for s in frontieres]
- self.process_pipe.results.add(segs)
+ self.add_result(segs)
return
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
-
-# This file is part of TimeSide.
-
-# TimeSide is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# TimeSide is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
-
-# Author: Maxime Le Coz <lecoz@irit.fr>
-
-from timeside.core import implements, interfacedoc
-from timeside.analyzer.core import Analyzer
-from timeside.analyzer.utils import melFilterBank, computeModulation
-from timeside.analyzer.utils import segmentFromValues
-from timeside.analyzer.irit_diverg import IRITDiverg
-from timeside.api import IAnalyzer
-from numpy import logical_and, array, hamming, dot, mean, float, arange, nonzero
-from numpy.fft import rfft
-from scipy.signal import firwin, lfilter
-from pylab import plot, show
-
-
-class IRITMusicLDN(Analyzer):
- implements(IAnalyzer)
-
- def __init__(self, blocksize=1024, stepsize=None):
- super(IRITMusicLDN, self).__init__()
- self.parents.append(IRITDiverg())
- self.wLen = 1.0
- self.wStep = 0.1
- self.threshold = 20
-
- @staticmethod
- @interfacedoc
- def id():
- return "irit_music_ldn"
-
- @staticmethod
- @interfacedoc
- def name():
- return "IRIT Music Detector - Segment Length"
-
- @staticmethod
- @interfacedoc
- def unit():
- return ""
-
- def __str__(self):
- return "Music confidence indexes"
-
- def process(self, frames, eod=False):
- return frames, eod
-
- def post_process(self):
- '''
- '''
-
- segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
- w = self.wLen / 2
- end = segList[-1]
- tLine = arange(0, end, self.wStep)
-
- segLen = array([0] * len(tLine))
-
- for i, t in enumerate(tLine):
- idx = nonzero(logical_and(segList > (t - w), segList < (t + w)))[0]
- segLen[i] = len(idx)
-
- #plot(tLine, segLen)
- #show()
- # Confidence Index
- conf = array(segLen - self.threshold) / self.threshold
- conf[conf > 1] = 1
-
- segLenRes = self.new_result(data_mode='value', time_mode='framewise')
- segLenRes.id_metadata.id += '.' + 'energy_confidence'
- segLenRes.id_metadata.name += ' ' + 'Energy Confidence'
-
- segLenRes.data_object.value = segLen
-
- self.process_pipe.results.add(segLenRes)
-
- # Segment
- convert = {False: 0, True: 1}
- label = {0: 'nonMusic', 1: 'Music'}
-
- segList = segmentFromValues(segLen > self.threshold)
- # Hint : Median filtering could imrove smoothness of the result
- # from scipy.signal import medfilt
- # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31))
-
- segs = self.new_result(data_mode='label', time_mode='segment')
- segs.id_metadata.id += '.' + 'segments'
- segs.id_metadata.name += ' ' + 'Segments'
-
- segs.data_object.label_metadata.label = label
-
- segs.data_object.label = [convert[s[2]] for s in segList]
- segs.data_object.time = [tLine[s[0]] for s in segList]
- segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
- for s in segList]
-
- self.process_pipe.results.add(segs)
- return
def __init__(self, blocksize=None, stepsize=None):
super(IRITMusicSLN, self).__init__()
- self.parents.append(IRITDiverg())
+ self.parents['irit_diverg'] = IRITDiverg()
self.wLen = 1.0
self.wStep = 0.1
self.threshold = 0.05
'''
'''
-
- segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
+ res_irit_diverg = self.parents['irit_diverg'].results
+ segList = res_irit_diverg['irit_diverg.segments'].time
w = self.wLen / 2
end = segList[-1]
segLenRes.data_object.value = segLen
- self.process_pipe.results.add(segLenRes)
+ self.add_result(segLenRes)
# Segment
convert = {False: 0, True: 1}
segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
for s in segList]
- self.process_pipe.results.add(segs)
+ self.add_result(segs)
return
def __init__(self, blocksize=1024, stepsize=None, samplerate=None):
super(IRITMusicSNB, self).__init__()
- self.parents.append(IRITDiverg())
+ self.parents['irit_diverg'] = IRITDiverg()
self.wLen = 1.0
self.wStep = 0.1
self.threshold = 20
'''
'''
-
- segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
+ res_irit_diverg = self.parents['irit_diverg'].results
+ segList = res_irit_diverg['irit_diverg.segments'].time
w = self.wLen / 2
end = segList[-1]
tLine = arange(0, end, self.wStep)
segLenRes.data_object.value = conf
- self.process_pipe.results.add(segLenRes)
+ self.add_result(segLenRes)
# Segment
convert = {False: 0, True: 1}
segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
for s in segList]
- self.process_pipe.results.add(segs)
+ self.add_result(segs)
return
@interfacedoc
def __init__(self):
super(IRITSingings, self).__init__()
- self.parents.append(IRITMonopoly())
+ self.parents['irit_monopoly'] = IRITMonopoly()
self.block_read = 0
self.pitches = []
"""
"""
- preproc = self.process_pipe.results.get_result_by_id('irit_monopoly.segments').data_object
- labels = self.process_pipe.results.get_result_by_id('irit_monopoly.segments').data_object.label_metadata['label']
- segments_monopoly = [(start, duration, labels[label])for start, duration, label in zip(preproc.time,
- preproc.duration,
- preproc.label)]
+ monopoly_results = self.parents['irit_monopoly'].results
+ preproc = monopoly_results['irit_monopoly.segments'].data_object
+ labels = preproc.label_metadata['label']
+ segments_monopoly = [(start, duration, labels[label])
+ for start, duration, label
+ in zip(preproc.time, preproc.duration,
+ preproc.label)]
segments_chant = []
for start, duration, label in segments_monopoly:
cumulChant = 0
for seg in segs:
if has_vibrato(seg[2], f0_frame_rate):
cumulChant += seg[1]-seg[0]
- segments_chant += [(start, duration, cumulChant/duration >= self.thMono)]
+ segments_chant += [(start, duration,
+ cumulChant/duration >= self.thMono)]
elif label == 'Poly':
pass
return
-
class SinusoidalSegment(object):
"""
class LimsiDiarization(Analyzer):
implements(IAnalyzer)
- def __init__(self, sad_analyzer = None, gdiff_win_size_sec=5., min_seg_size_sec=2.5, bic_penalty_coeff=0.5):
+ def __init__(self, sad_analyzer=None, gdiff_win_size_sec=5.,
+ min_seg_size_sec=2.5, bic_penalty_coeff=0.5):
super(LimsiDiarization, self).__init__()
self.gdiff_win_size_sec = gdiff_win_size_sec
if sad_analyzer is None:
sad_analyzer = LimsiSad('etape')
self.sad_analyzer = sad_analyzer
- self.parents.append(sad_analyzer)
+ self.parents['sad_analyzer'] = sad_analyzer
# feature extraction defition
spec = yaafelib.FeaturePlan(sample_rate=16000)
spec.addFeature('mfccchop: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256')
- parent_analyzer = Yaafe(spec)
- self.parents.append(parent_analyzer)
+ self.parents['yaafe'] = Yaafe(spec)
# informative parameters
# these are not really taken into account by the system
def post_process(self):
# extract mfcc with yaafe and store them to be used with pyannote
-        mfcc = self.process_pipe.results.get_result_by_id('yaafe.mfccchop')['data_object']['value']
+        res_yaafe = self.parents['yaafe'].results['yaafe.mfccchop']
+        mfcc = res_yaafe.data_object.value
- sw = YaafeFrame(self.input_blocksize, self.input_stepsize, self.input_samplerate)
+ sw = YaafeFrame(self.input_blocksize, self.input_stepsize,
+ self.input_samplerate)
pyannotefeat = SlidingWindowFeature(mfcc, sw)
# gaussian divergence window size
min_seg_size_frame = int(self.min_seg_size_sec / timestepsize)
# speech activity detection
- sadval = self.process_pipe.results.get_result_by_id(self.sad_analyzer.id() + '.sad_lhh_diff').data_object.value[:]
+ sad_analyzer = self.parents['sad_analyzer']
+ res_sad = sad_analyzer.results['limsi_sad.sad_lhh_diff']
+ sadval = res_sad.data_object.value[:]
# indices of frames detected as speech
speech_threshold = 0.
- frameids = [i for i, val in enumerate(sadval) if val > speech_threshold]
+ frameids = [i for i, val in enumerate(sadval)
+ if val > speech_threshold]
# compute gaussian divergence of speech frames only
- gdiff = gauss_div(mfcc[frameids,:], gdiff_win_size_frame)
+ gdiff = gauss_div(mfcc[frameids, :], gdiff_win_size_frame)
# initial segmentation based on gaussian divergence criterion
seg = segment(gdiff, min_seg_size_frame)
duration[-1] = t + d - time[-1]
lastlabel = l
-
+
# store diarisation result
diar_res = self.new_result(data_mode='label', time_mode='segment')
diar_res.id_metadata.id += '.' + 'speakers' # + name + 'diarisation'
diar_res.label_metadata.label = dict()
for lab in diar_res.data_object.label:
diar_res.label_metadata.label[lab] = str(lab)
-
- self.process_pipe.results.add(diar_res)
+
+ self.add_result(diar_res)
"""
Limsi Speech Activity Detection Systems
LimsiSad performs frame level speech activity detection based on trained GMM models
- For each frame, it computes the log likelihood difference between a speech model and a non speech model.
+ For each frame, it computes the log likelihood difference between a speech model and a non speech model.
The highest is the estimate, the largest is the probability that the frame corresponds to speech.
Dilatation and erosion procedures are used in a latter stage to obtain speech and non speech segments
* sad_segments: speech/non speech segments
"""
implements(IAnalyzer)
-
- def __init__(self, sad_model, dews=0.2, speech_threshold=1., dllh_bounds=(-10., 10.)):
+
+ def __init__(self, sad_model='etape', dews=0.2, speech_threshold=1., dllh_bounds=(-10., 10.)):
"""
Parameters:
----------