def __init__(self, blocksize=1024, stepsize=None):
super(IRITDiverg, self).__init__()
- self.parents.append(Waveform())
+ self.parents['waveform'] = Waveform()
self.ordre = 2
@interfacedoc
return frames, eod
def post_process(self):
- audio_data = self.process_pipe.results.get_result_by_id('waveform_analyzer').data
+
+ audio_data = self.parents['waveform'].results['waveform_analyzer'].data
if audio_data.shape[1] > 1:
data = list(audio_data.mean(axis=1))
else:
segs.data_object.label = [s[1] for s in frontieres]
segs.data_object.time = [(float(s[0]) / self.samplerate())
for s in frontieres]
- self.process_pipe.results.add(segs)
+ self.add_result(segs)
return
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr>
-
-# This file is part of TimeSide.
-
-# TimeSide is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# TimeSide is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
-
-# Author: Maxime Le Coz <lecoz@irit.fr>
-
-from timeside.core import implements, interfacedoc
-from timeside.analyzer.core import Analyzer
-from timeside.analyzer.utils import melFilterBank, computeModulation
-from timeside.analyzer.utils import segmentFromValues
-from timeside.analyzer.irit_diverg import IRITDiverg
-from timeside.api import IAnalyzer
-from numpy import logical_and, array, hamming, dot, mean, float, arange, nonzero
-from numpy.fft import rfft
-from scipy.signal import firwin, lfilter
-from pylab import plot, show
-
-
-class IRITMusicLDN(Analyzer):
- implements(IAnalyzer)
-
- def __init__(self, blocksize=1024, stepsize=None):
- super(IRITMusicLDN, self).__init__()
- self.parents.append(IRITDiverg())
- self.wLen = 1.0
- self.wStep = 0.1
- self.threshold = 20
-
- @staticmethod
- @interfacedoc
- def id():
- return "irit_music_ldn"
-
- @staticmethod
- @interfacedoc
- def name():
- return "IRIT Music Detector - Segment Length"
-
- @staticmethod
- @interfacedoc
- def unit():
- return ""
-
- def __str__(self):
- return "Music confidence indexes"
-
- def process(self, frames, eod=False):
- return frames, eod
-
- def post_process(self):
- '''
- '''
-
- segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
- w = self.wLen / 2
- end = segList[-1]
- tLine = arange(0, end, self.wStep)
-
- segLen = array([0] * len(tLine))
-
- for i, t in enumerate(tLine):
- idx = nonzero(logical_and(segList > (t - w), segList < (t + w)))[0]
- segLen[i] = len(idx)
-
- #plot(tLine, segLen)
- #show()
- # Confidence Index
- conf = array(segLen - self.threshold) / self.threshold
- conf[conf > 1] = 1
-
- segLenRes = self.new_result(data_mode='value', time_mode='framewise')
- segLenRes.id_metadata.id += '.' + 'energy_confidence'
- segLenRes.id_metadata.name += ' ' + 'Energy Confidence'
-
- segLenRes.data_object.value = segLen
-
- self.process_pipe.results.add(segLenRes)
-
- # Segment
- convert = {False: 0, True: 1}
- label = {0: 'nonMusic', 1: 'Music'}
-
- segList = segmentFromValues(segLen > self.threshold)
- # Hint : Median filtering could imrove smoothness of the result
- # from scipy.signal import medfilt
- # segList = segmentFromValues(medfilt(modEnergyValue > self.threshold, 31))
-
- segs = self.new_result(data_mode='label', time_mode='segment')
- segs.id_metadata.id += '.' + 'segments'
- segs.id_metadata.name += ' ' + 'Segments'
-
- segs.data_object.label_metadata.label = label
-
- segs.data_object.label = [convert[s[2]] for s in segList]
- segs.data_object.time = [tLine[s[0]] for s in segList]
- segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
- for s in segList]
-
- self.process_pipe.results.add(segs)
- return
def __init__(self, blocksize=None, stepsize=None):
super(IRITMusicSLN, self).__init__()
- self.parents.append(IRITDiverg())
+ self.parents['irit_diverg'] = IRITDiverg()
self.wLen = 1.0
self.wStep = 0.1
self.threshold = 0.05
'''
'''
-
- segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
+ res_irit_diverg = self.parents['irit_diverg'].results
+ segList = res_irit_diverg['irit_diverg.segments'].time
w = self.wLen / 2
end = segList[-1]
segLenRes.data_object.value = segLen
- self.process_pipe.results.add(segLenRes)
+ self.add_result(segLenRes)
# Segment
convert = {False: 0, True: 1}
segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
for s in segList]
- self.process_pipe.results.add(segs)
+ self.add_result(segs)
return
def __init__(self, blocksize=1024, stepsize=None, samplerate=None):
super(IRITMusicSNB, self).__init__()
- self.parents.append(IRITDiverg())
+ self.parents['irit_diverg'] = IRITDiverg()
self.wLen = 1.0
self.wStep = 0.1
self.threshold = 20
'''
'''
-
- segList = self.process_pipe.results.get_result_by_id('irit_diverg.segments').time
+ res_irit_diverg = self.parents['irit_diverg'].results
+ segList = res_irit_diverg['irit_diverg.segments'].time
w = self.wLen / 2
end = segList[-1]
tLine = arange(0, end, self.wStep)
segLenRes.data_object.value = conf
- self.process_pipe.results.add(segLenRes)
+ self.add_result(segLenRes)
# Segment
convert = {False: 0, True: 1}
segs.data_object.duration = [tLine[s[1]] - tLine[s[0]]
for s in segList]
- self.process_pipe.results.add(segs)
+ self.add_result(segs)
return
@interfacedoc
def __init__(self):
super(IRITSingings, self).__init__()
- self.parents.append(IRITMonopoly())
+ self.parents['irit_monopoly'] = IRITMonopoly()
self.block_read = 0
self.pitches = []
"""
"""
- preproc = self.process_pipe.results.get_result_by_id('irit_monopoly.segments').data_object
- labels = self.process_pipe.results.get_result_by_id('irit_monopoly.segments').data_object.label_metadata['label']
- segments_monopoly = [(start, duration, labels[label])for start, duration, label in zip(preproc.time,
- preproc.duration,
- preproc.label)]
+ monopoly_results = self.parents['irit_monopoly'].results
+ preproc = monopoly_results['irit_monopoly.segments'].data_object
+ labels = preproc.label_metadata['label']
+ segments_monopoly = [(start, duration, labels[label])
+ for start, duration, label
+ in zip(preproc.time, preproc.duration,
+ preproc.label)]
segments_chant = []
for start, duration, label in segments_monopoly:
cumulChant = 0
for seg in segs:
if has_vibrato(seg[2], f0_frame_rate):
cumulChant += seg[1]-seg[0]
- segments_chant += [(start, duration, cumulChant/duration >= self.thMono)]
+ segments_chant += [(start, duration,
+ cumulChant/duration >= self.thMono)]
elif label == 'Poly':
pass
return
-
class SinusoidalSegment(object):
"""
class LimsiDiarization(Analyzer):
implements(IAnalyzer)
- def __init__(self, sad_analyzer = None, gdiff_win_size_sec=5., min_seg_size_sec=2.5, bic_penalty_coeff=0.5):
+ def __init__(self, sad_analyzer=None, gdiff_win_size_sec=5.,
+ min_seg_size_sec=2.5, bic_penalty_coeff=0.5):
super(LimsiDiarization, self).__init__()
self.gdiff_win_size_sec = gdiff_win_size_sec
if sad_analyzer is None:
sad_analyzer = LimsiSad('etape')
self.sad_analyzer = sad_analyzer
- self.parents.append(sad_analyzer)
+ self.parents['sad_analyzer'] = sad_analyzer
# feature extraction defition
spec = yaafelib.FeaturePlan(sample_rate=16000)
spec.addFeature('mfccchop: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256')
- parent_analyzer = Yaafe(spec)
- self.parents.append(parent_analyzer)
+ self.parents['yaafe'] = Yaafe(spec)
# informative parameters
# these are not really taken into account by the system
def post_process(self):
# extract mfcc with yaafe and store them to be used with pyannote
-        mfcc = self.process_pipe.results.get_result_by_id('yaafe.mfccchop')['data_object']['value']
+        res_yaafe = self.parents['yaafe'].results['yaafe.mfccchop']
+        mfcc = res_yaafe.data_object.value
- sw = YaafeFrame(self.input_blocksize, self.input_stepsize, self.input_samplerate)
+ sw = YaafeFrame(self.input_blocksize, self.input_stepsize,
+ self.input_samplerate)
pyannotefeat = SlidingWindowFeature(mfcc, sw)
# gaussian divergence window size
min_seg_size_frame = int(self.min_seg_size_sec / timestepsize)
# speech activity detection
- sadval = self.process_pipe.results.get_result_by_id(self.sad_analyzer.id() + '.sad_lhh_diff').data_object.value[:]
+ sad_analyzer = self.parents['sad_analyzer']
+ res_sad = sad_analyzer.results['limsi_sad.sad_lhh_diff']
+ sadval = res_sad.data_object.value[:]
# indices of frames detected as speech
speech_threshold = 0.
- frameids = [i for i, val in enumerate(sadval) if val > speech_threshold]
+ frameids = [i for i, val in enumerate(sadval)
+ if val > speech_threshold]
# compute gaussian divergence of speech frames only
- gdiff = gauss_div(mfcc[frameids,:], gdiff_win_size_frame)
+ gdiff = gauss_div(mfcc[frameids, :], gdiff_win_size_frame)
# initial segmentation based on gaussian divergence criterion
seg = segment(gdiff, min_seg_size_frame)
duration[-1] = t + d - time[-1]
lastlabel = l
-
+
# store diarisation result
diar_res = self.new_result(data_mode='label', time_mode='segment')
diar_res.id_metadata.id += '.' + 'speakers' # + name + 'diarisation'
diar_res.label_metadata.label = dict()
for lab in diar_res.data_object.label:
diar_res.label_metadata.label[lab] = str(lab)
-
- self.process_pipe.results.add(diar_res)
+
+ self.add_result(diar_res)
"""
Limsi Speech Activity Detection Systems
LimsiSad performs frame level speech activity detection based on trained GMM models
- For each frame, it computes the log likelihood difference between a speech model and a non speech model.
+ For each frame, it computes the log likelihood difference between a speech model and a non speech model.
The highest is the estimate, the largest is the probability that the frame corresponds to speech.
Dilatation and erosion procedures are used in a latter stage to obtain speech and non speech segments
* sad_segments: speech/non speech segments
"""
implements(IAnalyzer)
-
- def __init__(self, sad_model, dews=0.2, speech_threshold=1., dllh_bounds=(-10., 10.)):
+
+ def __init__(self, sad_model='etape', dews=0.2, speech_threshold=1., dllh_bounds=(-10., 10.)):
"""
Parameters:
----------