From: Maxime LE COZ Date: Tue, 25 Feb 2014 12:09:11 +0000 (+0100) Subject: New Start session localizer X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=a8c039899bc0fba9ec1eae88c0a1126b5fa499db;p=timeside.git New Start session localizer IRIT speech & music detector modified to use frame_adapter --- diff --git a/timeside/analyzer/irit_noise_startSilences.py b/timeside/analyzer/irit_noise_startSilences.py new file mode 100644 index 0000000..2128bbc --- /dev/null +++ b/timeside/analyzer/irit_noise_startSilences.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2013 Maxime Le Coz + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see . +# Author: Maxime Le Coz + +from timeside.analyzer.utils import segmentFromValues +from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter +from timeside.analyzer.core import Analyzer +from timeside.analyzer.preprocessors import frames_adapter +from timeside.api import IAnalyzer +from aubio import pitch +import numpy +from scipy.signal import firwin,lfilter +from scipy.ndimage.morphology import binary_opening,binary_closing +import pylab + + + + +class IRITStartSeg(Analyzer): + implements(IAnalyzer) + ''' + Segmentor MOnophony/Polyphony based on the analalysis of yin confidence. + + Properties: + ''' + + @interfacedoc + def setup(self, channels=None, samplerate=None, + blocksize=None, totalframes=None): + + super(IRITStartSeg, self).setup(channels, + samplerate, + blocksize, + totalframes) + lowFreq = 100.0 + + self.input_blocksize = int(0.02 * samplerate) + self.input_stepsize = int(0.008 * samplerate) + + + sr = float(samplerate) + highFreq = sr/2 + f1= lowFreq/sr + f2= highFreq/sr + self.filtre = firwin(10, [f1,f2], pass_zero=False) + self.energy = [] + self.maxenergy = 0.002 + self.min_overlap = 20 + self.threshold = 0.1 + @staticmethod + @interfacedoc + def id(): + return "irit_monopoly" + + @staticmethod + @interfacedoc + def name(): + return "IRIT Monophony / Polyphony classification" + + @staticmethod + @interfacedoc + def unit(): + return "" + + def __str__(self): + return "Labeled Monophonic/Polyphonic segments" + + @frames_adapter + def process(self, frames, eod=False): + ''' + + ''' + + self.energy += [numpy.sqrt(numpy.mean(lfilter(self.filtre,1.0,frames.T[0])**2))] + return frames, eod + + def post_process(self): + ''' + + ''' + self.energy = numpy.array(self.energy)/max(self.energy) + silences = numpy.zeros((1,len(self.energy)))[0] + silences[self.energy= m2 : + fin = numpy.min([l1-decal,l2]) + if fin-decal > min_overlap: + + v1_out = numpy.array(v1[decal:decal+fin]) + v2_out = numpy.array(v2[:fin]) + d = numpy.mean(numpy.abs(v1_out-v2_out)) + else : + v1_out = [0] + v2_out = [1] + d = 1 + else : + return computeDist(v2, v1,min_overlap) + + + return d,v1_out,v2_out + diff --git a/timeside/analyzer/protoStart.dat b/timeside/analyzer/protoStart.dat new file mode 100644 index 0000000..463f177 Binary files /dev/null and b/timeside/analyzer/protoStart.dat differ diff --git a/timeside/analyzer/protoStart2.dat b/timeside/analyzer/protoStart2.dat new file mode 100644 index 0000000..3f06127 Binary files /dev/null and b/timeside/analyzer/protoStart2.dat differ diff --git a/timeside/analyzer/protoStart3.dat b/timeside/analyzer/protoStart3.dat new file mode 100644 index 0000000..718c07d Binary files /dev/null and b/timeside/analyzer/protoStart3.dat differ