--- /dev/null
+ # -*- coding: utf-8 -*-
+ #
+ # Copyright (c) 2013 Paul Brossier <piem@piem.org>
+
+ # This file is part of TimeSide.
+
+ # TimeSide is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 2 of the License, or
+ # (at your option) any later version.
+
+ # TimeSide is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+
+ # You should have received a copy of the GNU General Public License
+ # along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
+
+ # Author: Paul Brossier <piem@piem.org>
+ from __future__ import absolute_import
+
+ from ...core import implements, interfacedoc
+ from ..core import Analyzer
+ from ...api import IAnalyzer
+ from ..preprocessors import downmix_to_mono, frames_adapter
+ from aubio import filterbank, pvoc
+
+ class AubioMelEnergy(Analyzer):
+
+ """Aubio Mel Energy analyzer"""
+ implements(IAnalyzer)
+
+ def __init__(self):
+ super(AubioMelEnergy, self).__init__()
+ self.input_blocksize = 1024
+ self.input_stepsize = self.input_blocksize / 4
+
+ @interfacedoc
+ def setup(self, channels=None, samplerate=None,
+ blocksize=None, totalframes=None):
+ super(AubioMelEnergy, self).setup(
+ channels, samplerate, blocksize, totalframes)
+ self.n_filters = 40
+ self.n_coeffs = 13
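+ # phase vocoder computing the spectrum of each analysis frame, and a mel
+ # filterbank with Slaney-style coefficients yielding n_filters band energies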
+ self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
+ self.melenergy = filterbank(self.n_filters, self.input_blocksize)
+ self.melenergy.set_mel_coeffs_slaney(samplerate)
+ self.block_read = 0
+ self.melenergy_results = []
+
+ @staticmethod
+ @interfacedoc
+ def id():
+ return "aubio_melenergy"
+
+ @staticmethod
+ @interfacedoc
+ def name():
+ return "Mel Energy (aubio)"
+
+ @staticmethod
+ @interfacedoc
+ def unit():
+ return ""
+
+ @downmix_to_mono
+ @frames_adapter
+ def process(self, frames, eod=False):
+
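+ # spectrum of the current frame, then its mel band energies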
+ fftgrain = self.pvoc(frames)
+ self.melenergy_results.append(self.melenergy(fftgrain))
+ self.block_read += 1
+ return frames, eod
+
+ def post_process(self):
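+ # store the per-frame mel band energies as a framewise value result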
+ melenergy = self.new_result(data_mode='value', time_mode='framewise')
+ melenergy.parameters = dict(n_filters=self.n_filters,
+ n_coeffs=self.n_coeffs)
+ melenergy.data_object.value = self.melenergy_results
+ self.add_result(melenergy)
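+
+ # Example usage (illustrative sketch only; assumes TimeSide's FileDecoder and
+ # pipe API, and 'audio.wav' is a placeholder path):
+ #
+ #     import timeside
+ #     decoder = timeside.decoder.FileDecoder('audio.wav')
+ #     melenergy = AubioMelEnergy()
+ #     (decoder | melenergy).run()
+ #     results = melenergy.results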
import pickle
import os.path
+ # Require Yaafe
+ if not timeside._WITH_YAAFE:
+ raise ImportError('yaafelib is required by the LimsiSad analyzer')
+
class GMM:
++
"""
Gaussian Mixture Model
"""
++
def __init__(self, weights, means, vars):
self.weights = weights
self.means = means
- 2 * np.dot(x, (self.means / self.vars).T)
+ np.dot(x ** 2, (1.0 / self.vars).T))
+ np.log(self.weights)
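# llh holds, per frame, one Gaussian log density per mixture component plus
# the log of that component's weight; the log-sum-exp below reduces it to the
# mixture log likelihood while avoiding numerical underflow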
-- m = np.amax(llh,1)
++ m = np.amax(llh, 1)
dif = llh - np.atleast_2d(m).T
-- return m + np.log(np.sum(np.exp(dif),1))
++ return m + np.log(np.sum(np.exp(dif), 1))
def slidewinmap(lin, winsize, func):
winsize: size of the sliding windows in samples (int)
func: function to be mapped on sliding windows
"""
-- tmpin = ([lin[0]] * (winsize/2)) + list(lin) + ([lin[-1]] * (winsize -1 - winsize/2))
++ tmpin = ([lin[0]] * (winsize / 2)) + list(lin) + \
++ ([lin[-1]] * (winsize - 1 - winsize / 2))
lout = []
for i in xrange(len(lin)):
-- lout.append(func(tmpin[i:(i+winsize)]))
++ lout.append(func(tmpin[i:(i + winsize)]))
assert(len(lin) == len(lout))
return lout
++
def dilatation(lin, winsize):
"""
morphological dilation
"""
return slidewinmap(lin, winsize, max)
++
def erosion(lin, winsize):
"""
morphological erosion
class LimsiSad(Analyzer):
++
"""
Limsi Speech Activity Detection Systems
LimsiSad performs frame-level speech activity detection based on trained GMMs
super(LimsiSad, self).__init__()
# feature extraction definition
-- feature_plan = ['mfcc: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256',
-- 'mfccd1: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=1',
-- 'mfccd2: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=2',
-- 'zcr: ZCR blockSize=1024 stepSize=256']
++ feature_plan = [
++ 'mfcc: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256',
++ 'mfccd1: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=1',
++ 'mfccd2: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=2',
++ 'zcr: ZCR blockSize=1024 stepSize=256']
yaafe_analyzer = get_processor('yaafe')
self.parents['yaafe'] = yaafe_analyzer(feature_plan=feature_plan,
input_samplerate=16000)
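# the plan computes MFCCs, their first and second order time derivatives and
# the zero crossing rate on 1024-sample frames with a 256-sample hop, on
# audio at 16 kHz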
features = np.concatenate((mfcc, mfccd1, mfccd2, zcr), axis=1)
# compute log likelihood difference
-- res = 0.5 + 0.5 * (self.gmms[0].llh(features) - self.gmms[1].llh(features))
++ res = 0.5 + 0.5 * \
++ (self.gmms[0].llh(features) - self.gmms[1].llh(features))
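# res rescales the log likelihood difference so that values above 0.5
# correspond to frames where the first GMM (speech) is more likely than the
# second (non speech)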
# bounds log likelihood difference
if self.dllh_bounds is not None:
res = np.minimum(np.maximum(res, mindiff), maxdiff)
# performs dilation, erosion, erosion, dilatation
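# (dilation followed by erosion is a morphological closing that removes short
# dips in the score; the subsequent erosion and dilation form an opening that
# removes short peaks)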
-- ws = int(self.dews * float(self.input_samplerate ) / self.input_stepsize)
-- deed_llh = dilatation(erosion(erosion(dilatation(res, ws), ws), ws), ws)
++ ws = int(
++ self.dews * float(self.input_samplerate) / self.input_stepsize)
++ deed_llh = dilatation(
++ erosion(erosion(dilatation(res, ws), ws), ws), ws)
# infer speech and non speech segments from dilated
# and eroded likelihood difference estimate
labels = []
times = []
durations = []
-- for i, val in enumerate([1 if e > self.speech_threshold else 0 for e in deed_llh]):
++ for i, val in enumerate([1 if e > self.speech_threshold else 0
++ for e in deed_llh]):
if val != last:
labels.append(val)
durations.append(1)
else:
durations[-1] += 1
last = val
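# the loop run length encodes the frame level decision; segment start frames
# and frame counts are converted to seconds below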
-- times = [(float(e) * self.input_stepsize) / self.input_samplerate for e in times]
-- durations = [(float(e) * self.input_stepsize) / self.input_samplerate for e in durations]
--
++ times = [(float(e) * self.input_stepsize)
++ / self.input_samplerate for e in times]
++ durations = [(float(e) * self.input_stepsize)
++ / self.input_samplerate for e in durations]
-- # outputs the raw frame level speech/non speech log likelihood difference
++ # outputs the raw frame level speech/non speech log likelihood
++ # difference
sad_result = self.new_result(data_mode='value', time_mode='framewise')
sad_result.id_metadata.id += '.' + 'sad_lhh_diff'
-- sad_result.id_metadata.name += ' ' + 'Speech Activity Detection Log Likelihood Difference'
++ sad_result.id_metadata.name += ' ' + \
++ 'Speech Activity Detection Log Likelihood Difference'
sad_result.data_object.value = res
self.add_result(sad_result)
# outputs frame level speech/non speech log likelihood difference
# altered with erosion and dilatation procedures
-- sad_de_result = self.new_result(data_mode='value', time_mode='framewise')
++ sad_de_result = self.new_result(
++ data_mode='value', time_mode='framewise')
sad_de_result.id_metadata.id += '.' + 'sad_de_lhh_diff'
-- sad_de_result.id_metadata.name += ' ' + 'Speech Activity Detection Log Likelihood Difference | dilat | erode'
++ sad_de_result.id_metadata.name += ' ' + \
++ 'Speech Activity Detection Log Likelihood Difference | dilat | erode'
sad_de_result.data_object.value = deed_llh
self.add_result(sad_de_result)
# outputs speech/non speech segments
-- sad_seg_result = self.new_result(data_mode='label', time_mode='segment')
++ sad_seg_result = self.new_result(
++ data_mode='label', time_mode='segment')
sad_seg_result.id_metadata.id += '.' + 'sad_segments'
-- sad_seg_result.id_metadata.name += ' ' + 'Speech Activity Detection Segments'
++ sad_seg_result.id_metadata.name += ' ' + \
++ 'Speech Activity Detection Segments'
sad_seg_result.data_object.label = labels
sad_seg_result.data_object.time = times
sad_seg_result.data_object.duration = durations
-- sad_seg_result.data_object.label_metadata.label = {0: 'Not Speech', 1: 'Speech'}
++ sad_seg_result.data_object.label_metadata.label = {
++ 0: 'Not Speech', 1: 'Speech'}
self.add_result(sad_seg_result)