From b32be77ebd1d41c4ad050ad7e27b88461ef7accf Mon Sep 17 00:00:00 2001 From: Dominique Fourer Date: Fri, 12 Dec 2014 22:05:48 +0100 Subject: [PATCH] New Analyzer for instrument detection and new timbre descriptors --- timeside/analyzer/labri/.directory | 3 + timeside/analyzer/labri/butter3.mat | Bin 0 -> 265 bytes timeside/analyzer/labri/model_inst.mat | Bin 0 -> 6999 bytes timeside/analyzer/labri/my_lda.py | 147 +++ timeside/analyzer/labri/my_tools.py | 445 +++++++ timeside/analyzer/labri/swipep.py | 215 ++++ timeside/analyzer/labri/timbre_descriptor.py | 1123 ++++++++++++++++++ timeside/analyzer/labri_instru.py | 130 ++ 8 files changed, 2063 insertions(+) create mode 100644 timeside/analyzer/labri/.directory create mode 100644 timeside/analyzer/labri/butter3.mat create mode 100644 timeside/analyzer/labri/model_inst.mat create mode 100644 timeside/analyzer/labri/my_lda.py create mode 100644 timeside/analyzer/labri/my_tools.py create mode 100644 timeside/analyzer/labri/swipep.py create mode 100644 timeside/analyzer/labri/timbre_descriptor.py create mode 100644 timeside/analyzer/labri_instru.py diff --git a/timeside/analyzer/labri/.directory b/timeside/analyzer/labri/.directory new file mode 100644 index 0000000..41967b9 --- /dev/null +++ b/timeside/analyzer/labri/.directory @@ -0,0 +1,3 @@ +[Dolphin] +Timestamp=2014,12,12,21,31,55 +ViewMode=2 diff --git a/timeside/analyzer/labri/butter3.mat b/timeside/analyzer/labri/butter3.mat new file mode 100644 index 0000000000000000000000000000000000000000..129d25a226a97ed620f51852baf66ae04aed2b4f GIT binary patch literal 265 zcmeZu4DoSvQZUssQ1EpO(M`+DN!3vZ$Vn_o%P-2cQgHY2i?A@$QE)CwO)N=GQOM7; zQV7W?Rd7j7RxmVFFf_F?HnB1?P%ttuG*KWGFfe-h@-r|nm;!OdoX5!t2^$K4E5yO+ETyH^|UGQq|foPUU oc+k9eig8{bp`T(qiE+Uy@~?5)^nHSEpYKRUTOh|nsi zr~!op*l1;3t<2o5ENPt_MQF9HJ!oaEENHp8X?eIr`1nNl`TiWZ`Dp)t7ZS3fDn1et zlIt@hB=5{I9WU+oUN73n6&JX12ub_U9J``lzs6z-ebyHOeoM{p4*gY8UES>5qKJWE z-7H>%kmySNQXbVfo&%N9(xzkGNPgY+HQ&^YTKo`ACUubTb-@0J=sE&%jA*~kMt>V2 znMy?zd>Z#m=!^K)(6~ z%X*-)J>;s|Wt&lq55B3uAbz*7!<-<=?Xh%0!3z?V@*@ zYvb+_Z!~2n&IlBR6^d2pg1)}#_Yh3uLbmU0eB?`*Mj(w_sx_kV3I22~2RW77gLX3& z)OBut_Hs*uoLc2|+LlQP&zV!rSv(UkCpPZ0^QLk#<*hemtv3_(G-dSMRBeq|T?lJ{ zWys^EoHh!f@uz9#-rfKLZS2@gYpTU4Old!D+oi2#Pu8P5q`&*rUq>OOv zT6Sf%yYKxP({(PdV%{_dK~Hnpz@2^Pr&uchC6pUn#SDfD8&=V8XLRb?j|te1c-l|M zdYURQK#7wVGnkLMwLPr_3%IjZ7sJFex{OCH$^2^Y@2hEQ7IU7Z_k;j?y8zuQO4k$9 zUfl0CO@0K^74&k5eeEq9H0dY$y6UPmv_Gyezpu5%DQzmG;7q6B!gpR|A%R_VZ2*dr zF3beey>C|qwSTYU*{7QiYj=)l7h2>bbH%~stB$hJS0Ns&og5}w0xF4)2*q#9O$E8; zpCtd4d1`;QcjmT*sc96Fxd826@K4%KtX*CdD0=!wv%y$(){miQ?6=cDEDKT?unUEh z4~GXDiZl9W6v$*bvvu;z>w*VFCYgr#y_(si$4e7g;uDKB_Uca} zN%Z>FvNMD|w9A$wFZEe0MWdddODq}|$h}siTY65IVx8$$akBFCP;uYvgp3s-Z77$= zc0WM+_37Rz^>KdpF*&kZ0qW{`VR(CTGZ7RtwH5~TEWasHHwA51@|2B_yYH@DK07pv zxB2;w_tA#jBH%4iUdsGaL~A*V>@KBTBIhwNh}>(0GFHClT!s(7;h@F2VpyH=@Lb>D z)WzqL6~XXYgIAB!wue6U*8S+R+(F(O$7$FwT+S@ES3IPVShsQHtL?lbQMiXH|9oFl zO0SB^$-xBW51Eg0FGI#_ucCr`|pMYTZ$UyTu>$}Ed3pO7jJHfYAj-4Z`%s|n@md;Rs0PfXbt zqTnN+YRqt6T*(e;eGEV2A83>wQ|P?Ytew6#O5RX++gR>;gYq)q8jUn|xHE=(Ui9lB zItb%^P4R^+p!0TTm5fbet`>Cm*c$Qyvn>Kwc>Hv7cZLH(H4EHW=#J-C42^DqB`Ngj zKNo8f&lI8HB2zWddYvk3xdLSOYabwIwX0cgH#xwo7&UftFcQ@^-4k5)$EBM)n;$U2 zJ(F^i6t*E2z&-!Wq@METHgDudc;u!<+-*!~7eg6cS-D<1kDW6E)#rjpH(Z$w7=WfSBuapRGqF~2a<0Q*S?cC_LnMwgn2<{_-7!uqrPv%#txm&vGM`4%g; zLZj(aslJ^w2!ADvR)u9{r)3Kb39kH_)QGS3b+p#;!;t#MeT@HWWS^PT^|faPI#_f?Z+u#01CGs8rfwzUU- z1d{OsJe&zRxEQfuN$wv&GU$V0`AOxKeZm`T_?!2vH&{@5L~NY?25u|8=YHAMYj9a? zG|;5S=~E>&c4e!nE8tAT5`VOE0VRd94g)9(8VWfFE6AX#OG!C~NOOtxmsqRIG-yc- z>bjI{O(}(+!ITjBu!-Em>9hQCX1$AtGB4OdoqMD2jX>DFF3y;5AoYoZ>guI0Yjq49mb~Ai}Rw+{%J~WizLVSvMu*h`}W7IvM(+IiJtH z(ot+r7xlUrU%R{8z%yDomr`ZtD|J0?^Cy+ub3-F|ukI=qf!20ibRN=? zV)AD)ZaZKst~aYGGphaSb#31Z>QNC}j|~5I;vR-$962uENzUXTNbQyY2@@G;yu&5J zv&2MqyqK+Ox@PRUK8vAHg?=~m3k+JVZ)tWuiLxo>^vmHF;EiPMdM;1+3RJz|z}&Bh zSEC-i99~3;;bHaEvK#{m$+D+l5C&jSNkmvz@sgTrtx*pN4Rs5;r-dNINm}1^aC;ar z&}iQWRa8w{ZsdKR!svPL=2CQ7tU0qvax^beJg1fYyznV8jykz;`&q(FMR>E=mlnO! zZR;X|k_RcMQxG|>=Ev`S+ZMjg4@Tc!TB-6;lz%7GrK3#947eKNM70)jQ|HooyQ3bp zz?VZXr4WI34XA-(#;4OJ4xF>j#sE%)zYUmdgd1(vBxt8z5J30JOS6V|rhM#nqsliO zHH2-n@_K-@1$@;#U|WOeuerHE8(e<(GkBb}sAgz)NT~i^4DpY?K1OWL=UxQE_NA8` zl@U?Ichy%I7hI#4$~z1GNH~LCOdf~;Y2_y8cDNSn?PV4?ybD4L7(jI@ClcSuc$w1V zVnSfQazf7%7*LzZLHxpYKfoDVP!?rr`Kav zZt9^M~JvYqh*+G<3WhbyY;! zSPqUt%ma$zM0{=42EuBsZE>dpUSQar1x?I6SrUo2a#OjM&gRvj%E6ixxP+DuyY~6b zb0z$KZQa$IZN;b^#S99w+TW*03oaRjaL(2g@Ei)f0a=Z>nh&D9&!oG>wsN%altC*5 zd!phT-q3JtH$_<#h8eY=s}r-$jR-DcC=bc|dR#B`BSn!;TRGlP2WQCRYhQe3gpSd9 z&7zTe#2&gg`3AJ2FL=sn!#%w$l`U&8(iyu~8Pj<2%C4V@uK@xN#$@8@jn-)~TXHpyYW@)|1!CnG(K{JL{gxD|1n;RbIq_zjv0ww%b zHBG|%i}}xxBh#gp%9?nkjOSeOJ~svn&p{tMM0dOVxwTcTyV1<&TK41>eYbZLK67`K zv3d^}i0Cj}0ifnS)qVJ$uMB7=zA;butBDox&HH_zp>++{Z7M-{_T9Z=k?}|;2NlDy zwWip6S?Y7HO1D;lBKmF}9C_5SzWhXKY-N$Vqv`k0Dlc7(hgWT<#ISL(I8X6P;Tw9F zU6Et!Dw{<$(seqk3eqQ}CK^2x>XvHB?Pk?g#IbI=1jN;Q1YZ~IuHyMIL17NkODgnI z_IGgnppy<~V!hM1k0pHR?_HT1fy4=XFz6TC12IRcYvQ5>R>sVO%KKo#|QU0wfFL@^%FC*Bm?D~j) zPk8@WR&Q!>2bZ|jwIw{n`A%QR`rT61f$#BBmn(tp9}hl3BCbMh?;Bvedkr~L$0ZD{ zD>gwq+bW#feT4V3N-ywj?%>Bx&4)0GK+dkUyi?|ZRiXrhLaFfp+m>LJOzN4hyP+4uHu!ZKYgUcs`^St(}q2GLfGw| zastbaPNx;N&0_Mr*wSyQv_&*S6#?i7`e+hbUHY_xSkAl9Wy->VtjioUX*5^SNP}c< zkvdus{(q4q^Y4KnE7gZ@)`w;QLXyJsE(Qbg#kN}Dr;0>U))yr?0|I8fYB}bgW8V-4 zhs|L}R!tSoLuYj)((h7V(3ua`yD3!PuJ8rK|y==+rP^A&Oh*+Yi%GR!B3p=PbMAe8z)Y@l0i!F;Ox$K zG}@Lw0qFCEox5*JJU=BEJa5crsgdOgtZ8*VSgo~C{=(`U z2f3}51Zhg_e9*g$C>4)Jt2hNmBEI7cf>V!9#X!h`%R41l^*I=q4tbozR_Y<%>%;b% zl4L2w0fXW%?M!{_aDzeWwO)3{JoZcWp3Xh)sA6pp$_6!Fzy(fyJg?RH^Q)fBUowt% zz(sW26u(?Dx#okifU0a}4d#yocrvpEerm=_jE&fTdyTt!6~fPY4XcU*PKw$gf_#+} z#_*aTp*qInPy`p_UE+I{!x|qJP2~7_9+CKL&o|z9jsG=`FvW=gx~PK}*T539ATNGB zmCje~^`qhvgt^Xp=Vv18Y($b79tRQ!bZZl_xIWL!quRW_Oa{vmJr~{8w|n@aTHPGf zyju7eOv~6b`2bTQ8hccD+ng&GQ|3{fI6c!k0Bt}Rpr{asO-pjVvpUc=u0fdCdRz{CackpP z{Mi4@<|}^FZ!bnCHGzTdj~OO8g0rYn`6>4op#u0@sVTY*N7FW6p6;f-<>&9KeWmI$ z%f}v3-SY8;l~r|IdZLX9Zk=&#gGfR`5huy~j1vScq!DKK-T~3S26jf5?=rL{N24Wn zzLfW~lp*XoTm03V{i--I6K&$ym$Q^yAurj09s8Osrhj|!Exp>iS)_ZXyCW_%6i2@- zBLAT54Me@}HL9Z7lcjN`Qc{K~&$8sLevqo|55@c+61wdK?p#;<3IFzcBX3wYqri#x zqwsQqF9r}+BKd%oFWovsm{#nOR{8U-@otPFe*$Zg0RD~3^~}dD<9a9fz1!6fo?qJS zdQh6ax;eu&Ndy~!L-Am--GK6~7{mHOLXNQ9&Oox^)kI$M$Mf1v_Y(wNUAh@T7yMO< z`)rT6KJ_06Q=NW7`)h&DmJi1CoH?lCvmgj$xXttnM>CJ}licLc`&4~}~U zl-hp?bFKYFlZQ?Z756L*r4L8&3!MREd7lLDr=F2HL}dHzGLM!=qy|w}VEUhwKQIuh zsi}wyem?ssI+4N_4M-lUjr9!aKNC4H27zqJ|0ZgtRN8!uZL@f~P#@i(UQ3Ul$K_HY zFL|-J%YW=2ocmUAB4`;@^bKs7mlfXc2m5VtEa3n7%ccXVbL79e`;$guQn{VU`k!~9v=CnKpz3R~;2 zvkzYQ!;yfALEyHZ0m}=V6Sj0vvih*-r?ds#mCz+Oua**qfWfqq{w#lA!o;A^IY{aV z#P!dUNxK8kjx3}_x8oEtKSYhfA5!?gVzk4YBn>bE*rV6{yf2p-cLQn8BgsROohELrN3`p z^k4ik$G0T6L*0ZE85WrO0|ByG&yJ)2VeX+e##emqm!JSA*E!Lk(mD!cL2$5(-mIVO zsIm1HLKRQtvTeT?Kb0tt_<*9W8I6lj5Bk1?He%(Yrlq>^- zSp$LsF!>V@DYdF^1`Dme)2G-_?+nYr;j}3zXH)CD$CM(J|H-_c9abOA>enzN{rW~i zzv)HqU4ZaI`~?%;gjyE7_D){@BNMz_ArlyJM8P(1$~H8dapQB|2Ncu!QT{Ko zbAk#<1isn?q*LIX{=X8q@8+YJxE9&9xDFm`z^Nn+M2q2tZC$^oG=p1CoHs`E<+rWr zu)+Ha)C(G+#4?MF(w{5B$(=!TwK(Je=R`Ak2$Fd8dsuzT09KvIOka#>KkaX zMddi{zEjdhwg3j1X1JoT&3eGsZC|RHx69dyRuSiQ&)>Dm>q$z{xcV~&4*h(A6+&S2 z-DgA^Vak*@aI!V2Y092=knTbi-SRB`rWu%IEqr`!CqcY#+G^xlUKVRSNJ{VG($F zyknxMvRlefVNzr6b;M3TFWSMZOHP{fb?odKuY&_3L>a;^4vV&5u&2@F(V>v9Fw9UC zq$udp>t7iaR^qn2EQtKZ*ambS>MRObqd{e^+#_eT;bNw74PkBFp_)w&Uc7s&0GDA( zgnUnc=`+igX)``e4!q3|`pG*f6N-n%!JtMin?UfFGSsf&e2XhZAXmyZRQ1T)VUGEc z&pQU9FzZ-B1pRjjvI{fI?6`gYgJFX_bkSvC-%}&~d}H?P(Y$DIanhQWLNWDel7^pk z4D}zO73|{>PSVcCwh*1R?HPyC+EN<1d22rZx0xY+eq@rBtA*AkA|U#sW9NVC;ApKR PjO>!6+oWcaZT0^Ee>}(% literal 0 HcmV?d00001 diff --git a/timeside/analyzer/labri/my_lda.py b/timeside/analyzer/labri/my_lda.py new file mode 100644 index 0000000..3a84152 --- /dev/null +++ b/timeside/analyzer/labri/my_lda.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# +# my_lda.py +# +# Copyright (c) 2014 Dominique Fourer + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see . + +# Author: D Fourer http://www.fourer.fr + +import numpy +import my_tools as mt + + # test unitaire + # = v, d, data,indices = my_lda.test_my_lda() +def test_my_lda(): + + d = 2; + ##### MATLAB COMPARISON + #a = [0.5377 0.8622 -0.4336; 1.8339 0.3188 0.3426; -2.2588 -1.3077 3.5784]; + #b = [2.7694 0.7254 -0.2050;-1.3499 -0.0631 -0.1241; 3.0349 0.7147 1.4897]; + a = numpy.array( [[0.5377, 0.8622, -0.4336],\ + [1.8339, 0.3188, 0.3426],\ + [-2.2588, -1.3077, 3.5784]]); + b = numpy.array( [[2.7694, 0.7254, -0.2050],\ + [-1.3499, -0.0631, -0.1241],\ + [3.0349, 0.7147, 1.4897]]); + #indices = [1 3;4 6]; + indices = numpy.array([[0,2],[3,5]]); + + # data = [a;b]; + data = numpy.concatenate((a,b)); + v,d,l = lda(data, indices ); + return v, d, data,indices + + +# LDA function +# name: unknown +# @param +# @return +# +def lda(data, indices): + data = numpy.real(data); + nb_desc = numpy.shape(data)[1]; + nb_unit = numpy.shape(data)[0]; + nb_clust = numpy.shape(indices)[0]; + + mu = numpy.mean(data,0); + V = numpy.cov(data.T); + + W = numpy.zeros((nb_desc, nb_desc),float); + B = numpy.zeros((nb_desc, nb_desc), float); + nk = numpy.zeros(nb_clust, float); + + for i in range(0,nb_clust): + I = range(indices[i,0],indices[i,1]+1); + nk[i] = len(I); + W = W + nk[i] * numpy.cov(data[I,:].T); + tmp = numpy.zeros((1,nb_desc),float); + tmp[:] = numpy.mean(data[I,:],0)-mu; + B = B + nk[i] * numpy.dot(tmp.T, tmp); + + W = W / nb_unit; + B = B / nb_unit; + lmd = numpy.linalg.det(W) / (numpy.linalg.det(V)); + d, v = numpy.linalg.eig( numpy.dot( numpy.linalg.pinv(V) , B)); + return v,d,lmd + + +# Compute the lda predicted values +# name: pred_lda +# @param Vect:projected vector, repr1: centroid, repr2: structure with var-cov matrices +# @return +# +def pred_lda(TestSet, Vect, repr1, repr2=None): + + nbe = numpy.shape(TestSet)[0]; # number of line of Testset matrix + gr1 = numpy.zeros(nbe,int); + gr2 = numpy.zeros(nbe,int); + p1 = numpy.zeros(nbe,float); + p2 = numpy.zeros(nbe,float); + + for it in xrange(0,nbe): + p = numpy.dot(TestSet[it,:], Vect); + gr1[it], p1[it] = confidence_dist(p, repr1); + gr2[it], p2[it] = confidence_dist(p, repr1, repr2); + + return gr1, gr2, p1, p2; + + +# Compute the probability (assuming a gaussian model) that +# p is in the cluster modeled by repr1, repr2 +# +# name: confidence_dist +# @param +# @return +# +def confidence_dist(x, repr1, repr2=0): + + nb_f, d = numpy.shape(repr1); + d_hat = numpy.zeros(nb_f, float); + gr_hat = 0; + + if repr2 == 0: # minimize Euclidean distance to class centroid + + for f in xrange(0, nb_f): + d_hat[f] = numpy.linalg.norm( repr1[f, :] - x); + d_min, gr_hat = mt.my_min(d_hat); + p_hat = pow(d_min, -2.0) / sum( pow(d_hat, -2.0)); + + else: # maximize likelihood + for f in xrange(0, nb_f): + d_hat[f] = gauss_prob(x, d, repr1[f, :], repr2[f]); + d_max,gr_hat = mt.my_max(d_hat); + p_hat = d_max / (sum(d_hat)+mt.EPS); + + return gr_hat, p_hat + + +def gauss_prob(x, d, mu, sigma): + sigma = sigma+mt.EPS; + + if d == 1: + detA = abs(sigma); + B = -1./2. * pow(x-mu, 2.0) / sigma; + else: + detA = numpy.linalg.det(sigma); + dif = numpy.array([x-mu,]); + B = -0.5 * numpy.dot( numpy.dot(dif, numpy.linalg.pinv(sigma)), dif.T); + A = pow(pow(2. * numpy.pi, d) * detA, -0.5); + p = numpy.real(A * numpy.exp(B)); + return p; + + diff --git a/timeside/analyzer/labri/my_tools.py b/timeside/analyzer/labri/my_tools.py new file mode 100644 index 0000000..3a26a26 --- /dev/null +++ b/timeside/analyzer/labri/my_tools.py @@ -0,0 +1,445 @@ +# -*- coding: utf-8 -*- +# +# my_tools.py +# +# Copyright (c) 2014 Dominique Fourer + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see . + +# Author: D Fourer http://www.fourer.fr + +import numpy +import scipy +import matplotlib +import matplotlib.pyplot as plt + + +EPS = 2.2204 * pow(10, -16.0); + +# Compute submatrix +# +def sub_matrix(a, I1, I2): + return a.take(I1,axis=0).take(I2,axis=1); + +# DFT bin to frequency conversion +# replace < [f] = Fmatlabversf(k, sr_hz, sizeFFT) > +# name: unknown +# @param +# @return +# +def Index_to_freq(k, sr_hz, sizeFFT): + return k / sizeFFT*sr_hz; + +def nextpow2(x): + return numpy.ceil(numpy.log2(x) + EPS); + +def my_max(vec): + vm = numpy.max(vec); + if numpy.isnan(vm): + im = 0; + else: + im = (vec == vm).nonzero()[0][0]; + return vm, im; + +def my_min(vec): + vm = numpy.min(vec); + if numpy.isnan(vm): + im = 0; + else: + im = (vec == vm).nonzero()[0][0]; + return vm, im; + +def my_sort(x, order='ascend'): + I = numpy.argsort(x); + if order == 'descend': + I = I[::-1]; + x_hat = x[I]; + return x_hat,I; + +def my_linspace(deb, fin, step): + out_val = numpy.array([deb]); + last_e = out_val[len(out_val)-1]; + while last_e+step+EPS < fin: + last_e = last_e+step; + out_val = numpy.concatenate((out_val, [last_e])); + + out_val = numpy.concatenate((out_val, [fin])); + #nb_case = round((fin-deb)/step)+1; + return out_val;#numpy.linspace(deb, fin, nb_case); + +## +# Equivalent to xcorr(vec, 'coeff') +# name: unknown +# @param +# @return +# +def xcorr( vec ): + n = len(vec)-1; + if n < 0: + return []; + cor = numpy.zeros(2 * n + 1, float); + for idx in xrange(0, n): + m = n-idx; + vtmp2 = vec[m:]; + vtmp1 = vec[0:len(vtmp2)]; ## bug ? + cor[idx] = numpy.dot(vtmp1 , vtmp2); #numpy.sum( + cor[len(cor)-idx-1] = cor[idx]; + ##print "TAILLE !! : ", len(vec); + cor[n] = numpy.dot(vec, vec); + cor = cor / cor[n]; + return cor; + #return numpy.real( scipy.ifft(scipy.fft(vec, 2*n+1) * scipy.fft(vec, 2*n+1) )) ; + +## used for debug only +def disp_var(x, name="var", shape=True, plot=True): + print name," : "; + #print x, "\n"; + print sum(abs(x)), "\n"; + + if shape: + print name," shape : ", numpy.shape(x), "\n"; + if plot: + my_plot(x, [], "X", "Y", name); + +# My plotting function +# name: unknown +# @param +# @return +# +def my_plot(y, x=[], xlab="X axis", ylab="Y axis", title="Figure"): + is_complex = numpy.iscomplex(y).any(); + + y_old = y; + if is_complex and len(x) != len(y): + x = numpy.real(y_old); + y = numpy.imag(y_old); + + if len(x) != len(y): + plt.plot(y); + else: + plt.plot(x, y); + + plt.title(title); + plt.xlabel(xlab); + plt.ylabel(ylab) + plt.show(); + return; + +# Compute RMS energy of input signal +# name: rms +# @param +# @return +# +def rms(s,N=0,rec=2,Fs=44100): + #s = numpy.array(s); + + if N == 0 or N > len(s): + N=len(s); + if N == len(s): + v=numpy.sqrt(numpy.mean( s ** 2)) + t=numpy.arange(1,len(s)+1) + else: + hop=numpy.floor(N / rec) + nb_trame=int(numpy.ceil(len(s) / hop)) + v=numpy.zeros(nb_trame, float); + t=numpy.zeros(nb_trame, float); + for i in xrange(nb_trame): + i0= i * hop; + i1=min_(i0 + N - 1,len(s)); + t0=(i0 + i1) / 2.0; + t[i]=t0 / Fs; + v[i]=numpy.sqrt(numpy.mean(s[i0:(i1+1)] ** 2.0)); + return v,t + +def IQR(x): + return numpy.percentile(x, 75) - numpy.percentile(x, 25); + +def hz2erbs(hz): + return 6.44 * ( numpy.log2( 229. + hz ) - 7.84 ); + +def erbs2hz(erbs): + return pow(2, erbs / 6.44 + 7.84 ) - 229; + +def my_interpolate(x, y, xi, kind='linear'): + f = scipy.interpolate.interp1d(x, y, kind, 0, False, 0.0); + y_tmp = f(xi); + return y_tmp; + +def primes(n): + """ Input n>=6, Returns a array of primes, 2 <= p < n """ + sieve = numpy.ones(n/3 + (n%6==2), dtype=numpy.bool) + for i in xrange(1,int(n**0.5)/3+1): + if sieve[i]: + k=3*i+1|1 + sieve[ k*k/3 ::2*k] = False + sieve[k*(k-2*(i&1)+4)/3::2*k] = False + return numpy.r_[2,3,((3*numpy.nonzero(sieve)[0][1:]+1)|1)] + + +def specgram(x, nfft, Fs, window, noverlap): + if nfft != len(window): + padding = nfft; + nfft = len(window); + else: + padding = None; + B_m, F_v, T_v = matplotlib.mlab.specgram(x, nfft, Fs, matplotlib.mlab.detrend_none, window, noverlap, padding, 'default', False); #False #''default' + T_v = T_v - T_v [0]; + return B_m, F_v, T_v; + + +def my_specgram(x, nfft, Fs, window, noverlap): + if nfft != len(window): + padding = nfft; + nfft = len(window); + else: + padding = len(window); + + hop = float(nfft-noverlap+1); + nb_trame = int(numpy.floor( float( (len(x)-noverlap) / hop))); + + step = hop/float(Fs); + T_v = numpy.arange(0, nb_trame, 1.) * step; + F_v = numpy.arange(0, padding, 1.) / float(padding) * float(Fs); + B_m = numpy.zeros( (padding, len(T_v)), complex); + + for i in xrange(0, nb_trame): + i0 = int(i * hop); + i1 = min(len(x), i0+nfft-1); + trame = numpy.zeros(nfft, float); + trame[0:len(x[i0:i1])] = x[i0:i1]; + B_m[:, i] = scipy.fft( trame * window, padding); + + if ~numpy.iscomplex(x).any(): + Hs = numpy.round(padding/2+1); + F_v = F_v[0:Hs]; + B_m = B_m[0:Hs, :]; + + return B_m, F_v, T_v; + +def centroid(x): + if numpy.ndim(x) < 2: + x = numpy.array([x, ]); + m,n = numpy.shape(x); + if m < n: + x = x.T; + m,n = numpy.shape(x); + M = numpy.array([numpy.arange(1,m+1,1.),]); + idx = numpy.repeat(M.T, n, axis=0); + c = numpy.sum(x * idx, axis=0.); + w = numpy.sum(x, axis=0); + return c / (w+EPS); + +def ERB(cf): + return 24.7 * (1. +4.37*cf/1000.); + +# e=ERBfromhz(f,formula) - convert frequency from Hz to erb-rate scale +# f: Hz - frequency +# formula: 'glasberg90' [default] or 'moore83' +def ERBfromhz(f, formula='glasberg90'): + + if formula == 'glasberg90': + e = 9.26 * numpy.log(0.00437*f + 1.); + else: #'moore83' + erb_k1 = 11.17; + erb_k2 = 0.312; + erb_k3 = 14.675; + erb_k4 = 43; + f = f/1000.; + e = erb_k1 * numpy.log((f + erb_k2) / (f + erb_k3)) + erb_k4; + return e; + +# y=gtwindow(n,b,order) - window shaped like a time-reversed gammatone envelope +def gtwindow(n, b=2., order=4.): + + t = numpy.arange(0,n, 1.) / n; + y = pow(b, order) * pow(t, (order-1)) * numpy.exp(-2. * numpy.pi * b * t); + y = numpy.flipud(y); + y = y/numpy.max(y); + return y; + +# y = ERBspace(lo,hi,N) - values uniformly spaced on erb-rate scale +def ERBspace(lo,hi,N=100): + EarQ = 9.26449; + minBW = 24.7; + order = 1; + a = EarQ * minBW; + cf = -a + numpy.exp((numpy.arange(0,N,1.))*(-numpy.log(hi + a) + numpy.log(lo + a))/(N-1)) * (hi + a); + y = numpy.squeeze(numpy.fliplr([cf,])); + return y; + +# b=frames(a,fsize,hopsize,nframes) - make matrix of signal frames +# b: matrix of frames (columns) +# a: signal vector +# fsize: samples - frame size +# hopsize: samples - interval between frames +# nframes: number of frames to return (default: as many as fits) +# +# hopsize may be fractional (frame positions rounded to nearest integer multiple) + +def frames(a,fsize,hopsize,nframes=0): + n = len(a); + max_nf = numpy.ceil((n-fsize)/hopsize); + if (nframes == 0) or (nframes > max_nf): + nframes = max_nf; + b = numpy.ones((fsize, nframes), int); + t = 1. + numpy.round( hopsize * numpy.arange(0, nframes, 1.) ); + b[0,:] = t; + b = numpy.cumsum(b, axis=0); + a = numpy.concatenate( (a, numpy.zeros(fsize, float))); + b = a[b]; + return b,t + + +def gtfbank( a, Fs, cfarray, bwfactor): + + lo = 30.; + hi = 16000.; + hi = min(hi, (Fs / 2. - numpy.squeeze(ERB(Fs/2.))/2.)); + nchans = numpy.round( 2. * ( ERBfromhz(hi) - ERBfromhz(lo)) ); + cfarray = ERBspace(lo, hi, nchans); + + nchans = len(cfarray); + n = len(a); + + # make array of filter coefficients + fcoefs = MakeERBCoeffs(Fs, cfarray, bwfactor); + A0 = fcoefs[:,0]; + A11 = fcoefs[:,1]; + A12 = fcoefs[:,2]; + A13 = fcoefs[:,3]; + A14 = fcoefs[:,4]; + A2 = fcoefs[:,5]; + B0 = fcoefs[:,6]; + B1 = fcoefs[:,7]; + B2 = fcoefs[:,8]; + gain= fcoefs[:,9]; + + b = numpy.zeros((nchans, n), float); + output = numpy.zeros((nchans, n),float); + for chan in xrange(0,nchans): + B = numpy.concatenate(([A0[chan]/gain[chan],], [A11[chan]/gain[chan],],[A2[chan]/gain[chan],])); + A = numpy.concatenate(([B0[chan],],[B1[chan],],[B2[chan],])); + y1 = scipy.signal.lfilter(B,A, a); + + B = numpy.concatenate(([A0[chan],],[A12[chan],],[A2[chan],])); + A = numpy.concatenate(([B0[chan],],[B1[chan],],[B2[chan],])); + y2 = scipy.signal.lfilter(B,A, y1); + + B = numpy.concatenate(([A0[chan],],[A13[chan],],[A2[chan],])); + A = numpy.concatenate(([B0[chan],],[B1[chan],],[B2[chan],])); + y3 = scipy.signal.lfilter(B, A, y2); + + B = numpy.concatenate(([A0[chan],],[A14[chan],],[A2[chan],])); + A = numpy.concatenate(([B0[chan],],[B1[chan],],[B2[chan],])); + y4 = scipy.signal.lfilter(B,A, y3); + b[chan, :] = y4; + return b #f + +def fbankpwrsmooth(a,Fs,cfarray): + nchans = len(cfarray); + # index matrix to apply pi/2 shift at each cf: + shift = numpy.round(Fs / (4.*cfarray)); + a = pow(a, 2.); + b = a * 0; + for j in xrange(0, nchans): + b[j,:] = a[j,:]+ numpy.concatenate( (a[j,shift[j]:], numpy.zeros(shift[j], float))) / 2.; + return b; + + +def rsmooth(x, smooth, npasses, clip=0): + smooth = int(smooth); + m,n = numpy.shape(x); + + mm = m + smooth + npasses*(smooth-1); + a = numpy.zeros( (mm,n), float); + b = numpy.zeros( (mm,n), float); + tmp = 0; + + # transfer data to buffer, preceded with leading zeros */ + a[smooth:(smooth+m), :] = x; + + # smooth repeatedly */ + for h in xrange(0, npasses): + for k in xrange(0,n): + b[smooth:mm, k] = numpy.cumsum( a[smooth:mm, k] - a[0:(mm-smooth), k] )/smooth; + tmp=a; a=b; b=tmp; + + if clip>0: + y = numpy.zeros( (m,n), float); + mm2 = npasses*(smooth-1)/2. + smooth; + y[:,:] = a[ mm2:(mm2+m), :]; + else: + mm2 = m+npasses*(smooth-1); + y = numpy.zeros( (mm2,n), float) + y[:,:] = a[smooth:mm2, :]; + return y; + + +def MakeERBCoeffs(Fs, cfArray=0, Bfactor=1.): + + if len(cfArray) < 1: + cfArray = ERBSpace(100., Fs/4,25); + T = 1./Fs; + EarQ = 9.26449; + minBW = 24.7; + order = 1; + cf = cfArray; + + vERB = pow( pow(cf/EarQ, order) + pow(minBW,order), 1./order); + B = 1.019 * 2 * numpy.pi * vERB * Bfactor; + + A0 = T; + A2 = 0; + B0 = 1; + B1 = -2 * numpy.cos(2*cf*numpy.pi*T) / numpy.exp(B*T); + B2 = numpy.exp(-2*B*T); + + A11 = -(2*T*numpy.cos(2*cf*numpy.pi*T) / numpy.exp(B*T) + 2*numpy.sqrt(3+pow(2,1.5))*T*numpy.sin(2*cf*numpy.pi*T) / numpy.exp(B*T))/2.; + A12 = -(2*T*numpy.cos(2*cf*numpy.pi*T) / numpy.exp(B*T) - 2*numpy.sqrt(3+pow(2,1.5))*T*numpy.sin(2*cf*numpy.pi*T) / numpy.exp(B*T))/2.; + A13 = -(2*T*numpy.cos(2*cf*numpy.pi*T) / numpy.exp(B*T) + 2*numpy.sqrt(3-pow(2,1.5))*T*numpy.sin(2*cf*numpy.pi*T) / numpy.exp(B*T))/2.; + A14 = -(2*T*numpy.cos(2*cf*numpy.pi*T) / numpy.exp(B*T) - 2*numpy.sqrt(3-pow(2,1.5))*T*numpy.sin(2*cf*numpy.pi*T) / numpy.exp(B*T))/2.; + + gain = numpy.abs((-2*numpy.exp(4*1j*cf*numpy.pi*T)*T + 2. * numpy.exp(-(B*T) + 2*1j*cf*numpy.pi*T) * T * (numpy.cos(2. * cf * numpy.pi * T) - numpy.sqrt(3 - pow(2,3./2.)) * numpy.sin(2*cf*numpy.pi*T))) * (-2. * numpy.exp(4. * 1j * cf * numpy.pi*T)*T + 2. * numpy.exp(-(B*T) + 2*1j*cf*numpy.pi*T) * T * (numpy.cos(2.*cf*numpy.pi*T) + numpy.sqrt(3. - pow(2,3./2.)) * numpy.sin(2*cf*numpy.pi*T))) * (-2.*numpy.exp(4.*1j*cf*numpy.pi*T)*T + 2. * numpy.exp(-(B*T) + 2 * 1j * cf * numpy.pi*T) * T * (numpy.cos(2. * cf *numpy.pi*T) - numpy.sqrt(3. + pow(2,3/2)) * numpy.sin(2. * cf * numpy.pi*T))) * (-2. * numpy.exp( 4*1j*cf*numpy.pi*T) * T + 2. * numpy.exp(-(B*T) + 2*1j*cf*numpy.pi*T) * T * (numpy.cos(2. * cf * numpy.pi * T) + numpy.sqrt(3. + pow(2, 3/2)) * numpy.sin(2. * cf * numpy.pi*T))) / pow(-2. / numpy.exp(2.*B*T) - 2. * numpy.exp(4.*1j*cf*numpy.pi*T) + 2. * (1. + numpy.exp(4.*1j*cf*numpy.pi*T)) / numpy.exp(B*T), 4.)); + allfilts = numpy.ones(len(cf), float); + fcoefs = numpy.concatenate( ([A0*allfilts,], [A11,], [A12,], [A13,], [A14,], [A2*allfilts,], [B0*allfilts,], [B1,], [B2,], [gain,]) ).T; + #print "SHAPE", numpy.shape(fcoefs); + + return fcoefs; + + +def chirpz(x,A,W,M): + A = np.complex(A) + W = np.complex(W) + if np.issubdtype(np.complex,x.dtype) or np.issubdtype(np.float,x.dtype): + dtype = x.dtype + else: + dtype = float + x = np.asarray(x,dtype=np.complex); + N = x.size; + L = int(2**np.ceil(np.log2(M+N-1))); + + n = np.arange(N,dtype=float); + y = np.power(A,-n) * np.power(W,n**2 / 2.) * x; + Y = np.fft.fft(y,L); + + v = np.zeros(L,dtype=np.complex); + v[:M] = np.power(W,-n[:M]**2/2.); + v[L-N+1:] = np.power(W,-n[N-1:0:-1]**2/2.); + V = np.fft.fft(v); + g = np.fft.ifft(V*Y)[:M]; + k = np.arange(M); + g *= np.power(W,k**2 / 2.); + return g; diff --git a/timeside/analyzer/labri/swipep.py b/timeside/analyzer/labri/swipep.py new file mode 100644 index 0000000..4fb8400 --- /dev/null +++ b/timeside/analyzer/labri/swipep.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +# +# swipep.py +# +# Copyright (c) 2014 Dominique Fourer + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see . + +# Author: D Fourer http://www.fourer.fr + +import numpy +import scipy +import scipy.interpolate +import my_tools as mt + +# SWIPEP Pitch estimation using SWIPE'. +# P = SWIPEP(X,Fs,[PMIN PMAX],DT,DLOG2P,DERBS,STHR) estimates the pitch +# of the vector signal X every DT seconds. The sampling frequency of +# the signal is Fs (in Hertz). The spectrum is computed using a Hann +# window with an overlap WOVERLAP between 0 and 1. The spectrum is +# sampled uniformly in the ERB scale with a step size of DERBS ERBs. The +# pitch is searched within the range [PMIN PMAX] (in Hertz) with samples +# distributed every DLOG2P units on a base-2 logarithmic scale of Hertz. +# The pitch is fine-tuned using parabolic interpolation with a resolution +# of 1 cent. Pitch estimates with a strength lower than STHR are treated +# as undefined. +# +# [P,T,S] = SWIPEP(X,Fs,[PMIN PMAX],DT,DLOG2P,DERBS,WOVERLAP,STHR) +# returns the times T at which the pitch was estimated and the pitch +# strength S of every pitch estimate. +# +# P = SWIPEP(X,Fs) estimates the pitch using the default settings PMIN = +# 30 Hz, PMAX = 5000 Hz, DT = 0.001 s, DLOG2P = 1/48 (48 steps per +# octave), DERBS = 0.1 ERBs, WOVERLAP = 0.5, and STHR = -Inf. +# +# P = SWIPEP(X,Fs,...,[],...) uses the default setting for the parameter +# replaced with the placeholder []. +# +# REMARKS: (1) For better results, make DLOG2P and DERBS as small as +# possible and WOVERLAP as large as possible. However, take into account +# that the computational complexity of the algorithm is inversely +# proportional to DLOG2P, DERBS and 1-WOVERLAP, and that the default +# values have been found empirically to produce good results. Consider +# also that the computational complexity is directly proportional to the +# number of octaves in the pitch search range, and therefore , it is +# recommendable to restrict the search range to the expected range of +# pitch, if any. (2) This code implements SWIPE', which uses only the +# first and prime harmonics of the signal. To convert it into SWIPE, +# which uses all the harmonics of the signal, replace the word +# PRIMES with a colon (it is located almost at the end of the code). +# However, this may not be recommendable since SWIPE' is reported to +# produce on average better results than SWIPE (Camacho and Harris, +# 2008). +# +# EXAMPLE: Estimate the pitch of the signal X every 10 ms within the +# range 75-500 Hz using the default resolution (i.e., 48 steps per +# octave), sampling the spectrum every 1/20th of ERB, using a window +# overlap factor of 50%, and discarding samples with pitch strength +# lower than 0.2. Plot the pitch trace. +# [x,Fs] = wavread(filename); +# [p,t,s] = swipep(x,Fs,[75 500],0.01,[],1/20,0.5,0.2); +# plot(1000*t,p) +# xlabel('Time (ms)') +# ylabel('Pitch (Hz)') +# +# REFERENCES: Camacho, A., Harris, J.G, (2008) "A sawtooth waveform +# inspired pitch estimator for speech and music," J. Acoust. Soc. Am. +# 124, 1638-1652. + +def swipep(x, fs,plim=numpy.array([30, 5000]),dt=0.001,dlog2p=float(1./48.), dERBs=0.1, woverlap=0.5, sTHR=-numpy.infty): + fs = float(fs); + if woverlap>1 or woverlap<0: + woverlap=0.5 + print "Window overlap must be between 0 and 1." + t = numpy.arange(0, len(x)/fs+dt, dt); # Times + # Define pitch candidates + log2pc = numpy.arange(numpy.log2(plim[0]), numpy.log2(plim[1]), dlog2p); + pc = pow(2., log2pc); + S = numpy.zeros( (len(pc), len(t)), float ); # Pitch strength matrix + # Determine P2-WSs + logWs = numpy.round( numpy.log2( 8. * fs / plim ) ); + ws = pow(2., numpy.arange(logWs[0], logWs[1]-1, -1.)); # P2-WSs + pO = 8. * fs / ws; # Optimal pitches for P2-WSs + # Determine window sizes used by each pitch candidate + d = 1. + log2pc - numpy.log2( 8. * fs / ws[0] ); + + # Create ERB-scale uniformly-spaced frequencies (in Hertz) + fERBs = mt.erbs2hz( numpy.arange( mt.hz2erbs( min(pc)/4), mt.hz2erbs(fs/2.), dERBs )); + + for i in xrange(0, len(ws)): + dn = max( 1, numpy.round( 8*(1-woverlap) * fs / pO[i] ) ); # Hop size + # Zero pad signal + xzp = numpy.concatenate( ( numpy.zeros( ws[i]/2, float), x, numpy.zeros( dn + ws[i] / 2, float)) ); + # Compute spectrum + w = scipy.signal.hanning( ws[i] ); # Hann window + o = max( 0, numpy.round( ws[i] - dn ) ); # Window overlap + + X, f, ti = mt.my_specgram( xzp, int(ws[i]), int(fs), w, int(o)); # mt.specgram + + #Select candidates that use this window size + if len(ws) == 1: + j = numpy.array([pc]); + k = numpy.array([]); + else: + if i == (len(ws)-1): + j = (d-(i+1)>-1).nonzero()[0]; + k = (d[j]-(i+1)<0).nonzero()[0]; + else: + if i == 0: + j = ( (d-(i+1))<1).nonzero()[0]; + k = (d[j]-(i+1)>0).nonzero()[0]; + else: + j = (abs(d-(i+1))<1).nonzero()[0]; + k = numpy.arange(0, len(j), 1); + + # Compute loudness at ERBs uniformly-spaced frequencies + i_tmp = (fERBs > pc[j[0]]/4.).nonzero()[0][0]; + fERBs = fERBs[ i_tmp:]; + #L = numpy.sqrt( max( 0, scipy.interpolate.interp1d( f, abs(X), fERBs, 'spline') ) ); + L = numpy.sqrt( mt.my_interpolate( f, abs(X), fERBs, 'linear')); #'spline' linear + # Compute pitch strength + Si = pitchStrengthAllCandidates( fERBs, L, pc[j] ); + # Interpolate pitch strength at desired times + if numpy.shape(Si)[1] > 1: + Si = mt.my_interpolate(ti, Si.T, t, 'linear'); # numpy.interp( ti, Si, t); + Si = Si.T; + else: + Si = numpy.repeat( numpy.repeat( numpy.nan, numpy.shape(Si)[0], axis=0), len(t), axis=1); + # Add pitch strength to combination + lmbda = d[ j[k] ] - (i+1); + mu = numpy.ones( len(j), float ); + mu[k] = 1. - abs( lmbda ); + S[j,:] = S[j,:] + numpy.repeat(numpy.array([mu,]).T, numpy.shape(Si)[1], axis=1) * Si; + #end loop + + # Fine tune pitch using parabolic interpolation + p = numpy.repeat( numpy.nan, numpy.shape(S)[1], 0 ); + s = p; + for j in xrange(0,numpy.shape(S)[1]): + s[j], i = mt.my_max( S[:,j] ); + if s[j] < sTHR: + continue; + if i == 0 or i == len(pc)-1: + p[j] = pc[i]; + else: + I = numpy.arange(i-1, i+2, 1); + tc = 1. / pc[I]; + ntc = ( tc/tc[1] - 1 ) * 2 * numpy.pi; + c = numpy.polyfit( ntc, S[I,j], 2 ); + ftc = 1. / pow(2., numpy.arange( numpy.log2(pc[I[0]]), numpy.log2(pc[I[2]]), 1./12./100. )); + nftc = ( ftc/tc[1] - 1 ) * 2 * numpy.pi; + s[j], k = mt.my_max( numpy.polyval( c, nftc ) ); + p[j] = 2. ** ( numpy.log2(pc[I[0]]) + (k-1)/12./100. ); + return p, t, s + + +def pitchStrengthAllCandidates( f, L, pc ): + # Create pitch strength matrix + S = numpy.zeros( (len(pc), numpy.shape(L)[1]), float ); + # Define integration regions + k = numpy.zeros( len(pc)+1, int ); #ones + for j in xrange(0, len(k)-1): + k[j+1] = k[j] + (f[ k[j]: ] > pc[j]/4.).nonzero()[0][0]; # -1 + k = k[1:]; + # Create loudness normalization matrix + N = numpy.sqrt( numpy.flipud( numpy.cumsum( numpy.flipud(L*L), axis=0 ) ) ); + for j in xrange(0, len(pc)): + # Normalize loudness + n = N[k[j],:]; + n[ (n==0).nonzero()[0] ] = numpy.inf; # to make zero-loudness equal zero after normalization + NL = L[k[j]:,:] / numpy.repeat( numpy.array([n,]), numpy.shape(L)[0]-k[j], axis=0); #+1 + # Compute pitch strength + S[j,:] = pitchStrengthOneCandidate( f[k[j]:], NL, pc[j] ); ## shape(NL) ? + return S + + +def pitchStrengthOneCandidate( f, NL, pc ): + n = numpy.fix( f[len(f)-1] / pc - 0.75 ); # Number of harmonics + if n == 0: + return numpy.nan + + k = numpy.zeros( (numpy.shape(f)), float ); # Kernel + #Normalize frequency w.r.t. candidate + q = numpy.dot(f, pow(pc, -1.) ); + # Create kernel + for i in numpy.concatenate( ([1], mt.primes(n))): + a = abs( q - i ); + # Peak's weigth + p = a < .25; + k[p] = numpy.cos( 2. * numpy.pi * q[p] ); + # Valleys' weights + v = (.25 < a) & (a < .75); + k[v] = k[v] + numpy.cos( 2. * numpy.pi * q[v] ) / 2.; + + # Apply envelope + k = k * numpy.sqrt( 1. / f ); + # K+-normalize kernel + k = k / numpy.linalg.norm( k[ k>0 ] ); + # Compute pitch strength + S = numpy.dot( k, NL); + return S; + diff --git a/timeside/analyzer/labri/timbre_descriptor.py b/timeside/analyzer/labri/timbre_descriptor.py new file mode 100644 index 0000000..0a89085 --- /dev/null +++ b/timeside/analyzer/labri/timbre_descriptor.py @@ -0,0 +1,1123 @@ +# -*- coding: utf-8 -*- +# +# timbre_descriptor.py +# +# Copyright (c) 2014 Dominique Fourer + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see . + +# Author: D Fourer http://www.fourer.fr + +import numpy +from collections import namedtuple +import scipy +import scipy.signal +from scipy.io import wavfile + +import matplotlib +import swipep as swp # used for single-F0 estimation +import warnings # used for warning removal +import time # used performance benchmark +import my_tools as mt + + +EPS = mt.EPS +NB_DESC = 164; +desc_settings = namedtuple("desc_settings", "b_TEE b_STFTmag b_STFTpow b_Harmonic b_ERBfft b_ERBgam xcorr_nb_coeff threshold_harmo nb_harmo"); + +desc_rep = ["TEE", "AS", "STFTmag", "STFTpow", "Harmonic", "ERBfft", "ERBgam"]; +dTEE = namedtuple("dTEE", "Att Dec Rel LAT AttSlope DecSlope TempCent EffDur FreqMod AmpMod RMSEnv"); #ADSR_v +dAS = namedtuple("dAS_s", "AutoCorr ZcrRate"); #ADSR_v +dSTFT = namedtuple("dSTFT", "SpecCent SpecSpread SpecSkew SpecKurt SpecSlope SpecDecr SpecRollOff SpecVar FrameErg SpecFlat SpecCrest"); +dHAR = namedtuple("dHAR", "FrameErg HarmErg NoiseErg Noisiness F0 InHarm TriStim1 TriStim2 TriStim3 HarmDev OddEvenRatio SpecCent SpecSpread SpecSkew SpecKurt SpecSlope SpecDecr SpecRollOff SpecVar"); + +dPart = namedtuple("dPart", "f_Freq_v f_Ampl_v"); + +config_s = desc_settings( +b_TEE = 1, # descriptors from the Temporal Energy Envelope +b_STFTmag = 1, # descriptors from the STFT magnitude +b_STFTpow = 1, # descriptors from the STFT power +b_Harmonic = 1, # descriptors from Harmonic Sinusoidal Modeling representation +b_ERBfft = 1, # descriptors from ERB representation (ERB being computed using FFT) +b_ERBgam = 1, # descriptors from ERB representation (ERB being computed using Gamma Tone Filter) +xcorr_nb_coeff = 12, # === defines the number of auto-correlation coefficients that will be sued +threshold_harmo = 0.3, # === defines the threshold [0,1] below which harmonic-features are not computed +nb_harmo = 20); # === defines the number of harmonics that will be extracted + +nbits = 16; +MAX_VAL = pow(2,(nbits-1)) * 1.0; + + + +#################################################################################### +#### Main functions #### +#################################################################################### + +# Compute statistics from time series (median / iqr) +# name: unknown +# @param +# @return +# +def temporalmodeling(desc): + + field_name = []; + param_val = numpy.zeros(NB_DESC, float) + index = 0; + + for i in xrange(0, len(desc)): + for j in xrange(0, len(desc[i]._fields)): + + if i == 0 and j < len(desc[i]._fields)-1: + field_n = [desc[i]._fields[j]]; + field_v = numpy.array([desc[i][j]]); + else: ## compute median / IRQ + if desc[i]._fields[j] == "AutoCorr": + field_n = []; + field_v = numpy.zeros(config_s.xcorr_nb_coeff * 2, float); + for k in xrange(0, config_s.xcorr_nb_coeff): + field_n.append( desc[i]._fields[j]+str(k+1)+"_median" ) + field_n.append( desc[i]._fields[j]+str(k+1)+"_iqr" ) + field_v[k*2] = numpy.median(desc[i][j][k,:]); + field_v[k*2+1] = 0.7413 * mt.IQR(desc[i][j][k,:]); + else: + field_n = [ desc[i]._fields[j]+"_median" , desc[i]._fields[j]+"_iqr"]; + field_v = numpy.array( [ numpy.median(desc[i][j]) , 0.7413 * mt.IQR( desc[i][j] ) ]); + + ## add values + for f in xrange(0,len(field_n)): + field_name.append(desc_rep[i]+"_"+field_n[f]); + param_val[index] = field_v[f]; + index = index +1; + + return param_val, field_name + + + +# Main Function : compute all descriptor for given signal trame_s +# name: unknown +# @param +# @return +# +def compute_all_descriptor(trame_s, Fs): + + if numpy.isscalar(Fs): + Fs = numpy.array([Fs]); + + trame_s = trame_s / (numpy.max(trame_s) + EPS); # normalize input sound + ret_desc = numpy.zeros((1, NB_DESC), float); + + ## 1) descriptors from the Temporal Energy Envelope (do_s.b_TEE=1) [OK] + desc_TEE, desc_AS = TCalcDescr(trame_s, Fs); + + ## 2) descriptors from the STFT magnitude and STFT power (do_s.b_STFTmag= 1) + S_mag, S_pow, i_SizeX, i_SizeY, f_SupX_v, f_SupY_v = FFTrep(trame_s, Fs); + desc_STFTmag = FCalcDescr(S_mag, i_SizeX, i_SizeY, f_SupX_v, f_SupY_v); + desc_STFTpow = FCalcDescr(S_pow, i_SizeX, i_SizeY, f_SupX_v, f_SupY_v); + + #3) descriptors from Harmonic Sinusoidal Modeling representation (do_s.b_Harmonic=1) + f0_hz_v, f_DistrPts_m, PartTrax_s = FHarrep(trame_s, Fs); + desc_Har = HCalcDescr(f0_hz_v, f_DistrPts_m, PartTrax_s); + + #m = scipy.io.loadmat('sig_erb.mat'); + #trame_s = numpy.squeeze(m['f_Sig_v']); + + #4) descriptors from ERB representation (ERB being computed using FFT) + S_erb, S_gam, i_SizeX1, i_SizeY1, f_SupX_v1, f_SupY_v1, i_SizeX2, i_SizeY2, f_SupX_v2, f_SupY_v2 = ERBRep(trame_s, Fs); + + desc_ERB = FCalcDescr(S_erb, i_SizeX1, i_SizeY1, f_SupX_v1, f_SupY_v1); + desc_GAM = FCalcDescr(S_gam, i_SizeX2, i_SizeY2, f_SupX_v2, f_SupY_v2); + + return desc_TEE, desc_AS, desc_STFTmag, desc_STFTpow, desc_Har, desc_ERB, desc_GAM; + +# Short term all descriptor computation +# return a matrix completed with descriptor values +# @param +# @return +# +# Compute descriptors for given signal s +# Temporal approach with frame processing +def timbre_desc_time(s, Fs, N, rec): + step = round(N/rec); + nb_trame = int(numpy.ceil( len(s) / step)); + T_NOISE = -60; #-numpy.inf; + t = numpy.zeros(nb_trame,float) + desc_struc = numpy.zeros((nb_trame, NB_DESC), float); + + ## loop to compute all descriptors + for k in xrange(0, nb_trame): + i0 = int(k*step); + i1 = int(min(len(s)-1, i0+N-1)); + t[k] = (i0 + i1)/2 * Fs; + trame_s = s[i0:(i1+1)]; + rms_v,t0 = mt.rms(trame_s); + rms_v = 10 * numpy.log10(rms_v); + + if rms_v > T_NOISE: + print "rms=",rms_v,"\n" + desc = compute_all_descriptor(trame_s, Fs); + param_val, field_name = temporalmodeling(desc); + desc_struc[k, :] = param_val; + return desc_struc, t; + +# Compute ERB/Gammatone representation +# name: unknown +# @param +# @return +# +def ERBRep(f_Sig_v, Fs): + f_HopSize_sec = 256./44100.; + i_HopSize = f_HopSize_sec * Fs; + f_Exp = 0.25; + + f_Sig_v_hat = outmidear(f_Sig_v, Fs); + + ## ERB + S_erb,f_erb,t_erb = ERBpower( f_Sig_v_hat, Fs, i_HopSize); + S_erb = pow(S_erb, f_Exp); #cochleagram + i_SizeX1 = len(t_erb); + i_SizeY1 = len(f_erb); + f_SupX_v1 = t_erb; + f_SupY_v1 = f_erb/Fs; + + + ## GAMMATONE + S_gam,f_gam,t_gam = ERBpower2(f_Sig_v_hat, Fs, i_HopSize); + S_gam = pow(S_gam, f_Exp); #cochleagram + i_SizeX2 = len(t_gam); + i_SizeY2 = len(f_gam); + f_SupX_v2 = t_gam; + f_SupY_v2 = f_gam/Fs; + + return S_erb, S_gam, i_SizeX1, i_SizeY1, f_SupX_v1, f_SupY_v1, i_SizeX2, i_SizeY2, f_SupX_v2, f_SupY_v2; + + +# Compute ERB spectrum +# name: unknown +# @param +# @return +# +def ERBpower(a,Fs, hopsize, bwfactor=1.): + lo = 30.; + hi = 16000.; + hi = min(hi, (Fs/2.-mt.ERB(Fs/2.)/2.)); + nchans = numpy.round(2.*(mt.ERBfromhz(hi)-mt.ERBfromhz(lo))); + cfarray = mt.ERBspace(lo,hi,nchans); + nchans = len(cfarray); + + bw0 = 24.7; # Hz - base frequency of ERB formula (= bandwidth of "0Hz" channel) + b0 = bw0/0.982; # gammatone b parameter (Hartmann, 1997) + ERD = 0.495 / b0; # based on numerical calculation of ERD + wsize = pow(2., mt.nextpow2(ERD*Fs*2)); + window = mt.gtwindow(wsize, wsize/(ERD*Fs)); + + # pad signal with zeros to align analysis point with window power centroid + m = len(a); + offset = numpy.round( mt.centroid( pow(window, 2.)) ); + a = numpy.concatenate( (numpy.zeros(offset), a, numpy.zeros(wsize-offset)) ); + + # matrix of windowed slices of signal + fr, startsamples = mt.frames(a,wsize,hopsize); + nframes = numpy.shape(fr)[1]; + fr = fr * numpy.repeat(numpy.array([window,]).T, nframes, axis=1); + wh = int( numpy.round( wsize/2. )); + + # power spectrum + pwrspect = pow( abs(scipy.fft(fr, int(wsize) , axis=0)), 2.); + pwrspect = pwrspect[0:wh, :]; + + # array of kernel bandwidth coeffs: + b = mt.ERB(cfarray) / 0.982; + b = b * bwfactor; + bb = numpy.sqrt( pow(b, 2.) - pow(b0, 2.)); + + # matrix of kernels (array of gammatone power tfs sampled at fft spectrum frequencies). + iif = numpy.array([numpy.arange(1, wh+1),]); + + f = numpy.repeat( iif * Fs / wsize, nchans, axis=0).T; + cf = numpy.repeat( numpy.array([cfarray,]) , wh, axis=0); + bb = numpy.repeat( numpy.array([bb,]), wh, axis=0); + + wfunct = pow( abs(1./pow(1j * (f - cf) + bb, 4.)), 2.); + adjustweight = mt.ERB(cfarray) / sum(wfunct); + wfunct = wfunct * numpy.repeat( numpy.array([adjustweight,]), wh, axis=0); + wfunct = wfunct / numpy.max(numpy.max(wfunct)); + + # multiply fft power spectrum matrix by weighting function matrix: + c = numpy.dot(wfunct.T, pwrspect); + f = cfarray; + t = startsamples/Fs; + return c,f,t; + +# Compute Gammatone spectrum +# name: unknown +# @param +# @return +# +def ERBpower2(a, Fs, hopsize, bwfactor=1.): + + lo = 30.; # Hz - lower cf + hi = 16000.; # Hz - upper cf + hi = min(hi, (Fs/ 2. - numpy.squeeze(mt.ERB( Fs / 2.)) / 2. )); # limit to 1/2 erb below Nyquist + + nchans = numpy.round( 2. * (mt.ERBfromhz(hi)-mt.ERBfromhz(lo)) ); + cfarray = mt.ERBspace(lo,hi,nchans); + nchans = len(cfarray); + + # apply gammatone filterbank + b = mt.gtfbank(a, Fs, cfarray, bwfactor); + + # instantaneous power + b = mt.fbankpwrsmooth(b, Fs, cfarray); + + # smooth with a hopsize window, downsample + b = mt.rsmooth(b.T, hopsize, 1, 1); + + b = numpy.maximum(b.T, 0); + m,n = numpy.shape(b); + + nframes = numpy.floor( float(n) / hopsize ); + startsamples= numpy.squeeze(numpy.array(numpy.round(numpy.arange(0, nframes, 1) * int(hopsize)), int)); + + c = b[:,startsamples]; + f = cfarray; + t = startsamples / Fs; + + return c, f, t + + +# Compute Harmonic representation (seems Ok) +# name: unknown +# @param +# @return +# +def FHarrep(f_Sig_v, Fs): + f0_hz_v = []; + PartTrax_s = []; + f_DistrPts_m = []; + + p_v, t_v, s_v = swp.swipep(f_Sig_v, float(Fs), numpy.array([50,500]), 0.01, 1./ 48., 0.1, 0.2, -numpy.inf); + + # remove nan values + i_n = (~numpy.isnan(p_v)).nonzero()[0]; + p_v = p_v[i_n]; t_v = t_v[i_n]; s_v = s_v[i_n]; + + if max(s_v) > config_s.threshold_harmo: + f0_bp = numpy.zeros( (len(t_v), 2), float); + f0_bp[:,0] = t_v; + f0_bp[:,1] = p_v; + else: + f0_bp = []; + return f0_hz_v, f_DistrPts_m, PartTrax_s; #, f_SupX_v, f_SupY_v; + + # ========================================================== + # === Compute sinusoidal harmonic parameters + L_sec = 0.1; # === analysis widow length + STEP_sec = L_sec/4.; # === hop size + L_n = numpy.round(L_sec*Fs); + STEP_n = numpy.round(STEP_sec*Fs); + N = int(4*pow(2. , mt.nextpow2(L_n))); # === large zero-padding to get better frequency resolution + fenetre_v = numpy.ones(L_n, float); #scipy.signal.boxcar(L_n); + fenetre_v = 2 * fenetre_v / sum(fenetre_v); + + B_m,F_v,T_v = mt.my_specgram(f_Sig_v, int(N), int(Fs), fenetre_v, int(L_n-STEP_n)); #mt.specgram + B_m = abs(B_m); + + T_v = T_v+L_sec/2.; + nb_frame = numpy.shape(B_m)[1]; + f_DistrPts_m = pow(abs(B_m), 2.); + + lag_f0_hz_v = numpy.arange(-5, 5+0.1, 0.1); + nb_delta = len(lag_f0_hz_v); + inharmo_coef_v = numpy.arange(0, 0.001+0.00005, 0.00005); + nb_inharmo = len(inharmo_coef_v); + totalenergy_3m = numpy.zeros( (nb_frame, len(lag_f0_hz_v), len(inharmo_coef_v)), float); + stock_pos_4m = numpy.zeros( (nb_frame, len(lag_f0_hz_v), len(inharmo_coef_v), config_s.nb_harmo), int); + + f0_hz_v = Fevalbp(f0_bp, T_v); + # === candidate_f0_hz_m (nb_frame, nb_delta) + candidate_f0_hz_m = numpy.repeat(numpy.array([f0_hz_v,]).T, nb_delta, axis=1) + numpy.repeat([lag_f0_hz_v,], nb_frame, axis=0); + stock_f0_m = candidate_f0_hz_m; + + h = numpy.arange(1, config_s.nb_harmo+1.,1.); + for num_inharmo in xrange(0, nb_inharmo): + inharmo_coef = inharmo_coef_v[num_inharmo]; + nnum_harmo_v = h * numpy.sqrt( 1 + inharmo_coef * pow(h, 2.)); + + for num_delta in xrange(0, nb_delta): + # === candidate_f0_hz_v (nb_frame, 1) + candidate_f0_hz_v = candidate_f0_hz_m[:,num_delta]; + # === candidate_f0_hz_m (nb_frame, nb_harmo): (nb_frame,1)*(1,nb_harmo) + C1 = numpy.array([candidate_f0_hz_v,]).T; + C2 = numpy.array([nnum_harmo_v,]); + candidate_harmo_hz_m = numpy.dot(C1, C2); + # === candidate_f0_hz_m (nb_frame, nb_harmo) + candidate_harmo_pos_m = numpy.array( numpy.round(candidate_harmo_hz_m/Fs*N)+1, int ); + stock_pos_4m[:, num_delta, num_inharmo, :] = candidate_harmo_pos_m; + for num_frame in xrange(0, nb_frame): + totalenergy_3m[num_frame, num_delta, num_inharmo] = numpy.sum( B_m[ candidate_harmo_pos_m[num_frame, :], num_frame] ); + + # === choix du coefficient d'inharmonicite + score_v = numpy.zeros( nb_inharmo, float); + for num_inharmo in xrange(0, nb_inharmo): + score_v[num_inharmo] = numpy.sum( numpy.max( numpy.squeeze( totalenergy_3m[:, :, num_inharmo]), axis=0) ); + + max_value, max_pos = mt.my_max(score_v); + calcul = (score_v[max_pos]-score_v[0])/ (EPS+ score_v[0]); + if calcul > 0.01: + num_inharmo = max_pos; + else: + num_inharmo = 1; + totalenergy_2m = numpy.squeeze( totalenergy_3m[:, :, num_inharmo] ); + + PartTrax_s = numpy.zeros( (nb_frame, 2, 20), float); + for num_frame in xrange(0, nb_frame): + max_value, num_delta = mt.my_max(totalenergy_2m[num_frame, :]); + f0_hz_v[num_frame] = stock_f0_m[num_frame,num_delta]; + cur_par = dPart; + f_Freq_v = numpy.squeeze( F_v[ stock_pos_4m[num_frame,num_delta,num_inharmo, :] ] ); + f_Ampl_v = B_m[ stock_pos_4m[num_frame,num_delta,num_inharmo,:], num_frame]; + PartTrax_s[num_frame, 0, :] = f_Freq_v; + PartTrax_s[num_frame, 1, :] = f_Ampl_v; + + return f0_hz_v, f_DistrPts_m, PartTrax_s; + + +# HarRep sub-routine - (seems Ok) +# name: unknown +# @param +# @return +# +def Fevalbp(bp, x_v): + + y_v = numpy.zeros(len(x_v), float); + pos1 = (x_v < bp[0,0]).nonzero()[0]; + if len(pos1) > 0: + y_v[pos1] = bp[0,1]; + pos2 = (x_v > bp[numpy.shape(bp)[0]-1,0]).nonzero()[0]; + if len(pos2)>0: + y_v[pos2] = bp[numpy.shape(bp)[0]-1, 1]; + pos = ((x_v >= bp[0,0]) & (x_v <= bp[numpy.shape(x_v)[0]-1, 1])).nonzero()[0]; + + if len(x_v[pos]) > 1: + y_v[pos] = mt.my_interpolate( bp[:,0], bp[:,1], x_v[pos], 'linear'); + else: + for n in xrange(0, len(x_v)): + x = x_v[n]; + min_value, min_pos = mt.my_min( abs( bp[:, 0] - x)); + L = numpy.shape(bp)[0]; + t1 = (bp[min_pos, 0] == x) or (L == 1); + t2 = ( bp[min_pos, 0] < x) and (min_pos == L); + t3 = (bp[min_pos, 0] > x) and (min_pos == 0); + if t1 or t2 or t3: + y_v[n] = bp[min_pos,1]; + else: + if bp[min_pos, 0] < x: + y_v[n] = (bp[min_pos+1, 0] - bp[min_pos, 0]) / (bp[min_pos+1,0] - bp[min_pos,0]) * (x - bp[min_pos, 0]) + bp[min_pos, 0]; + else: + if bp[min_pos, 0] > x: + y_v[n] = (bp[min_pos, 1] - bp[min_pos-1,1]) / (bp[min_pos, 0] - bp[min_pos-1, 0]) * (x - bp[min_pos-1,0]) + bp[min_pos-1,1]; + + return y_v + + +# Compute Harmonic descriptors +# name: unknown +# @param +# @return +# +def HCalcDescr(f_F0_v, f_DistrPts_m, PartTrax_s): + + i_Offset = 0; + i_EndFrm = len(PartTrax_s); + + if i_EndFrm == 0: + return dHAR(FrameErg=0,HarmErg=0,NoiseErg=0,Noisiness=0,F0=0, + InHarm=0,TriStim1=0,TriStim2=0,TriStim3=0,HarmDev=0,OddEvenRatio=0, + SpecCent=0,SpecSpread=0,SpecSkew=0,SpecKurt=0,SpecSlope=0, + SpecDecr=0,SpecRollOff=0,SpecVar=0); + else: + desc_har = dHAR( + FrameErg = numpy.zeros(i_EndFrm-1, float), + HarmErg = numpy.zeros(i_EndFrm-1, float), + NoiseErg = numpy.zeros(i_EndFrm-1, float), + Noisiness = numpy.zeros(i_EndFrm-1, float), + F0 = numpy.zeros(i_EndFrm-1, float), + InHarm = numpy.zeros(i_EndFrm-1, float), + TriStim1 = numpy.zeros(i_EndFrm-1, float), + TriStim2 = numpy.zeros(i_EndFrm-1, float), + TriStim3 = numpy.zeros(i_EndFrm-1, float), + HarmDev = numpy.zeros(i_EndFrm-1, float), + OddEvenRatio = numpy.zeros(i_EndFrm-1, float), + SpecCent = numpy.zeros(i_EndFrm-1, float), + SpecSpread = numpy.zeros(i_EndFrm-1, float), + SpecSkew = numpy.zeros(i_EndFrm-1, float), + SpecKurt = numpy.zeros(i_EndFrm-1, float), + SpecSlope = numpy.zeros(i_EndFrm-1, float), + SpecDecr = numpy.zeros(i_EndFrm-1, float), + SpecRollOff = numpy.zeros(i_EndFrm-1, float), + SpecVar = numpy.zeros(i_EndFrm-1, float)) + + + for i in xrange( 1, i_EndFrm): + # === Energy + f_Energy = sum( f_DistrPts_m[:, i+i_Offset] ); + f_HarmErg = sum( pow(PartTrax_s[i,1,:] , 2.) ); #Amp + f_NoiseErg = f_Energy - f_HarmErg; + + # === Noisiness + f_Noisiness = f_NoiseErg / (f_Energy+EPS); + + # === Inharmonicity + i_NumHarm = len( PartTrax_s[i,1, :] ); + if( i_NumHarm < 5 ): + f_InHarm = []; + continue; + + f_Harms_v = f_F0_v[i] * numpy.arange(1, i_NumHarm+1, 1.); + f_InHarm = numpy.sum( abs(PartTrax_s[i, 0, :] - f_Harms_v) * pow(PartTrax_s[i,1,:], 2.) ) / (numpy.sum( pow(PartTrax_s[i,1,:], 2.))+EPS) * 2. / f_F0_v[i]; + + # === Harmonic spectral deviation + f_SpecEnv_v = numpy.zeros(i_NumHarm, float); + f_SpecEnv_v[0] = PartTrax_s[i,1,0]; + l = len(PartTrax_s[i,1, :]); + f_SpecEnv_v[1:(i_NumHarm-1)]= ( PartTrax_s[i,1,0:(l-2)] + PartTrax_s[i,1,1:(l-1)] + PartTrax_s[i,1,2:] ) / 3.; + f_SpecEnv_v[i_NumHarm-1] = ( PartTrax_s[i,1, l-2] + PartTrax_s[i,1, l-1] ) / 2.; + f_HarmDev = numpy.sum( abs( PartTrax_s[i,1,:] - f_SpecEnv_v ) ) / i_NumHarm; + + # === Odd to even harmonic ratio + f_OddEvenRatio = numpy.sum(pow( PartTrax_s[i,1,0::2], 2.)) / (numpy.sum( pow( PartTrax_s[i,1,1::2], 2.))+EPS); + + # === Harmonic tristimulus + f_TriStim_v = numpy.zeros(3,float) + f_TriStim_v[0] = PartTrax_s[i,1,0] / (sum(PartTrax_s[i,1,:])+EPS); + f_TriStim_v[1] = sum(PartTrax_s[i,1,1:4]) / (sum(PartTrax_s[i,1,:])+EPS); + f_TriStim_v[2] = sum(PartTrax_s[i,1,4:]) / (sum(PartTrax_s[i,1,:])+EPS); + + # === Harmonic centroid + f_NormAmpl_v = PartTrax_s[i,1,:] / (sum( PartTrax_s[i,1,:] ) + EPS); + f_Centroid = sum( PartTrax_s[i,0,:] * f_NormAmpl_v ); + f_MeanCentrFreq = PartTrax_s[i,0,:] - f_Centroid; + + # === Harmonic spread + f_StdDev = numpy.sqrt( sum( pow(f_MeanCentrFreq, 2.) * f_NormAmpl_v ) ); + + # === Harmonic skew + f_Skew = sum( pow(f_MeanCentrFreq, 3.) * f_NormAmpl_v ) / pow(f_StdDev+EPS, 3.); + + # === Harmonic kurtosis + f_Kurtosis = sum( pow(f_MeanCentrFreq, 4.) * f_NormAmpl_v ) / pow(f_StdDev+EPS, 4.); + + # === Harmonic spectral slope (linear regression) + f_Num = i_NumHarm * numpy.sum(PartTrax_s[i,0,:] * f_NormAmpl_v) - numpy.sum(PartTrax_s[i,0,:]); + f_Den = i_NumHarm * numpy.sum(pow(PartTrax_s[i,0,:], 2.)) - numpy.sum(pow(PartTrax_s[i,0,:], 2.)); + f_Slope = f_Num / f_Den; + + # === Spectral decrease (according to peeters report) + f_Num = sum( (PartTrax_s[i,1,1:i_NumHarm] - PartTrax_s[i,1,0]) / numpy.arange(1., i_NumHarm)); + f_Den = sum( PartTrax_s[i,1,1:i_NumHarm] ); + f_SpecDecr = f_Num / (f_Den+EPS); + + # === Spectral roll-off + f_Thresh = 0.95; + f_CumSum_v = numpy.cumsum( PartTrax_s[i,1,:]); + f_CumSumNorm_v = f_CumSum_v / (sum(PartTrax_s[i,1,:])+EPS); + i_Pos = ( f_CumSumNorm_v > f_Thresh ).nonzero()[0]; + if len(i_Pos) > 0: + f_SpecRollOff = PartTrax_s[i,0,i_Pos[0]]; + else: + f_SpecRollOff = PartTrax_s[i,0,0]; + + # === Spectral variation (Spect. Flux) + # === Insure that prev. frame has same size as current frame by zero-padding + i_Sz = max( len(PartTrax_s[i-1,1,:]), len(PartTrax_s[i,1,:]) ); + f_PrevFrm_v = numpy.zeros(i_Sz, float); + f_CurFrm_v = numpy.zeros(i_Sz, float); + + f_PrevFrm_v[0:len(PartTrax_s[i-1,1,:])] = PartTrax_s[i-1,1,:]; + f_CurFrm_v[0:len(PartTrax_s[i,1,:])] = PartTrax_s[i,1,:]; + + f_CrossProd = sum( f_PrevFrm_v * f_CurFrm_v ); + f_AutoProd = numpy.sqrt( sum(pow(f_PrevFrm_v, 2.)) * sum(pow( f_CurFrm_v, 2.)) ); + f_SpecVar = 1 - f_CrossProd / (f_AutoProd+EPS); + + # === Build output structure + desc_har.FrameErg[i-1] = f_Energy; + desc_har.HarmErg[i-1] = f_HarmErg; + desc_har.NoiseErg[i-1] = f_NoiseErg; + desc_har.Noisiness[i-1] = f_Noisiness; + desc_har.F0[i-1] = f_F0_v[i]; + desc_har.InHarm[i-1] = f_InHarm; + desc_har.TriStim1[i-1] = f_TriStim_v[0]; + desc_har.TriStim2[i-1] = f_TriStim_v[1]; + desc_har.TriStim3[i-1] = f_TriStim_v[2]; + desc_har.HarmDev[i-1] = f_HarmDev; + desc_har.OddEvenRatio[i-1] = f_OddEvenRatio; + + desc_har.SpecCent[i-1] = f_Centroid; + desc_har.SpecSpread[i-1] = f_StdDev; + desc_har.SpecSkew[i-1] = f_Skew; + desc_har.SpecKurt[i-1] = f_Kurtosis; + desc_har.SpecSlope[i-1] = f_Slope; + desc_har.SpecDecr[i-1] = f_SpecDecr; + desc_har.SpecRollOff[i-1] = f_SpecRollOff; + desc_har.SpecVar[i-1] = f_SpecVar; + + return desc_har; + +def FFTrep(s, Fs): + + #i_FFTSize = 2048; + f_WinSize_sec = 1025./44100.; + f_HopSize_sec = 256./44100.; + + i_WinSize = int(f_WinSize_sec*Fs); + i_HopSize = int(f_HopSize_sec*Fs); + i_FFTSize = int( pow(2.0, mt.nextpow2(i_WinSize)) ); + + f_Win_v = scipy.signal.hamming(i_WinSize); + f_SampRateX = float(Fs) / float(i_HopSize); + f_SampRateY = i_FFTSize / Fs; + f_BinSize = Fs / i_FFTSize; + iHWinSize = int(numpy.fix((i_WinSize-1)/2)); + + # === get input sig. (make analytic) + f_Sig_v = s; + + if numpy.isreal(f_Sig_v[0]): + f_Sig_v = scipy.signal.hilbert(f_Sig_v); + + # === pre/post-pad signal + f_Sig_v = numpy.concatenate( (numpy.zeros(iHWinSize,float), f_Sig_v, numpy.zeros(iHWinSize,float)) ); + + # === support vectors + i_Len = len(f_Sig_v); + i_Ind = numpy.arange( iHWinSize, i_Len-iHWinSize+1, i_HopSize ); #ind + i_SizeX = len(i_Ind); + i_SizeY = i_FFTSize; + f_SupX_v = numpy.arange(0, i_SizeX, 1.0) / f_SampRateX; + f_SupY_v = numpy.arange(0, i_SizeY, 1.0) / i_SizeY / 2.0; + + # === calculate power spectrum + f_DistrPts_m = numpy.zeros( (i_SizeY, i_SizeX), complex); + + for i in xrange(0, i_SizeX ): + #print "i_WinSize", i_WinSize, "iHWinSize", iHWinSize, "[a,b]", (i_Ind[i] - iHWinSize), (i_Ind[i]+iHWinSize), "\n" + f_DistrPts_m[0:i_WinSize, i] = f_Sig_v[(i_Ind[i] - iHWinSize):(i_Ind[i]+iHWinSize+1)] * f_Win_v; + + # === fft (cols of dist.) + # === Power distribution (pow) + X = scipy.fft( f_DistrPts_m, i_FFTSize, axis=0); + S_pow = 1.0 / i_FFTSize * pow( abs( X ), 2.0); + S_pow = S_pow / sum( pow(f_Win_v, 2.)); + S_pow[1:,:] = S_pow[1:,:] / 2.; + + S_mag = numpy.sqrt(1.0 / i_FFTSize) * abs( X ); + S_mag = S_mag / sum( abs(f_Win_v)); + S_mag[1:,:] = S_mag[1:,:] / 2.; + + return S_mag, S_pow, i_SizeX, i_SizeY, f_SupX_v, f_SupY_v; + +# Compute descriptor from spectral representation (FFT/ERB/GAM) +# name: FCalcDescr +# @param +# @return +# +def FCalcDescr( f_DistrPts_m, i_SizeX, i_SizeY, f_SupX_v, f_SupY_v ): + + #i_SizeY, i_SizeX = f_DistrPts_m.shape; + x_tmp = sum(f_DistrPts_m, 0)+EPS; + f_ProbDistrY_m = f_DistrPts_m / numpy.repeat( [x_tmp,], i_SizeY, 0); # === normalize distribution in Y dim + i_NumMoments = 4; # === Number of moments to compute + f_Moments_m = numpy.zeros((i_NumMoments, i_SizeX), float); # === create empty output array for moments + + # === Calculate moments + # === f_Moments_m must be empty on first iter. + f_MeanCntr_m = numpy.repeat( numpy.array([f_SupY_v,]).T, i_SizeX, 1) - numpy.repeat( numpy.array([f_Moments_m[0,:],]), i_SizeY, 0); + + for i in xrange(0, i_NumMoments): + f_Moments_m[i,:] = sum( pow(f_MeanCntr_m, float(i+1)) * f_ProbDistrY_m); + + # === Descriptors from first 4 moments + f_Centroid_v = f_Moments_m[0,:]; + f_StdDev_v = numpy.sqrt( f_Moments_m[1,:] ); + f_Skew_v = f_Moments_m[2,:] / pow(f_StdDev_v+EPS, 3.); + f_Kurtosis_v = f_Moments_m[3,:] / pow(f_StdDev_v+EPS, 4.); + + # === Spectral slope (linear regression) + f_Num_v = i_SizeY * (f_SupY_v.dot(f_ProbDistrY_m)) - numpy.sum(f_SupY_v) * sum(f_ProbDistrY_m); + f_Den = i_SizeY * sum(f_SupY_v ** 2.) - pow(sum(f_SupY_v), 2.); + f_Slope_v = f_Num_v / (EPS + f_Den); + + # === Spectral decrease (according to peeters report) + f_Num_m = f_DistrPts_m[1:i_SizeY, :] - numpy.repeat( [f_DistrPts_m[0,:],], i_SizeY-1, 0); + ## a verifier + + f_Den_v = 1. / numpy.arange(1, i_SizeY, 1.); + f_SpecDecr_v = numpy.dot(f_Den_v, f_Num_m) / numpy.sum(f_DistrPts_m+EPS, axis=0); + + # === Spectral roll-off + f_Thresh = 0.95; + f_CumSum_m = numpy.cumsum(f_DistrPts_m, axis=0); + f_Sum_v = f_Thresh * numpy.sum(f_DistrPts_m, axis=0); + i_Bin_m = f_CumSum_m > numpy.repeat( [f_Sum_v,], i_SizeY, 0 ); + tmp = numpy.cumsum(i_Bin_m, axis=0); + trash,i_Ind_v = ( tmp.T == 1 ).nonzero(); + f_SpecRollOff_v = f_SupY_v[i_Ind_v]; + + # === Spectral variation (Spect. Flux) + f_CrossProd_v = numpy.sum( f_DistrPts_m * numpy.concatenate( (numpy.zeros((1, i_SizeY), float), f_DistrPts_m[:,0:(i_SizeX-1)].T ) ).T , axis=0); + f_AutoProd_v = numpy.sum( pow(f_DistrPts_m, 2.), axis=0 ) * numpy.sum( pow( numpy.concatenate( (numpy.zeros((1,i_SizeY), float), f_DistrPts_m[:,0:(i_SizeX-1)].T)).T , 2. ) , axis=0); + + f_SpecVar_v = 1. - f_CrossProd_v / (numpy.sqrt(f_AutoProd_v) + EPS); + f_SpecVar_v[0] = f_SpecVar_v[1]; # === the first value is alway incorrect because of "c.f_DistrPts_m .* [zeros(c.i_SizeY,1)" + + # === Energy + f_Energy_v = numpy.sum(f_DistrPts_m, axis=0); + + # === Spectral Flatness + f_GeoMean_v = numpy.exp( (1. / i_SizeY) * numpy.sum(numpy.log( f_DistrPts_m+EPS ), axis=0) ); + f_ArthMean_v = numpy.sum(f_DistrPts_m, axis=0) / float(i_SizeY); + f_SpecFlat_v = f_GeoMean_v / (f_ArthMean_v+EPS); + + # === Spectral Crest Measure + f_SpecCrest_v = numpy.max(f_DistrPts_m, axis=0) / (f_ArthMean_v+EPS); + + # ============================== + # ||| Build output structure ||| + # ============================== + desc_stft = dSTFT( + SpecCent = f_Centroid_v, # spectral centroid - OK + SpecSpread = f_StdDev_v, # spectral standard deviation - OK + SpecSkew = f_Skew_v, # spectral skew - OK + SpecKurt = f_Kurtosis_v, # spectral kurtosis - OK + SpecSlope = f_Slope_v, # spectral slope - OK + SpecDecr = f_SpecDecr_v, # spectral decrease - ? + SpecRollOff = f_SpecRollOff_v, # spectral roll-off - OK + SpecVar = f_SpecVar_v, # spectral variation - OK + FrameErg = f_Energy_v, # frame energy - OK + SpecFlat = f_SpecFlat_v, # spectral flatness - OK + SpecCrest = f_SpecCrest_v) # spectral crest - OK + return desc_stft; + +# Compute Temporal escriptors from input trame_s signal +# name: time_desc(trame_s, Fs) +# @param +# @return +# +def TCalcDescr(trame_s, Fs): + Fs = float(Fs); + Fc = 5.0; + f_ThreshNoise = 0.15; + #trame_s = trame_s / (numpy.max(trame_s) + EPS); # normalize input sound + + ## compute signal enveloppe (Ok) + f_AnaSig_v = scipy.signal.hilbert(trame_s); # analytic signal + f_AmpMod_v = abs(f_AnaSig_v); # amplitude modulation of analytic signal + + ## seems to have problem with Python (replaced with Matlab version)... + #with warnings.catch_warnings(): + # warnings.simplefilter("ignore") + #[B_v, A_v] = scipy.signal.butter(3., 2 * Fc / Fs,'lowpass', False,'ba'); #3rd order Butterworth filter + #B_v = [ 4.51579830e-11]; + #A_v = [ 1., -2.99857524, 2.9971515, -0.99857626]; + + m = scipy.io.loadmat('butter3.mat'); + B_v = numpy.squeeze(m['B_v']); + A_v = numpy.squeeze(m['A_v']); + + f_Env_v = scipy.signal.lfilter(B_v, A_v, numpy.squeeze(numpy.array(f_AmpMod_v))); + + # Log-Attack (Ok) + f_LAT, f_Incr, f_Decr, f_ADSR_v = FCalcLogAttack(f_Env_v, Fs, f_ThreshNoise); + + # temporal centroid (in seconds) (Ok) + f_TempCent = FCalcTempCentroid(f_Env_v, f_ThreshNoise) / Fs; + # === Effective duration (Ok) + f_EffDur = FCalcEffectiveDur(f_Env_v, 0.4) / Fs; # effective duration (in seconds) + # === Energy modulation (tremolo) (Ok) + f_FreqMod, f_AmpMod = FCalcModulation(f_Env_v, f_ADSR_v, Fs); + + f_HopSize_sec = 128.0/44100; # === is 0.0029s at 44100Hz + f_WinLen_sec = 1024.0/44100; # === is 0.0232s at 44100Hz + step = int(round(f_HopSize_sec * Fs)); + N = int(round(f_WinLen_sec * Fs)); + win = scipy.signal.hamming(N); + l_en = len(trame_s); + i2 = numpy.arange(0, N, 1); + idx = 0; + nb_trame = int( (l_en-N) / step)+1; + f_AutoCoeffs_v = numpy.zeros( (config_s.xcorr_nb_coeff, nb_trame), float ); + f_ZcrRate_v = numpy.zeros( nb_trame, float ); + frame_ind = numpy.arange(0, N, 1); + + + for n in xrange(0, l_en-N, step): + #print "Processing frame ", (idx+1)," / ", (nb_trame),"\n" + i1 = numpy.round(int(n) + frame_ind ); + f_Frm_v = trame_s[ i1 ] * win; + + # === Autocorrelation + f_Coeffs_v = numpy.fft.fftshift( mt.xcorr(f_Frm_v + EPS) ); # GFP divide by zero issue + f_AutoCoeffs_v[:,idx] = f_Coeffs_v[ 0:(config_s.xcorr_nb_coeff)]; # only save 12 coefficients + #=== Zero crossing rate + i_Sign_v = numpy.sign( f_Frm_v - numpy.mean(f_Frm_v) ); + i_Zcr_v = numpy.diff(i_Sign_v).nonzero()[0]; + f_ZcrRate_v[idx] = len(i_Zcr_v) / (len(f_Frm_v) / Fs); + idx = idx + 1; + + ## Store results + dTEE_s = dTEE( + Att = f_ADSR_v[0], + Dec = f_ADSR_v[1], + Rel = f_ADSR_v[4], + LAT = f_LAT, # === log attack time + AttSlope = f_Incr, # === temporal increase + DecSlope = f_Decr, # === temporal decrease + TempCent = f_TempCent, # === temporal centroid + EffDur = f_EffDur, # === effective duration + FreqMod = f_FreqMod, # === energy modulation frequency + AmpMod = f_AmpMod, # === energy modulation amplitude + RMSEnv = f_Env_v); # === RMS + + dAS_s = dAS( + AutoCorr = f_AutoCoeffs_v, + ZcrRate = f_ZcrRate_v); + return dTEE_s, dAS_s; + + +#################################################################################### +#### Sub-functions #### +#################################################################################### +# y = outmidear(x, Fs) - Tested OK +# name: unknown +# @param +# @return +# +def outmidear(x, Fs): + maf, f = isomaf([], 'killion'); # minimum audible field sampled at f + g,tg = isomaf([1000])-maf; # gain re: 1kHz + g = pow(10., g/20.); # dB to lin + f = numpy.concatenate( ([0], f, [20000]) ); # add 0 and 20 kHz points + g = numpy.concatenate( ([EPS], g, [ g[len(g)-1]] )); # give them zero amplitude + + if (Fs/2.) > 20000: + f = numpy.concatenate( ( f, numpy.array( [ numpy.squeeze(Fs)/2.]) )); + g = numpy.concatenate( ( g, numpy.array([g[len(g)-1]]) )); + + # Model low frequency part with 2 cascaded second-order highpass sections: + fc = 680.; # Hz - corner frequency + q = 0.65; # quality factor + pwr = 2; # times to apply same filter + a = sof( fc / Fs, q); # second order low-pass + b = numpy.concatenate( ([sum(a)-1], [-a[1]], [-a[2]]) ); # convert to high-pass + + for k in xrange(0, pwr): + x = scipy.signal.lfilter(b,a,x); + + # Transfer function of filter applied: + ff, gg = scipy.signal.freqz(b,a); + gg = pow(abs(gg), float(pwr)); + ff = ff * Fs / (2.* numpy.pi); + + # Transfer function that remains to apply: + g = mt.my_interpolate(f, g, ff, 'linear'); + gain = g / (gg+EPS); + gain[ (ffmax(freqs)).nonzero()[0]; + mafs[I2] = datamaf[len(datamaf)-1]; + return mafs,f + +def FCalcLogAttack(f_Env_v, Fs, f_ThreshNoise): + Fs = float(Fs); + my_eps = pow(10.0,-3.0); #increase if errors occur + f_ThreshDecr = 0.4; + percent_step = 0.1; + param_m1 = int(round(0.3/percent_step)); # === BORNES pour calcul mean + param_m2 = int(round(0.6/percent_step)); + param_s1att = int(round(0.1/percent_step)); # === BORNES pour correction satt (start attack) + param_s2att = int(round(0.3/percent_step)); + param_e1att = int(round(0.5/percent_step)); # === BORNES pour correction eatt (end attack) + param_e2att = int(round(0.9/percent_step)); + param_mult = 3.0; # === facteur multiplicatif de l'effort + + # === calcul de la pos pour chaque seuil + f_EnvMax = max(f_Env_v); #i_EnvMaxInd + f_Env_v = f_Env_v / (f_EnvMax+EPS); # normalize by maximum value + + percent_value_v = mt.my_linspace(percent_step, 1.0, percent_step); # numpy.arange(percent_step, 1.0, percent_step) + nb_val = int(len(percent_value_v)); + #numpy.linspace(percent_step, 1, nb_val); + #nb_val = int(numpy.round(1.0/ percent_step)); + percent_value_v = numpy.linspace(percent_step, 1, nb_val); + percent_posn_v = numpy.zeros(nb_val, int); + + for p in xrange(0, nb_val): + pos_v = (f_Env_v >= percent_value_v[p]-my_eps).nonzero()[0]; + if len(pos_v) > 0: + percent_posn_v[p] = pos_v[0]; + else: + x, percent_posn_v[p] = mt.my_max(f_Env_v); + + # === NOTATION + # satt: start attack + # eatt: end attack + #==== detection du start (satt_posn) et du stop (eatt_posn) de l'attaque + pos_v = (f_Env_v > f_ThreshNoise).nonzero()[0]; + dpercent_posn_v = numpy.diff(percent_posn_v); + M = numpy.mean(dpercent_posn_v[(param_m1-1):param_m2]); + + # === 1) START ATTACK + pos2_v = ( dpercent_posn_v[ (param_s1att-1):param_s2att ] > param_mult*M ).nonzero()[0]; + if len(pos2_v) > 0: + result = pos2_v[len(pos2_v)-1]+param_s1att+1; + else: + result = param_s1att; + result = result -1; + + satt_posn = percent_posn_v[result]; + + # === raffinement: on cherche le minimum local + n = percent_posn_v[result]; + delta = int(numpy.round(0.25*(percent_posn_v[result+1]-n))); + + if n-delta >= 0: + min_value, min_pos = mt.my_min(f_Env_v[ int(n-delta):int(n+delta) ]); + satt_posn = min_pos + n-delta; + + # === 2) END ATTACK + pos2_v = (dpercent_posn_v[(param_e1att-1):param_e2att] > param_mult*M).nonzero()[0]; + if len(pos2_v) >0: + result = pos2_v[0]+param_e1att-1; + else: + result = param_e2att-1; + + # === raffinement: on cherche le maximum local + delta = int(numpy.round(0.25*( percent_posn_v[result] - percent_posn_v[result-1] ))); + n = percent_posn_v[result]; + if n+delta < len(f_Env_v): + #print "n", n, " delta", delta, "len(f_Env_v)", len(f_Env_v),"\n" + max_value, max_pos = mt.my_max(f_Env_v[int(n-delta):int(n+delta)]); + eatt_posn = max_pos + n-delta;#-1; + + # === D: Log-Attack-Time + if satt_posn == eatt_posn: + satt_posn = satt_posn - 1; + + risetime_n = (eatt_posn - satt_posn); + f_LAT = numpy.log10(risetime_n/Fs); + + # === D: croissance temporelle + satt_value = f_Env_v[satt_posn]; + eatt_value = f_Env_v[eatt_posn]; + seuil_value_v = numpy.arange(satt_value, eatt_value, 0.1); + + seuil_possec_v = numpy.zeros(len(seuil_value_v), float); + for p in xrange(0, len(seuil_value_v)): + pos3_v = ( f_Env_v[ satt_posn:(eatt_posn+1) ] >= seuil_value_v[p] ).nonzero()[0]; + seuil_possec_v[p] = pos3_v[0]/Fs; + + + pente_v = numpy.diff( seuil_value_v) / numpy.diff(seuil_possec_v); + mseuil_value_v = 0.5*(seuil_value_v[0:(len(seuil_value_v)-1)]+seuil_value_v[1:]); + weight_v = numpy.exp( -pow(mseuil_value_v-0.5,2.0) / 0.25); + f_Incr = numpy.sum( pente_v * weight_v) / (EPS+numpy.sum(weight_v)); + tempsincr = numpy.arange( satt_posn, eatt_posn+1); + tempsincr_sec_v = tempsincr / Fs; + const = numpy.mean( f_Env_v[ numpy.round(tempsincr)] - f_Incr*tempsincr_sec_v); + mon_poly_incr = numpy.concatenate(([f_Incr],[const])); + mon_poly_incr2 = numpy.polyfit(tempsincr_sec_v, f_Env_v[tempsincr], 1); + incr2 = mon_poly_incr2[0]; + + # === D: decroissance temporelle + fEnvMax, iEnvMaxInd = mt.my_max(f_Env_v); + iEnvMaxInd = round(0.5*(iEnvMaxInd+eatt_posn)); + pos_v = (f_Env_v > f_ThreshDecr).nonzero()[0]; + stop_posn = pos_v[len(pos_v)-1]; + + if iEnvMaxInd == stop_posn: + if stop_posn < len(f_Env_v): + stop_posn = stop_posn+1; + else: + if iEnvMaxInd>1: + iEnvMaxInd = iEnvMaxInd-1; + + tempsdecr = numpy.array(range(int(iEnvMaxInd), int(stop_posn+1))); + tempsdecr_sec_v = tempsdecr / Fs; + mon_poly_decr = numpy.polyfit( tempsdecr_sec_v, numpy.log( f_Env_v[tempsdecr]+mt.EPS), 1); + f_Decr = mon_poly_decr[0]; + + # === D: enveloppe ADSR = [A(1) | A(2)=D(1) | D(2)=S(1) | S(2)=D(1) | D(2)] + f_ADSR_v = numpy.array( [satt_posn, iEnvMaxInd, 0.0, 0.0, stop_posn] ) / Fs; + return f_LAT, f_Incr, f_Decr, f_ADSR_v + +def FCalcTempCentroid(f_Env_v, f_Thresh): + f_MaxEnv, i_MaxInd = mt.my_max(f_Env_v); + f_Env_v = f_Env_v / f_MaxEnv; # normalize + i_Pos_v = (f_Env_v > f_Thresh).nonzero()[0]; + i_StartFrm = i_Pos_v[0]; + if i_StartFrm == i_MaxInd: + i_StartFrm = i_StartFrm - 1; + i_StopFrm = i_Pos_v[len(i_Pos_v)-1]; + f_Env2_v = f_Env_v[range(i_StartFrm, i_StopFrm+1)]; + f_SupVec_v = numpy.array(range(1, len(f_Env2_v)+1))-1; + f_Mean = numpy.sum( f_SupVec_v * f_Env2_v) / numpy.sum(f_Env2_v); # centroid + f_TempCent = (i_StartFrm + f_Mean); # temporal centroid (in samples) + return f_TempCent + +def FCalcEffectiveDur(f_Env_v, f_Thresh): + f_MaxEnv, i_MaxInd = mt.my_max(f_Env_v); #=== max value and index + f_Env_v = f_Env_v / f_MaxEnv; # === normalize + i_Pos_v = (f_Env_v > f_Thresh).nonzero()[0]; + + i_StartFrm = i_Pos_v[0]; + if i_StartFrm == i_MaxInd: + i_StartFrm = i_StartFrm - 1; + i_StopFrm = i_Pos_v[len(i_Pos_v)-1]; + f_EffDur = (i_StopFrm - i_StartFrm + 1); + return f_EffDur; + +def FCalcModulation(f_Env_v, f_ADSR_v, Fs): + + # do.method = 'fft'; % === 'fft' 'hilbert' + sustain_Thresh = 0.02; #in sec + envelopfull_v = f_Env_v; + tempsfull_sec_v = numpy.arange(0, len(f_Env_v), 1.) / Fs; + + sr_hz = 1.0/numpy.mean( numpy.diff(tempsfull_sec_v)); + ss_sec = f_ADSR_v[1]; # === start sustain + es_sec = f_ADSR_v[4]; # === end sustain + + flag_is_sustained = 0; + if (es_sec - ss_sec) > sustain_Thresh: # === if there is a sustained part + a = (ss_sec <= tempsfull_sec_v).nonzero()[0]; + b = (tempsfull_sec_v <= es_sec).nonzero()[0]; + pos_v = ( (ss_sec <= tempsfull_sec_v) & (tempsfull_sec_v <= es_sec)).nonzero()[0]; + if len(pos_v) > 0: + flag_is_sustained = 1; + + if flag_is_sustained == 1: + envelop_v = envelopfull_v[pos_v]; + temps_sec_v = tempsfull_sec_v[pos_v]; + M = numpy.mean(envelop_v); + + # === TAKING THE ENVELOP + mon_poly = numpy.polyfit(temps_sec_v, numpy.log(envelop_v+EPS), 1); + hatenvelop_v= numpy.exp(numpy.polyval(mon_poly, temps_sec_v)); + signal_v = envelop_v - hatenvelop_v; + + L_n = len(signal_v); + N = int(round(max(numpy.array([sr_hz, pow(2.0, mt.nextpow2(L_n))])))); + fenetre_v = scipy.hamming(L_n); + norma = numpy.sum(fenetre_v) ; + fft_v = scipy.fft(signal_v * fenetre_v*2/norma, N); + ampl_v = numpy.abs(fft_v); + phas_v = numpy.angle(fft_v); + freq_v = mt.Index_to_freq( numpy.arange(0,N,1, float), sr_hz, N); + + param_fmin = 1.0; + param_fmax = 10.0; + pos_v = ( (freq_v < param_fmax) & (freq_v > param_fmin) ).nonzero()[0]; + + pos_max_v = Fcomparepics2(ampl_v[pos_v], 2); + + if len(pos_max_v) > 0: + max_value, max_pos = mt.my_max( ampl_v[ pos_v[ pos_max_v]]); + max_pos = pos_v[pos_max_v[max_pos]]; + else: + max_value, max_pos = mt.my_max(ampl_v[pos_v]); + max_pos = pos_v[max_pos]; + + MOD_am = max_value/(M+EPS); + MOD_fr = freq_v[max_pos]; + MOD_ph = phas_v[max_pos]; + + else: # === if there is NO sustained part + MOD_fr = 0; + MOD_am = 0; + + return MOD_fr, MOD_am + +def Fcomparepics2(input_v, lag_n=2, do_affiche=0, lag2_n=0, seuil=0): + if lag2_n == 0: + lag2_n = 2*lag_n; + L_n = len(input_v); + pos_cand_v = (numpy.diff( numpy.sign( numpy.diff(input_v))) < 0).nonzero()[0]; + + pos_cand_v = pos_cand_v+1; + pos_max_v = numpy.array([],int); + + for p in xrange(0, len(pos_cand_v)): + pos = pos_cand_v[p]; + i1 = (pos-lag_n); + i2 = (pos+lag_n+1); + i3 = (pos-lag2_n); + i4 = (pos+lag2_n+1); + + if (i1 >= 0) and (i2 <= L_n): + tmp = input_v[i1:i2]; + maximum, position = mt.my_max(tmp); + position = position + i1; + + if (i3>=0) and (i4<=L_n): + tmp2 = input_v[i3:i4]; + if (position == pos) and (input_v[position] > seuil*numpy.mean(tmp2)): + pos_max_v = numpy.concatenate( (pos_max_v, numpy.array([pos])) ); + + if lag_n < 2: + if input_v[0] > input_v[1]: + pos_max_v = numpy.arange(0,pos_max_v); + if input_v[len(input_v)-1] > input_v[len(input_v)-1]: + pos_max_v = numpy.concatenate( (pos_max_v, numpy.array([L_n]))); + + return pos_max_v; + diff --git a/timeside/analyzer/labri_instru.py b/timeside/analyzer/labri_instru.py new file mode 100644 index 0000000..2a943d5 --- /dev/null +++ b/timeside/analyzer/labri_instru.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# +# labri_instru_detect.py +# +# Copyright (c) 2014 Dominique Fourer + +# This file is part of TimeSide. + +# TimeSide is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# TimeSide is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with TimeSide. If not, see . + +# Author: D Fourer http://www.fourer.fr + +from timeside.core import Processor, implements, interfacedoc, FixedSizeInputAdapter +from timeside.analyzer.core import * +from timeside.api import IValueAnalyzer +from timeside.api import IAnalyzer +import numpy, scipy +from timeside.analyzer.preprocessors import frames_adapter + +## plugin specific +import sys, os +REL_PATH='labri'; +PLUGIN_PATH=os.path.join(timeside.__path__[0], 'REL_PATH'); +sys.path.append(PLUGIN_PATH); +sys.path.append(REL_PATH); ## can be commented +import timbre_descriptor +import my_tools as mt +import my_lda + +class LABRIInstru(Analyzer): + implements(IAnalyzer) + + @interfacedoc + def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None): + super(LABRIInstru, self).setup(channels, samplerate, blocksize, totalframes) + # do setup things... + #n_max = 8; + + ## Load models + parameters + m = scipy.io.loadmat('model_inst.mat'); + self.nb_desc = numpy.squeeze(numpy.array(m['nb_desc'])); ## nb_desc + self.i_fs = numpy.squeeze(numpy.array(m['i_fs']))-1; ## IRMFSP indices + self.Vect = numpy.squeeze(numpy.array(m['Vect'])); ## Projection basis + self.n_max = numpy.shape(self.Vect)[0]; + self.repr_mu = numpy.squeeze(numpy.array(m['repr'])); + self.repr_sigma = numpy.squeeze(numpy.array(m['repr2'])) + 0.0; #+ mt.EPS + self.inst = numpy.squeeze(numpy.array(m['inst'])); ## instrument name / structure + self.T_NOISE = -60; ## noise threshold in dB + + self.container = AnalyzerResultContainer(); + self.signal = numpy.array([],float); + self.processed = False; + self.param_val = 0; + self.field_name = 0; + self.Fs = self.samplerate(); + self.cur_pos = 0; + self.famille = ""; + self.jeu = ""; + + @staticmethod + @interfacedoc + def id(): + return "labri_instru" + + @staticmethod + @interfacedoc + def name(): + return "Labri instrument classification / detection system" + + @staticmethod + @interfacedoc + def unit(): + # return the unit of the data dB, St, ... + return "Instrument name / index" + + def process(self, frames, eod=False): + + N = len(frames); + self.cur_pos += N; + time = (self.cur_pos - N/2.)/ self.Fs; #current time + self.signal = numpy.concatenate( (self.signal, numpy.squeeze(numpy.array(frames)))); + + ## wait until the end is reached to process file + if eod and not self.processed: + + desc = timbre_descriptor.compute_all_descriptor(self.signal, self.Fs); + param_val, self.field_name = timbre_descriptor.temporalmodeling(desc); + self.param_val = numpy.array([param_val[self.i_fs],]); + + ## estimate instrument family + gr1, gr2, p1, p2 = my_lda.pred_lda(numpy.real(self.param_val)+mt.EPS, self.Vect, self.repr_mu, self.repr_sigma); + i_res = gr1[0]; ## use euclidean distance criterion as default + + self.famille = self.inst[i_res][0][0]; + self.jeu = ""; + if i_res > 0: + self.jeu = self.inst[i_res][1][0]; + + ## uncomment for debug + #print "Detected as ", self.famille," - ",self.jeu, " according to res1: res1=",gr1[0]," res2=", gr2[0],"p1=",p1[0]," p2=", p2[0],"\n\n"; + self.result_param= numpy.array([gr1, gr2, p1, p2]); + self.result_data = self.famille+" - "+self.jeu; + + res1 = self.new_result(data_mode='label', time_mode='global'); + res1.id_metadata.id += '.' + 'instrument_label'; + res1.id_metadata.name += ' ' + 'Instrument Label'; + res1.data_object.label_metadata.label= self.result_data; + self.results.add(res1); ##store results in correct format ?? + + res2 = self.new_result(data_mode='value', time_mode='global'); + res2.id_metadata.id += '.' + 'instrument_label'; + res2.id_metadata.name += ' ' + 'Instrument Label'; + res2.data_object.value = numpy.array([gr1, gr2]); ## instrument index + res2.data_object.y_value = numpy.array([p1, p2]); ## confidence value + self.results.add(res2); ##store results as numeric in correct format ?? + + self.processed = True; + + return frames, eod; -- 2.39.5