}
@Misc{telemetaCREM,
- key = "telemetaCREM",
+ key = "telemetaCREM",
title = "Archives sonores du {CNRS} - {Musée de l'Homme}",
howpublished = {\url{http://archives.crem-cnrs.fr}}
}
school = {Centre for Digital music, Queen Mary University of
London, UK},
year = 2006,
- url = {http://aubio.org/}
-
+ url = {http://aubio.org/}
}
@inproceedings{yaafe_ISMIR2010,
and Prado, Jacques and Richard, Gaël},
title = {Yaafe, An Easy To Use And Efficient Audio Feature
Extraction Software},
- booktitle = {Proc. of the International Society for Music Information Retrieval Conference, Utrecht, Netherlands},
+ booktitle = ISMIR,
+ address = {Utrecht, Netherlands},
year = 2010,
pages = {441-446},
- howpublished = {\url{http://ismir2010.ismir.net/proceedings/ismir2010-75.pdf}}
+ howpublished =
+ {\url{http://ismir2010.ismir.net/proceedings/ismir2010-75.pdf}}
}
@Misc{vamp-plugins,
- title = {The {Vamp} audio analysis plugin system},
+ title = {The {Vamp} audio analysis plugin system},
howpublished = {\url{http://www.vamp-plugins.org}}
}
@Misc{Simonnot_ICTM_2014,
- author = {Simonnot, Joséphine and Mifune, Marie-France and Lambert, Jean},
- title = {TELEMETA: Resources of an online archive of ethnomusicological recordings},
- howpublished = {Panel presented at ICTM Study Group on Historical Sources of Traditional Music, Aveiro, Portugal, May 12-17 2014},
- year = 2014}
+ author = {Simonnot, Joséphine and Mifune, Marie-France and
+ Lambert, Jean},
+ title = {TELEMETA: Resources of an online archive of
+ ethnomusicological recordings},
+ howpublished = {Panel presented at ICTM Study Group on Historical
+ Sources of Traditional Music, Aveiro, Portugal, May
+ 12-17 2014},
+ year = 2014
+}
@Article{Simmonot_IASA_2011,
- author = {Simonnot, Joséphine},
- title = {{TELEMETA}: an audio Content Management System for the Web},
- journal = {International Association of Sound and Audiovisual Archives journal},
- year = 2011,
- volume = 36,
- month = {jan}}
+ author = {Simonnot, Joséphine},
+ title = {{TELEMETA}: an audio Content Management System for
+ the Web},
+ journal = {International Association of Sound and Audiovisual
+ Archives journal},
+ year = 2011,
+ volume = 36,
+ month = jan
+}
@Article{Julien_IASA_2011,
- author = {Julien Da Cruz Lima, Aude},
- title = {The {CNRS} — {M}usée de l’{H}omme audio archives: a short introduction},
- journal = {International Association of Sound and Audiovisual Archives journal},
- year = 2011,
- volume = 36,
- month = {jan}}
-
+ author = {Julien Da Cruz Lima, Aude},
+ title = {The {CNRS} — {M}usée de l’{H}omme audio archives: a
+ short introduction},
+ journal = {International Association of Sound and Audiovisual
+ Archives journal},
+ year = 2011,
+ volume = 36,
+ month = jan
+}
@article{Gomez_JNMR_2013,
-author = {Gómez, Emilia and Herrera, Perfecto and Gómez-Martin, Francisco},
-title = {Computational Ethnomusicology: perspectives and challenges},
-journal = {Journal of New Music Research},
-volume = {42},
-number = {2},
-pages = {111-112},
-year = {2013},
-doi = {10.1080/09298215.2013.818038},
-
-URL = {http://www.tandfonline.com/doi/abs/10.1080/09298215.2013.818038},
-eprint = {http://www.tandfonline.com/doi/pdf/10.1080/09298215.2013.818038}
+ author = {Gómez, Emilia and Herrera, Perfecto and
+ Gómez-Martin, Francisco},
+ title = {Computational Ethnomusicology: perspectives and
+ challenges},
+ journal = {Journal of New Music Research},
+ volume = 42,
+ number = 2,
+ pages = {111--112},
+ year = 2013,
+ doi = {10.1080/09298215.2013.818038},
+ URL =
+ {http://www.tandfonline.com/doi/abs/10.1080/09298215.2013.818038},
+ eprint =
+ {http://www.tandfonline.com/doi/pdf/10.1080/09298215.2013.818038}
}
@Article{Tzanetakis_2007_JIMS,
- author = {Tzanetakis, George and Kapur, Ajay and Schloss, W. Andrew and Wright, Matthew},
- title = {Computational ethnomusicology},
- journal = {Journal of Interdisciplinary Music Studies},
- year = 2007,
- volume = 1,
- number = 2,
- pages = {1-24},
- url = {http://www.musicstudies.org/CompEthno_JIMS_071201.pdf},
+ author = {Tzanetakis, George and Kapur, Ajay and Schloss,
+ W. Andrew and Wright, Matthew},
+ title = {Computational ethnomusicology},
+ journal = {Journal of Interdisciplinary Music Studies},
+ year = 2007,
+ volume = 1,
+ number = 2,
+ pages = {1--24},
+ url =
+ {http://www.musicstudies.org/CompEthno_JIMS_071201.pdf},
}
@article{barras2006multistage,
- title={Multistage speaker diarization of broadcast news},
- author={Barras, Claude and Zhu, Xuan and Meignier, Sylvain and Gauvain, J},
- journal=TASLP,
- volume=14,
- number=5,
- pages={1505--1512},
- year=2006,
- publisher={IEEE}
+ title = {Multistage speaker diarization of broadcast news},
+ author = {Barras, Claude and Zhu, Xuan and Meignier, Sylvain
+ and Gauvain, J},
+ journal = TASLP,
+ volume = 14,
+ number = 5,
+ pages = {1505--1512},
+ year = 2006,
+ publisher = {IEEE}
}
@inproceedings{cannam2006sonic,
- title={The Sonic Visualiser: A Visualisation Platform for Semantic Descriptors from Musical Signals.},
- author={Cannam, Chris and Landone, Christian and Sandler, Mark B and Bello, Juan Pablo},
- booktitle={Proc. of the International Society for Music Information Retrieval Conference},
- pages={324--327},
- year=2006
+ title = {The Sonic Visualiser: A Visualisation Platform for
+ Semantic Descriptors from Musical Signals.},
+ author = {Cannam, Chris and Landone, Christian and Sandler,
+ Mark B and Bello, Juan Pablo},
+ booktitle = ISMIR,
+ address = {Victoria, Canada},
+ pages = {324--327},
+ year = 2006
}
-
@Article{DeCheveigne2002,
- author = {De Cheveigné, A. and Kawahara, H.},
- title = {YIN, a Fundamental Frequency Estimator for Speech and Music},
- journal = JASA,
- year = 2002,
- volume = 111,
- number = 4,
- pages = {1917-1930}}
-
-
+ author = {De Cheveigné, A. and Kawahara, H.},
+ title = {YIN, a Fundamental Frequency Estimator for Speech
+ and Music},
+ journal = JASA,
+ year = 2002,
+ volume = 111,
+ number = 4,
+ pages = {1917--1930}
+}
@Article{Houtgast1985,
- author = {Houtgast T. and Steeneken, J. M. },
- title = {{A Review of the MTF Concept in Room Acoustics and its Use for Estimating Speech Intelligibility in Auditoria}},
- journal = JASA,
- year = 1985,
- volume = 77,
- number = 3,
- pages = {1069-1077}}
-
-
+ author = {Houtgast, T. and Steeneken, J. M.},
+ title = {{A Review of the MTF Concept in Room Acoustics and
+ its Use for Estimating Speech Intelligibility in
+ Auditoria}},
+ journal = JASA,
+ year = 1985,
+ volume = 77,
+ number = 3,
+ pages = {1069--1077}
+}
@Article{Lachambre2011,
- author = {Lachambre, Hélène and Pinquier, Julien and André-Obrecht, Régine},
- title = {Distinguishing Monophonies from Polyphonies using {W}eibull {B}ivariate Distributions},
- journal = TASLP,
- year = 2011,
- volume = 19,
- number = 6,
- pages = {1837-1842},
- month = {august}}
-
-
+ author = {Lachambre, Hélène and Pinquier, Julien and
+ André-Obrecht, Régine},
+ title = {Distinguishing Monophonies from Polyphonies using
+ {W}eibull {B}ivariate Distributions},
+ journal = TASLP,
+ year = 2011,
+ volume = 19,
+ number = 6,
+ pages = {1837--1842},
+ month = aug
+}
@Article{Obrecht1988,
- author = {André-Obrecht, Régine},
- title = {A New Statistical Approach for Automatic Speech Segmentation},
- journal = {IEEE Trans. on Audio, Speech, and Signal Processing},
- year = 1988,
- volume = 36,
- number = 1,
- pages = {29-40},
- month = {january}}
+ author = {André-Obrecht, Régine},
+ title = {A New Statistical Approach for Automatic Speech
+ Segmentation},
+ journal = {IEEE Trans. on Audio, Speech, and Signal Processing},
+ year = 1988,
+ volume = 36,
+ number = 1,
+ pages = {29--40},
+ month = jan
+}
@InProceedings{Pinquier2003,
- author = {Pinquier, Julien and Rouas, Jean-Luc and André-Obrecht, Régine},
- title = {A Fusion Study in Speech / Music Classification},
- booktitle = {IEEE International Conference on Audio, Speech and Signal Processing, Hong-Kong, China},
- year = 2003,
- month = {april}}
+ author = {Pinquier, Julien and Rouas, Jean-Luc and
+ André-Obrecht, Régine},
+ title = {A Fusion Study in Speech / Music Classification},
+ booktitle = {IEEE International Conference on Audio, Speech and
+ Signal Processing, Hong-Kong, China},
+ year = 2003,
+ month = apr
+}
@Article{Urban88,
- author = {Urban, Greg},
- title = {{Ritual Wailing in Amerindian Brazil}},
- journal = {American Anthropologist},
- year = 1988,
- volume = 90,
- number = 2,
- pages = {385-400}}
+ author = {Urban, Greg},
+ title = {{Ritual Wailing in Amerindian Brazil}},
+ journal = {American Anthropologist},
+ year = 1988,
+ volume = 90,
+ number = 2,
+ pages = {385--400}
+}
@article{taxonomy_sachs,
- author = {E. v. Hornbostel and C. Sachs},
- title = {The classification of musical instruments},
- journal = {Galpin Society Journal},
- year = 1961,
- volume = 3,
- number = 25,
- pages = {3--29}
+ author = {E. v. Hornbostel and C. Sachs},
+ title = {The classification of musical instruments},
+ journal = {Galpin Society Journal},
+ year = 1961,
+ volume = 3,
+ number = 25,
+ pages = {3--29}
}
@article{taxonomy_sachs2,
- author = {E. v. Hornbostel and C. Sachs},
- title = {Systematik der Musikinstrumente},
- journal = {Zeitschrift für Ethnologie},
- year = {1914},
- volume = {46},
- number = {},
- pages = {553--590}
+ author = {E. v. Hornbostel and C. Sachs},
+ title = {Systematik der Musikinstrumente},
+ journal = {Zeitschrift für Ethnologie},
+ year = 1914,
+ volume = 46,
+ pages = {553--590}
}
- @article{timbre_toolbox,
- author = {G. Peeters and B. Giordano and P. Susini and N. Misdariis and S. McAdams},
- title = {The Timbre Toolbox: Audio descriptors of musical signals},
- journal = JASA,
- year = {2011},
- month = {Nov.},
- volume = {5},
- number = {130},
- pages = {2902--2916}
+@article{timbre_toolbox,
+ author = {G. Peeters and B. Giordano and P. Susini and
+ N. Misdariis and S. McAdams},
+ title = {The Timbre Toolbox: Audio descriptors of musical
+ signals},
+ journal = JASA,
+ year = 2011,
+ month = nov,
+ volume = 130,
+ number = 5,
+ pages = {2902--2916}
}
-
@book{lda_book,
- author = {T. W. Anderson},
- title = {An Introduction to Multivariate Statistical Analysis},
- publisher = {Wiley-Blackwell},
- year = {1958},
- edition = {},
- address = {New York, USA}
+ author = {T. W. Anderson},
+ title = {An Introduction to Multivariate Statistical
+ Analysis},
+ publisher = {Wiley-Blackwell},
+ year = 1958,
+ address = {New York, USA}
}
@inproceedings{aes_irmfsp,
- author = {G. Peeters},
- booktitle = {115th convention of AES},
- title = {Automatic classification of large musical instrument databases using hierarchical classifiers with intertia ratio maximization},
- year = {2003},
- month = {Oct.},
- address = {New York, USA}
+ author = {G. Peeters},
+ booktitle = {115th convention of AES},
+ title = {Automatic classification of large musical instrument
+ databases using hierarchical classifiers with
+ inertia ratio maximization},
+ year = 2003,
+ month = oct,
+ address = {New York, USA}
}
@inproceedings{ismir14_dfourer,
- author = {D. Fourer and J-L. Rouas and Pierre Hanna and Matthias Robine},
- booktitle = {Proc. International Society for Music Information Retrieval Conference (ISMIR'2014)},
- title = {Automatic timbre classification of ethnomusicological audio recordings},
- year = 2014,
- month = {Oct.},
- address = {Taipei, Taiwan},
- note = {Accepted for publication}
+ author = {D. Fourer and J-L. Rouas and Pierre Hanna and
+ Matthias Robine},
+ booktitle = ISMIR,
+ title = {Automatic timbre classification of
+ ethnomusicological audio recordings},
+ year = 2014,
+ month = oct,
+ address = {Taipei, Taiwan},
+ note = {Accepted for publication}
}
-
@InBook{Dournon92,
- author = {Dournon, Geneviève},
- title = {The New Grove Handbook in Music},
- chapter = {"Organology", Ethnomusicology, an Introduction},
- publisher = {Macmillan Press},
- year = 1992,
- edition = {DOURNON, Geneviève},
- pages = {245-300}}
+ author = {Dournon, Geneviève},
+ title = {The New Grove Handbook in Music},
+ chapter = {"Organology", Ethnomusicology, an Introduction},
+ publisher = {Macmillan Press},
+ year = 1992,
+ edition = {DOURNON, Geneviève},
+ pages = {245--300}
+}
@inproceedings{gravier2012etape,
- title={The ETAPE corpus for the evaluation of speech-based TV content processing in the French language},
- author={Gravier, Guillaume and Adda, Gilles and Paulson, Niklas and Carr{\'e}, Matthieu and Giraudel, Aude and Galibert, Olivier and others},
- booktitle={International Conference on Language Resources, Evaluation and Corpora},
- year=2012
+ title = {The ETAPE corpus for the evaluation of speech-based
+ TV content processing in the French language},
+ author = {Gravier, Guillaume and Adda, Gilles and Paulson,
+ Niklas and Carr{\'e}, Matthieu and Giraudel, Aude
+ and Galibert, Olivier and others},
+ booktitle = {International Conference on Language Resources,
+ Evaluation and Corpora},
+ year = 2012
}
-
-
-
\section{Expanding development: the DIADEMS project}\label{sec:Diadems}
-The goals and expectations of the platform are of many kinds and expand through time, as users experience new ways to work with the archives database and request new tools to broaden the scope of their research activities linked to it. The reflexion collectively engaged by engineers and researchers on the use of the sound archives database led us to set up a large scale project called DIADEMS (\emph{Description, Indexation, Access to Ethnomusicological and Sound Documents})\footnote{\url{http://www.irit.fr/recherches/SAMOVA/DIADEMS/en/welcome/}}.
+The goals and expectations of the platform are of many kinds and expand through time, as users experience new ways to work with the archives database and request new tools to broaden the scope of their research activities linked to it. The reflection collectively engaged by engineers and researchers on the use of the sound archives database led us to set up a large scale project called DIADEMS (\emph{Description, Indexation, Access to Ethnomusicological and Sound Documents})\footnote{\url{http://www.irit.fr/recherches/SAMOVA/DIADEMS/en/welcome/}}.
%DIADEMS is a French national research program, started in January 2013, with three IT research labs (IRIT\footnote{Institut de Recherche en Informatique de Toulouse}, , , LIMSI\footnote{Laboratoire d’Informatique pour la Mécanique et les Sciences de l’Ingénieur}, LABRI\footnote{Laboratoire Bordelais de Recherche en Informatique})\comment{TF: + LAM + labo ethno + Parisson. Plutôt dire a collaboration between ethno + IT}
Started in January 2013, the French national research program DIADEMS is a multi-disciplinary project whose consortium includes research laboratories from \emph{ Science and Technology of Information and Communication}\footnote{IRIT (Institute of research in computing science of Toulouse), LABRI (Bordeaux Computer Science Research Laboratory), LIMSI (Laboratory of computing and mechanics for engineering sciences), LAM (String instruments - Acoustic - Music, Jean Le Rond d'Alembert Institute)} (IT) domain, \emph{Musicology and Ethnomusicology}\footnote{LESC (Laboratory of Ethnology and Comparative Sociology), MNHN (National Museum of Natural History)} domain and Parisson, a company involved in the development of Telemeta.
\subsection{The method of a new interdisciplinary research}
-In this research program, groups from different backgrounds are working together to specify the automatic analysis tools: IT developers, humanities researchers (anthropologists, ethnomusicologists, ethnolinguists) and specialists in speech processing and MIR. The first challenge was to initiate a common interest and a mutual understanding. In this process, DIADEMS gave us the opportunity to improve our understanding on the link between the semantics and acoustics of voice production. As a prelimirary work we attempted to first define vocal categories with a particular interest for liminal oral productions. At the border between speech and song, utterances such as psalmody or recitation are at the center of an old debate in ethnomusicology\footnote{A colloquium on liminal utterances between speech and song will be organised by the International Council for Traditional Music (ICTM) in May 2015 and hosted by the Centre of research in Ethnomusicology (CREM). A round table will be dedicated to the presentation of the main results and findings of the ANR project Diadems}. Gathering specialists from various fields, DIADEMS project goes well beyond the usual disciplinary boundaries. Our aim, through the study of a large range of audio components (pitch range, syllabic flow, metric, polyphonic and so on) is to define and characterize the variability of vocal productions, keeping in mind the semantic aspects. By doing so, we wish to reduce the traditional gap in academic studies between sounds and semantics and to propose combined analytical tools for the study of vocal production\footnote{As an example, research will be conducted on the recognition of "icons of crying"
+In this research program, groups from different backgrounds are working together to specify the automatic analysis tools: IT developers, humanities researchers (anthropologists, ethnomusicologists, ethnolinguists) and specialists in speech processing and MIR. The first challenge was to initiate a common interest and a mutual understanding. In this process, DIADEMS gave us the opportunity to improve our understanding of the link between the semantics and acoustics of voice production. As preliminary work, we attempted to first define vocal categories with a particular interest for liminal oral productions. At the border between speech and song, utterances such as psalmody or recitation are at the center of an old debate in ethnomusicology\footnote{A colloquium on liminal utterances between speech and song will be organized by the International Council for Traditional Music (ICTM) in May 2015 and hosted by the Centre of research in Ethnomusicology (CREM). A round table will be dedicated to the presentation of the main results and findings of the ANR project DIADEMS}. Gathering specialists from various fields, the DIADEMS project goes well beyond the usual disciplinary boundaries. Our aim, through the study of a large range of audio components (pitch range, syllabic flow, metric, polyphonic and so on) is to define and characterize the variability of vocal productions, keeping in mind the semantic aspects. By doing so, we wish to reduce the traditional gap in academic studies between sounds and semantics and to propose combined analytical tools for the study of vocal production\footnote{As an example, research will be conducted on the recognition of "icons of crying"
in lamented utterances. As defined by Urban in \cite{Urban88}, "icons of crying" include cry break, voice inhalation, creaky voice and falsetto vowels.}.
-One of the goals of the DIADEMS project is also to provide useful tools for musical analysis such as detection of musical instrument families, analysis of musical content (tonal, metric and rythmic features), musical similarities and structure (chorus localisation, musical pattern replication).
+One of the goals of the DIADEMS project is also to provide useful tools for musical analysis such as detection of musical instrument families, analysis of musical content (tonal, metric and rhythmic features), musical similarities and structure (chorus localization, musical pattern replication).
The study follows three steps :
\begin{enumerate}
\squeezeup\paragraph{Speech segmentation, with 2 features: 4 Hz modulation energy and entropy modulation}
Speech signal has a characteristic energy modulation peak around the 4 Hertz syllabic rate \cite{Houtgast1985}. In order to model this property, the signal is filtered with a FIR band pass filter, centered on 4 Hertz.
-Entropy modulation is dedicated to discriminate between speech and music~\cite{Pinquier2003}. We first evaluate the signal entropy ($H=-\sum_{i=1}^{k}p_i\cdot log_2(p_i)$, where $p_i$ denotes the probability of event~$i$). Entropy modulation values are usually larger for speech than for music. This measure is used to compute the entropy modulation on each segment.
+Entropy modulation is designed to discriminate between speech and music~\cite{Pinquier2003}. We first evaluate the signal entropy ($H=-\sum_{i=1}^{k}p_i\cdot \log_2(p_i)$, where $p_i$ denotes the probability of event~$i$). Entropy modulation values are usually larger for speech than for music. This measure is used to compute the entropy modulation on each segment.
\squeezeup\paragraph{Speech activity detection based on GMM models}
Speech activity detection is a prerequisite for several speech-related tasks to be integrated in the platform such as speech segmentation, speaker diarization and so on.
The proposed method is based on the supervised learning approach and uses a set of 164 acoustic descriptors
proposed by Peeters \textit{et al.} in \cite{timbre_toolbox}.
-This system (see Figure~\ref{fig:inst_classif_method}) applies the Inertia Ratio Maximization Features Space (IRMFSP) algorihtm~\cite{aes_irmfsp}
+This system (see Figure~\ref{fig:inst_classif_method}) applies the Inertia Ratio Maximization Features Space (IRMFSP) algorithm~\cite{aes_irmfsp}
on annotated samples at the training step to reduce the number of features (to avoid overfitting) while selecting the most discriminative ones. %% see Figure 6
\begin{figure}[htb]