In this work we present a new scenario for analyzing and separating linear mixtures of musical instrument signals. When instruments play in unison, traditional source separation methods do not perform well. Although the sources share the same pitch, they often still differ in their modulation frequency caused by vibrato and/or tremolo effects. In this paper we propose source separation schemes that exploit AM/FM characteristics to improve the separation quality of such mixtures. We show a method that processes mixtures based on differences in the amplitude modulation frequencies of the sources by using non-negative tensor factorization. Further, we propose an informed warped time domain approach for separating mixtures based on variations in the instantaneous frequencies of the sources.
Instrument | Vibrato | General MIDI ID |
---|---|---|
Violin | yes | 40 |
Viola | yes | 41 |
Violoncello | yes | 42 |
Trumpet | no | 56 |
Horn | no | 60 |
Baritone Sax | yes | 67 |
Oboe | no | 68 |
Clarinet | no | 71 |
Flute | yes | 73 |
Evaluation Measure / Algorithm | NMF | PVI-NMF | MOD-NTF |
---|---|---|---|
Overall Perceptual Score (OPS) | 15.76 | 17.64 | 17.35 |
Target-related Perceptual Score (TPS) | 30.17 | 32.80 | 34.03 |
Interference-related Perceptual Score (IPS) | 26.07 | 27.03 | 22.73 |
Artifacts-related Perceptual Score (APS) | 46.14 | 54.74 | 46.06 |
Signal to Distortion Ratio (SDRi) | 2.96 | 2.54 | 2.20 |
Source to Interference Ratio (SIRi) | 2.31 | 1.80 | 3.13 |
Sources to Artifacts Ratio (SARi) | 22.87 | 23.35 | 26.09 |
Both the NMF and NTF factorisations use k=4 components.
% Barker and Virtanen, INTERSPEECH 2013: tensor factorisation of modulation spectrograms.
@inproceedings{barker2013non,
  author        = {Barker, Tom and Virtanen, Tuomas},
  title         = {Non-negative tensor factorisation of modulation spectrograms for monaural sound source separation},
  booktitle     = {Proceedings of INTERSPEECH},
  year          = {2013},
  date-added    = {2014-03-09 16:57:55 +0000},
  date-modified = {2014-03-09 16:57:55 +0000},
}
% Stoeter et al., 2013: listeners' ability to count instruments in polyphonic music.
% Typed as an article because the venue carries volume and issue numbers; the
% standard inproceedings styles warn when both volume and number are present.
@article{stoter2013human,
  author        = {St{\"o}ter, Fabian-Robert and Schoeffler, Michael and Edler, Bernd and Herre, J{\"u}rgen},
  title         = {Human ability of counting the number of instruments in polyphonic music},
  journal       = {Proceedings of Meetings on Acoustics},
  volume        = {19},
  number        = {1},
  publisher     = {Acoustical Society of America},
  year          = {2013},
  date-added    = {2014-03-10 17:59:04 +0000},
  date-modified = {2014-03-10 17:59:04 +0000},
}
% Schoeffler et al., ISMIR 2013: instrument-count estimation, internet vs. laboratory results.
@inproceedings{schoeffler2013experiment,
  author        = {Schoeffler, Michael and St{\"o}ter, Fabian-Robert and Bayerlein, Harald and Edler, Bernd and Herre, J{\"u}rgen},
  title         = {An experiment about estimating the number of instruments in polyphonic music: a comparison between internet and laboratory results},
  booktitle     = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year          = {2013},
  date-added    = {2014-03-10 18:01:10 +0000},
  date-modified = {2014-03-12 21:02:06 +0000},
}
% Yoshii et al., ISMIR 2013: time-domain source separation without phase reconstruction.
@inproceedings{yoshiibeyond,
  author        = {Yoshii, Kazuyoshi and Tomioka, Ryota and Mochihashi, Daichi and Goto, Masataka},
  title         = {Beyond {NMF}: Time-Domain Audio Source Separation without Phase Reconstruction},
  booktitle     = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year          = {2013},
  date-added    = {2014-03-12 20:59:15 +0000},
  date-modified = {2014-03-12 21:01:31 +0000},
}
% Lin et al., DAFx 2013: timbre-constrained analysis for musical note separation.
% Initials are written "W. Y." so BibTeX treats them as two separate given-name
% tokens; the venue acronym is braced like the other DAFx 2013 entry in this file.
@inproceedings{lintimbre,
  author        = {Lin, Yiju and Chang, Wei-Chen and Wang, Tien-Ming and Su, Alvin W. Y. and Liao, Wei-Hsiang},
  title         = {Timbre-Constrained Recursive Time-Varying Analysis for Musical Note Separation},
  booktitle     = {Proceedings of the 16th International Conference on Digital Audio Effects ({DAFx})},
  pages         = {2--6},
  year          = {2013},
  date-added    = {2014-03-13 22:21:42 +0000},
  date-modified = {2014-03-13 22:21:54 +0000},
}
% Ozerov, Duong and Chevallier, 2013: weighted nonnegative tensor factorization report.
% The techreport type requires an institution field.
% NOTE(review): the institution is assumed from the authors' affiliation; confirm
% against the report itself.
@techreport{ozerov2013weighted,
  author        = {Ozerov, Alexey and Duong, Ngoc Q. K. and Chevallier, Louis},
  title         = {Weighted nonnegative tensor factorization: on monotonicity of multiplicative update rules and application to user-guided audio source separation},
  institution   = {Technicolor},
  year          = {2013},
  date-added    = {2014-03-16 20:40:50 +0000},
  date-modified = {2014-03-16 20:40:50 +0000},
}
% Ono et al., MLSP 2013: report on the 2013 Signal Separation Evaluation Campaign.
% The second author's name carries its diacritics as BibTeX special characters.
@inproceedings{sisec2013,
  author        = {Ono, Nobutaka and Koldovsk{\'y}, Zbyn{\v{e}}k and Miyabe, Shigeki and Ito, Nobutaka},
  title         = {The 2013 Signal Separation Evaluation Campaign},
  booktitle     = {Proceedings of the {IEEE} International Workshop on Machine Learning for Signal Processing ({MLSP})},
  pages         = {1--6},
  year          = {2013},
  date-added    = {2014-03-16 21:11:31 +0000},
  date-modified = {2014-03-16 21:11:31 +0000},
}
% Cano, Dittmar and Schuller, DAFx 2013: prior information and additivity constraints.
% The title is stored in title case rather than hard-coded capitals, so the
% bibliography style decides the casing; the venue name is spelled out in full.
@inproceedings{cano2013,
  author        = {Cano, Estefan{\'\i}a and Dittmar, Christian and Schuller, Gerald},
  title         = {Re-Thinking Sound Separation: Prior Information and Additivity Constraint in Separation Algorithms},
  booktitle     = {Proceedings of the 16th International Conference on Digital Audio Effects ({DAFx})},
  year          = {2013},
  date-added    = {2014-03-16 23:47:22 +0000},
  date-modified = {2014-03-16 23:48:13 +0000},
}
% Salamon and Gomez, IEEE TASLP 2012: melody extraction using pitch contour characteristics.
@article{salamon2012melody,
  author        = {Salamon, Justin and G{\'o}mez, Emilia},
  title         = {Melody extraction from polyphonic music signals using pitch contour characteristics},
  journal       = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume        = {20},
  number        = {6},
  pages         = {1759--1770},
  publisher     = {IEEE},
  year          = {2012},
  date-added    = {2014-03-09 16:25:52 +0000},
  date-modified = {2014-03-09 16:25:52 +0000},
}
% Ewert and Mueller, ICASSP 2012: score-informed constraints for NMF-based separation.
@inproceedings{ewert2012using,
  author        = {Ewert, Sebastian and M{\"u}ller, Meinard},
  title         = {Using score-informed constraints for {NMF}-based source separation},
  booktitle     = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  pages         = {129--132},
  year          = {2012},
  date-added    = {2014-03-27 10:23:34 +0000},
  date-modified = {2014-03-27 10:23:34 +0000},
}
% Ozerov, Vincent and Bimbot, IEEE TASLP 2012: framework for prior information in separation.
@article{ozerov2012general,
  author        = {Ozerov, Alexey and Vincent, Emmanuel and Bimbot, Fr{\'e}d{\'e}ric},
  title         = {A general flexible framework for the handling of prior information in audio source separation},
  journal       = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume        = {20},
  number        = {4},
  pages         = {1118--1133},
  publisher     = {IEEE},
  year          = {2012},
  date-added    = {2014-03-27 10:44:11 +0000},
  date-modified = {2014-03-27 10:44:11 +0000},
}
% Vincent et al., Signal Processing 2012: the 2007--2010 signal separation evaluation campaigns.
@article{vincent2012signal,
  author        = {Vincent, Emmanuel and Araki, Shoko and Theis, Fabian and Nolte, Guido and Bofill, Pau and Sawada, Hiroshi and Ozerov, Alexey and Gowreesunker, Vikrham and Lutter, Dominik and Duong, Ngoc Q. K.},
  title         = {The signal separation evaluation campaign (2007--2010): Achievements and remaining challenges},
  journal       = {Signal Processing},
  volume        = {92},
  number        = {8},
  pages         = {1928--1936},
  publisher     = {Elsevier},
  year          = {2012},
  date-added    = {2014-03-27 10:45:51 +0000},
  date-modified = {2014-03-27 10:45:51 +0000},
}
% GENESIS S.A. Loudness Toolbox, version 1.2 (2012).
% The company is recorded as a corporate author (double braces keep the name as
% one unit) instead of being embedded in the title.
@misc{genesis,
  author        = {{GENESIS S.A.}},
  title         = {{Loudness Toolbox} (Version 1.2)},
  year          = {2012},
  date-added    = {2014-03-27 12:18:35 +0000},
  date-modified = {2014-03-27 12:18:54 +0000},
}
% Hennequin, Badeau and David, IEEE TASLP 2011: NMF with time--frequency activations.
@article{hennequin2011nmf,
  author        = {Hennequin, Romain and Badeau, Roland and David, Bertrand},
  title         = {{NMF} with time--frequency activations to model nonstationary audio events},
  journal       = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume        = {19},
  number        = {4},
  pages         = {744--753},
  publisher     = {IEEE},
  year          = {2011},
  date-added    = {2014-03-15 22:13:50 +0000},
  date-modified = {2014-03-15 22:13:50 +0000},
}
% Emiya et al., IEEE TASLP 2011: subjective and objective quality assessment of separation.
@article{emiya2011subjective,
  author        = {Emiya, Valentin and Vincent, Emmanuel and Harlander, Niklas and Hohmann, Volker},
  title         = {Subjective and objective quality assessment of audio source separation},
  journal       = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume        = {19},
  number        = {7},
  pages         = {2046--2057},
  publisher     = {IEEE},
  year          = {2011},
  date-added    = {2014-03-16 21:00:31 +0000},
  date-modified = {2014-03-16 21:00:31 +0000},
}
% Nakano et al., 2010: NMF with Markov-chained bases for music spectrograms.
% The proper noun in the title is braced so sentence-casing styles keep its capital.
@incollection{nakano2010nonnegative,
  author        = {Nakano, Masahiro and Le Roux, Jonathan and Kameoka, Hirokazu and Kitano, Yu and Ono, Nobutaka and Sagayama, Shigeki},
  title         = {Nonnegative matrix factorization with {Markov}-chained bases for modeling time-varying patterns in music spectrograms},
  booktitle     = {Latent Variable Analysis and Signal Separation},
  pages         = {149--156},
  publisher     = {Springer},
  year          = {2010},
  date-added    = {2014-03-15 22:26:19 +0000},
  date-modified = {2014-03-15 22:26:19 +0000},
}
% Hennequin, Badeau and David, DAFx 2010: time-dependent parametric and harmonic NMF templates.
@inproceedings{hennequin2010time,
  author        = {Hennequin, Romain and Badeau, Roland and David, Bertrand},
  title         = {Time-dependent parametric and harmonic templates in non-negative matrix factorization},
  booktitle     = {Proceedings of the International Conference on Digital Audio Effects (DAFx)},
  pages         = {246--253},
  year          = {2010},
  date-added    = {2014-03-22 18:49:03 +0000},
  date-modified = {2014-03-22 18:49:03 +0000},
}
% Edler et al., 126th AES Convention (2009): time-warped MDCT for speech transform coding.
% Changes: fourth author is given as "Surname, Given"; the month uses the
% predefined macro; only the acronyms in the title and booktitle are braced,
% so styles can still recase the remaining words.
@inproceedings{edler2009,
  author        = {Edler, Bernd and Disch, Sascha and Bayer, Stefan and Fuchs, Guillaume and Geiger, Ralf},
  title         = {A Time-Warped {MDCT} Approach to Speech Transform Coding},
  booktitle     = {126th {AES} Convention},
  address       = {Munich, Germany},
  month         = may,
  year          = {2009},
  note          = {Preprint 7710},
  date-modified = {2014-03-26 01:54:18 +0000},
}
% Baeckstroem, Bayer and Disch, INTERSPEECH 2009: pitch variation estimation.
@inproceedings{backstrom2009pitch,
  author        = {B{\"a}ckstr{\"o}m, Tom and Bayer, Stefan and Disch, Sascha},
  title         = {Pitch variation estimation},
  booktitle     = {Proceedings of INTERSPEECH},
  pages         = {2595--2598},
  year          = {2009},
  date-added    = {2014-03-26 01:45:25 +0000},
  date-modified = {2014-03-26 01:45:25 +0000},
}
% Li, Woodruff and Wang, IEEE TASLP 2009: separation using pitch and common amplitude modulation.
% The journal name is written in the same natural-order form used by the other
% entries for this journal, so the bibliography shows one spelling.
@article{li2009monaural,
  author        = {Li, Yipeng and Woodruff, John and Wang, DeLiang},
  title         = {Monaural musical sound separation based on pitch and common amplitude modulation},
  journal       = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume        = {17},
  number        = {7},
  pages         = {1361--1371},
  publisher     = {IEEE},
  year          = {2009},
  date-added    = {2014-03-27 15:04:28 +0000},
  date-modified = {2014-03-27 15:04:28 +0000},
}
% Smaragdis, Raj and Shashanka, ICASSP 2008: sparse, shift-invariant feature extraction.
% Initials are written "V. S." so they parse as two given-name tokens; the title
% no longer ends in a period, since the style supplies final punctuation.
@inproceedings{smaragdis2008sparse,
  author        = {Smaragdis, Paris and Raj, Bhiksha and Shashanka, Madhusudana V. S.},
  title         = {Sparse and shift-invariant feature extraction from non-negative data},
  booktitle     = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  pages         = {2069--2072},
  year          = {2008},
  date-added    = {2014-03-23 21:36:55 +0000},
  date-modified = {2014-03-23 21:36:55 +0000},
}
% Virtanen, IEEE TASLP 2007: NMF with temporal continuity and sparseness criteria.
@article{virtanen2007monaural,
  author        = {Virtanen, Tuomas},
  title         = {Monaural sound source separation by nonnegative matrix factorization with temporal continuity and sparseness criteria},
  journal       = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume        = {15},
  number        = {3},
  pages         = {1066--1074},
  publisher     = {IEEE},
  year          = {2007},
  date-added    = {2014-03-09 16:57:56 +0000},
  date-modified = {2014-03-09 16:57:56 +0000},
}
% MacLeod, 2006: vibrato rates and widths of violin and viola players.
% The original article entry had no journal, which is a required field.
% NOTE(review): retyped as a doctoral thesis on the assumption that this 2006
% title is the author's dissertation; confirm the type and the school.
@phdthesis{macleod2006influences,
  author        = {MacLeod, Rebecca Bowman},
  title         = {Influences of dynamic level and pitch height on the vibrato rates and widths of violin and viola players},
  school        = {Florida State University},
  year          = {2006},
  date-added    = {2014-03-15 23:18:06 +0000},
  date-modified = {2014-03-15 23:18:06 +0000},
}
% Vincent, Gribonval and Fevotte, IEEE TASLP 2006: performance measurement in blind separation.
@article{vincent2006performance,
  author        = {Vincent, Emmanuel and Gribonval, R{\'e}mi and F{\'e}votte, C{\'e}dric},
  title         = {Performance measurement in blind audio source separation},
  journal       = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume        = {14},
  number        = {4},
  pages         = {1462--1469},
  publisher     = {IEEE},
  year          = {2006},
  date-added    = {2014-03-21 20:18:56 +0000},
  date-modified = {2014-03-21 20:18:56 +0000},
}
% Smaragdis and Brown, WASPAA 2003: NMF for polyphonic music transcription.
@inproceedings{smaragdis2003non,
  author        = {Smaragdis, Paris and Brown, Judith C},
  title         = {Non-negative matrix factorization for polyphonic music transcription},
  booktitle     = {Proceedings of the {IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics ({WASPAA})},
  pages         = {177--180},
  year          = {2003},
  date-added    = {2014-03-15 21:47:01 +0000},
  date-modified = {2014-03-15 21:47:01 +0000},
}
% Wang, 1995: instantaneous and frequency-warped techniques for source separation.
% The workshop acronym now matches the one used for the same workshop name in
% the 2003 Smaragdis and Brown entry.
% NOTE(review): confirm the acronym against the 1995 proceedings front matter.
@inproceedings{wang1995instantaneous,
  author        = {Wang, Avery},
  title         = {Instantaneous and frequency-warped techniques for source separation and signal parametrization},
  booktitle     = {Proceedings of the {IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics ({WASPAA})},
  pages         = {47--50},
  year          = {1995},
  date-added    = {2014-03-09 16:27:11 +0000},
  date-modified = {2014-03-09 16:27:11 +0000},
}
% Huron, Music Perception 1989: voice denumerability in polyphonic music.
% The given name is stored in full so the style can abbreviate it as needed.
% NOTE(review): volume and issue were added from the published record; verify
% against the journal.
@article{huron89,
  author        = {Huron, David},
  title         = {Voice Denumerability in Polyphonic Music of Homogeneous Timbres},
  journal       = {Music Perception},
  volume        = {6},
  number        = {4},
  pages         = {361--382},
  publisher     = {JSTOR},
  year          = {1989},
  date-added    = {2014-03-10 18:06:49 +0000},
  date-modified = {2014-03-10 18:06:49 +0000},
}