This is the accompanying website for the following paper:
@inproceedings{StrahlM24_PianoTranscriptionSemiSup_ISMIR,
  author      = {Sebastian Strahl and Meinard M{\"u}ller},
  title       = {Semi-Supervised Piano Transcription Using Pseudo-Labeling Techniques},
  booktitle   = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  address     = {San Francisco, CA, United States},
  year        = {2024},
  url-details = {https://www.audiolabs-erlangen.de/resources/MIR/2024-ISMIR-WeaklySupervisedMPE}
}
Automatic piano transcription (APT) converts piano recordings into symbolic note events. In recent years, APT has relied on supervised deep learning, which requires large amounts of labeled data that are often scarce. This paper introduces a semi-supervised approach to APT that leverages unlabeled data using techniques originally introduced in computer vision (CV): pseudo-labeling, consistency regularization, and distribution matching. Pseudo-labeling uses the current model to produce artificial labels for unlabeled data, while consistency regularization makes the model's predictions on unlabeled data robust to augmentations. Finally, distribution matching ensures that the pseudo-labels follow the same marginal distribution as the reference labels, adding a further layer of robustness. Our method, evaluated on three piano datasets, improves over purely supervised training and performs comparably to existing semi-supervised approaches. Conceptually, this work demonstrates that semi-supervised learning techniques from CV can be effectively transferred to the music domain, considerably reducing the dependence on large annotated datasets.
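To give an intuition for how these three techniques interact, the following PyTorch sketch combines them into a single training loss for frame-wise (multi-pitch) targets. This is a minimal illustration under assumptions not taken from the paper, not the authors' implementation: the model interface, the augmentation functions (`weak_aug`, `strong_aug`), the confidence threshold `tau`, and the label marginal `p_lab` are hypothetical placeholders.

```python
import torch
import torch.nn.functional as F

def semi_supervised_loss(model, x_lab, y_lab, x_unlab,
                         weak_aug, strong_aug, tau=0.95, lam=1.0, p_lab=None):
    """FixMatch-style loss sketch: a supervised term on labeled audio plus a
    consistency term on unlabeled audio, driven by confident pseudo-labels
    obtained from a weakly augmented view (all arguments are placeholders)."""
    # Supervised loss on the labeled batch (frame-wise multi-label targets).
    loss_sup = F.binary_cross_entropy_with_logits(model(x_lab), y_lab)

    # Pseudo-labels: predict on a weakly augmented unlabeled view, no gradients.
    with torch.no_grad():
        probs = torch.sigmoid(model(weak_aug(x_unlab)))
        if p_lab is not None:
            # Distribution matching (sketch): rescale predictions so their average
            # activity matches the label marginal p_lab estimated on labeled data.
            probs = (probs * p_lab / probs.mean().clamp(min=1e-8)).clamp(0.0, 1.0)
        pseudo = (probs > 0.5).float()                        # hard pseudo-labels
        mask = ((probs > tau) | (probs < 1.0 - tau)).float()  # confident frames only

    # Consistency regularization: predictions on a strongly augmented view
    # must agree with the pseudo-labels on the confident frames.
    logits_unlab = model(strong_aug(x_unlab))
    loss_unsup = (F.binary_cross_entropy_with_logits(logits_unlab, pseudo,
                  reduction="none") * mask).sum() / mask.sum().clamp(min=1.0)

    return loss_sup + lam * loss_unsup
```

The confidence mask keeps only frames where the prediction on the weakly augmented view is decisive, so that noisy pseudo-labels do not dominate the unsupervised term.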
This work was funded by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under Grant No. 350953655 (MU 2686/11-2) and Grant No. 500643750 (MU 2686/15-1). The authors are with the International Audio Laboratories Erlangen, a joint institution of the Friedrich-Alexander-Universität Erlangen-Nürnberg (FAU) and the Fraunhofer Institute for Integrated Circuits IIS.
@inproceedings{HawthorneESRSRE18_OnsetsFrames_ISMIR,
  author    = {Curtis Hawthorne and Erich Elsen and Jialin Song and Adam Roberts and Ian Simon and Colin Raffel and Jesse H. Engel and Sageev Oore and Douglas Eck},
  title     = {Onsets and Frames: {D}ual-Objective Piano Transcription},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {50--57},
  address   = {Paris, France},
  doi       = {10.5281/zenodo.1492341},
  year      = {2018}
}
@inproceedings{SohnBCZZRCKL20_FixMatch_NIPS,
  author    = {Kihyuk Sohn and David Berthelot and Nicholas Carlini and Zizhao Zhang and Han Zhang and Colin Raffel and Ekin Dogus Cubuk and Alexey Kurakin and Chun{-}Liang Li},
  title     = {{FixMatch}: {S}implifying Semi-Supervised Learning with Consistency and Confidence},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  address   = {Virtual},
  year      = {2020}
}
@inproceedings{CheukHS21_SemiSupervisedAMT_ACMMM,
  author    = {Kin Wai Cheuk and Dorien Herremans and Li Su},
  title     = {ReconVAT: {A} Semi-Supervised Automatic Music Transcription Framework for Low-Resource Real-World Data},
  booktitle = {Proceedings of the {ACM} Multimedia Conference},
  pages     = {3918--3926},
  address   = {Virtual Event, China},
  year      = {2021},
  doi       = {10.1145/3474085.3475405}
}
@inproceedings{MamanBermano22_UnalignedAMT_ICML,
  author    = {Ben Maman and Amit H. Bermano},
  title     = {Unaligned Supervision for Automatic Music Transcription in The Wild},
  booktitle = {Proceedings of the International Conference on Machine Learning ({ICML})},
  pages     = {14918--14934},
  address   = {Baltimore, Maryland, USA},
  year      = {2022}
}