This is a joint seminar offered by the professors of the International Audio Laboratories Erlangen. In our seminar, we offer topics in various areas of audio and acoustic signal processing. In close cooperation with a tutor, students can deepen their knowledge in signal processing and machine learning by studying individually assigned research papers. In addition, the seminar allows students to further develop their skills in scientific reading, writing, and presenting.
Note that this is a technically oriented seminar aimed at master's students of engineering and computer science. The offered topics require a good mathematical background, a solid understanding of digital signal processing, as well as general knowledge of and interest in the field of audio. Below, we provide further information on the available subgroups and examples of seminar topics and literature. Note that the specific topics may change and will be announced at the beginning of each semester.
to be defined
In this seminar group, we study research papers about audio algorithms in the context of emerging IoT applications. Students will use the role-playing concept to study and present the assigned papers from the perspectives of different expert groups.
@inproceedings{nelus2021estimation, author = {Nelus, Alexandru and Glitza, Rene and Martin, Rainer}, title = {Estimation of Microphone Clusters in Acoustic Sensor Networks Using Unsupervised Federated Learning}, booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})}, pages = {761--765}, year = {2021}, organization = {IEEE}, doi = {10.1109/ICASSP39728.2021.9414186} }
@article{zhu2018admm, author = {Zhu, Yanping and Jiang, Aimin and Kwan, Hon Keung}, title = {{ADMM}-Based Sensor Network Localization Using Low-Rank Approximation}, journal = {{IEEE} Sensors Journal}, volume = {18}, number = {20}, pages = {8463--8471}, year = {2018}, publisher = {IEEE}, doi = {10.1109/JSEN.2018.2866686} }
@inproceedings{subramani2021point, author = {Subramani, Krishna and Smaragdis, Paris}, title = {Point Cloud Audio Processing}, booktitle = {Proceedings of the {IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics ({WASPAA})}, pages = {31--35}, year = {2021}, organization = {IEEE}, url-pdf = {https://arxiv.org/pdf/2105.02469.pdf} }
@inproceedings{kotti2021clock, author = {Kotti, Sofia-Eirini and Heusdens, Richard and Hendriks, Richard C.}, title = {Clock-Offset and Microphone Gain Mismatch Invariant Beamforming}, booktitle = {Proceedings of the 28th European Signal Processing Conference ({EUSIPCO})}, pages = {176--180}, year = {2021}, organization = {IEEE}, doi = {10.23919/Eusipco47968.2020.9287852}, url-pdf = {https://pure.tudelft.nl/ws/portalfiles/portal/83830242/0000176.pdf} }
@article{hayashi2020conformer, author = {Hayashi, Tomoki and Yoshimura, Takenori and Adachi, Yusuke}, title = {Conformer-Based {ID}-Aware Autoencoder for Unsupervised Anomalous Sound Detection}, journal = {DCASE2020 Challenge, Tech. Rep.}, year = {2020}, url-pdf = {https://dcase.community/documents/challenge2020/technical_reports/DCASE2020_Hayashi_111_t2.pdf} }
@article{alsouda2018machine, author = {Alsouda, Yasser and Pllana, Sabri and Kurti, Arianit}, title = {A Machine Learning Driven {IoT} Solution for Noise Classification in Smart Cities}, journal = {arXiv preprint arXiv:1809.00238}, year = {2018}, url-pdf = {https://arxiv.org/pdf/1809.00238} }
@article{breebaart2019spatial, author = {Breebaart, Jeroen and Cengarle, Giulio and Lu, Lie and Mateos, Toni and Purnhagen, Heiko and Tsingos, Nicolas}, title = {Spatial Coding of Complex Object-Based Program Material}, journal = {Journal of the Audio Engineering Society}, volume = {67}, number = {7/8}, pages = {486--497}, month = {July}, year = {2019}, doi = {10.17743/jaes.2018.0067} }
@inproceedings{9054414, author = {Kentgens, Maximilian and Behler, Andreas and Jax, Peter}, title = {Translation of a Higher Order Ambisonics Sound Scene Based on Parametric Decomposition}, booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})}, pages = {151--155}, year = {2020}, doi = {10.1109/ICASSP40776.2020.9054414} }
@inproceedings{kelly20213d, author = {Kelly, Jack and Woszczyk, Wieslaw and King, Richard}, title = {{3D} Impulse Response Convolution With Multichannel Direct Sound: Assessing Perceptual Equivalency Between Room- and Source-Impression for Music Production}, booktitle = {Proceedings of the 151st Audio Engineering Society Convention}, month = {October}, year = {2021}, url-details = {https://www.aes.org/e-lib/online/browse.cfm?elib=21496} }
@inproceedings{nishiguchi2019spatial, author = {Nishiguchi, Masayuki and Kato, Kodai and Watanabe, Kanji and Abe, Koji and Takane, Shouichi}, title = {Spatial Auditory Masking for Three-Dimensional Audio Coding}, booktitle = {Proceedings of the 147th Audio Engineering Society Convention}, month = {October}, year = {2019}, url-details = {https://www.aes.org/e-lib/browse.cfm?elib=20632} }
@inproceedings{vandepar2019temporal, author = {van de Par, Steven and Disch, Sascha and Niedermeier, Andreas and Burdiel Pérez, Elena and Edler, Bernd}, title = {Temporal Envelope-Based Psychoacoustic Modelling for Evaluating Non-Waveform Preserving Audio Codecs}, booktitle = {Proceedings of the 147th Audio Engineering Society Convention}, month = {October}, year = {2019}, url-details = {https://www.aes.org/e-lib/online/browse.cfm?elib=20686} }
Music processing and music information retrieval (MIR) are exciting and challenging research areas. Music is not only a ubiquitous and vital part of our lives but also relates to many different research disciplines including signal processing, information retrieval, machine learning, and musicology. In this subgroup, we study computational techniques for processing, searching, organizing, and accessing music-related data. This will require a good understanding of general concepts in signal processing, data science, and machine learning. Furthermore, basic knowledge in music theory and a strong interest in music are extremely helpful to get enthusiastic about the field of music processing. This subgroup particularly aims at students who have participated in the lecture Music Processing Analysis.
@article{DriedgerMueller16_ReviewTSM_AppliedSciences, author = {Driedger, Jonathan and M{\"u}ller, Meinard}, title = {A Review on Time-Scale Modification of Music Signals}, journal = {Applied Sciences}, volume = {6}, number = {2}, pages = {57--82}, month = {February}, year = {2016}, url-pdf = {aps-w23/papers/2016_DriedgerMueller_TSMOverview_AppliedSciences_PrintedVersion.pdf} }
@inproceedings{EwertM12_ScoreInformedNMF_ICASSP, author = {Ewert, Sebastian and M{\"u}ller, Meinard}, title = {Using Score-Informed Constraints for {NMF}-based Source Separation}, booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})}, address = {Kyoto, Japan}, pages = {129--132}, year = {2012}, url-pdf = {aps-w23/papers/2012_EwertMueller_ScoreConstrainedNMF_ICASSP.pdf}, url-details = {http://resources.mpi-inf.mpg.de/MIR/ICASSP2012-ScoreInformedNMF/} }
@inproceedings{Fitzgerald10_HarmPercSep_DAFX, author = {FitzGerald, Derry}, title = {Harmonic/Percussive Separation Using Median Filtering}, booktitle = {Proceedings of the International Conference on Digital Audio Effects ({DAFx})}, address = {Graz, Austria}, pages = {246--253}, month = {September}, year = {2010}, url-pdf = {aps-w23/papers/2010_FitzGerald_HarmonicPercussiveSep_DAFx.pdf} }
@inproceedings{StollerDE19_LyricsAlignment_ICASSP, author = {Stoller, Daniel and Durand, Simon and Ewert, Sebastian}, title = {End-To-End Lyrics Alignment for Polyphonic Music Using an Audio-To-Character Recognition Model}, booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})}, address = {Brighton, {UK}}, pages = {181--185}, year = {2019}, url-pdf = {aps-w23/papers/2019_StollerEwert_LyricsCTC_ICASSP_arXiv.pdf} }
This subgroup particularly aims at students who attended the Virtual Acoustics or Speech Enhancement lecture.
@article{Antonello2017, author = {Antonello, N. and De Sena, E. and Moonen, M. and Naylor, P. A. and van Waterschoot, T.}, title = {Room Impulse Response Interpolation Using a Sparse Spatio-Temporal Representation of the Sound Field}, journal = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing}, volume = {25}, number = {10}, pages = {1929--1941}, year = {2017}, url-pdf = {aps-w23/papers/sap_Antonello2017.pdf} }
@article{Tervo2012, author = {Tervo, S. and P{\"a}tynen, J. and Kuusinen, A. and Lokki, T.}, title = {Spatial Decomposition Method for Room Impulse Responses}, journal = {Journal of the Audio Engineering Society}, volume = {61}, pages = {17--28}, year = {2013}, url-pdf = {aps-w23/papers/sap_Tervo2012.pdf} }
@article{Chakrabarty2019, author = {Chakrabarty, S. and Habets, E. A. P.}, title = {Multi-Speaker {DOA} Estimation Using Deep Convolutional Networks Trained With Noise Signals}, journal = {{IEEE} Journal of Selected Topics in Signal Processing}, volume = {13}, number = {1}, pages = {8--21}, month = {March}, year = {2019}, url-pdf = {aps-w23/papers/sap_Chakrabarty2019.pdf} }
@article{Pulkki1997, author = {Pulkki, V.}, title = {Virtual Sound Source Positioning Using Vector Base Amplitude Panning}, journal = {Journal of the Audio Engineering Society}, volume = {45}, number = {6}, pages = {456--466}, month = {June}, year = {1997}, url-pdf = {aps-w23/papers/sap_Pulkki1997.pdf} }
@inproceedings{Noisternig2003, author = {Noisternig, M. and Musil, T. and Sontacchi, A. and H{\"o}ldrich, R.}, title = {{3D} Binaural Sound Reproduction Using a Virtual Ambisonic Approach}, booktitle = {Proceedings of the International Symposium on Virtual Environments, Human-Computer Interfaces, and Measurement Systems}, pages = {174--178}, month = {July}, year = {2003}, url-pdf = {aps-w23/papers/sap_Noisternig2003.pdf} }
@article{Adavanne2019, author = {Adavanne, S. and Politis, A. and Nikunen, J. and Virtanen, T.}, title = {Sound Event Localization and Detection of Overlapping Sources Using Convolutional Recurrent Neural Networks}, journal = {{IEEE} Journal of Selected Topics in Signal Processing}, volume = {13}, number = {1}, pages = {34--48}, month = {March}, year = {2019}, url-pdf = {aps-w23/papers/sap_Adavanne2019.pdf} }
@article{Yoshioka2012, author = {Yoshioka, T. and Nakatani, T.}, title = {Generalization of Multi-Channel Linear Prediction Methods for Blind {MIMO} Impulse Response Shortening}, journal = {{IEEE} Transactions on Audio, Speech, and Language Processing}, volume = {20}, number = {10}, pages = {2707--2720}, year = {2012}, url-pdf = {aps-w23/papers/sap_Yoshioka2012.pdf} }
The offered topics usually assume a broad range of prior knowledge. If terms like DFT, spectral analysis, filterbanks, or deep neural networks are new to you, we highly recommend looking into these topics before the seminar starts.
@inproceedings{schuh2016efficient, author = {Schuh, Florian and Dick, Sascha and Füg, Richard and Helmrich, Christian R. and Rettelbach, Nikolaus and Schwegler, Tobias}, title = {Efficient Multichannel Audio Transform Coding with Low Delay and Complexity}, booktitle = {Proceedings of the 141st Audio Engineering Society Convention}, month = {September}, year = {2016}, url-details = {http://www.aes.org/e-lib/browse.cfm?elib=18464} }
@inproceedings{NEURIPS2020_c5d73680, author = {Kong, Jungil and Kim, Jaehyeon and Bae, Jaekyoung}, title = {{HiFi-GAN}: Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis}, booktitle = {Advances in Neural Information Processing Systems}, editor = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M. F. and Lin, H.}, volume = {33}, pages = {17022--17033}, publisher = {Curran Associates, Inc.}, year = {2020} }
@inproceedings{10.1145/1143844.1143891, author = {Graves, Alex and Fern{\'a}ndez, Santiago and Gomez, Faustino and Schmidhuber, J{\"u}rgen}, title = {Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks}, booktitle = {Proceedings of the 23rd International Conference on Machine Learning}, series = {ICML '06}, location = {Pittsburgh, Pennsylvania, USA}, pages = {369--376}, numpages = {8}, year = {2006}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, isbn = {1595933832}, doi = {10.1145/1143844.1143891}, url = {https://doi.org/10.1145/1143844.1143891} }
@inproceedings{8683143, author = {Prenger, Ryan and Valle, Rafael and Catanzaro, Bryan}, title = {Waveglow: A Flow-based Generative Network for Speech Synthesis}, booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})}, pages = {3617--3621}, year = {2019}, doi = {10.1109/ICASSP.2019.8683143} }
@article{9444770, author = {Guo, Ning and Edler, Bernd}, title = {Frequency Domain Long-Term Prediction for Low Delay General Audio Coding}, journal = {{IEEE} Signal Processing Letters}, volume = {28}, pages = {1185--1189}, year = {2021}, doi = {10.1109/LSP.2021.3084503} }
@article{10.2307/3680788, author = {Serra, Xavier and Smith, Julius}, title = {Spectral Modeling Synthesis: A Sound Analysis/Synthesis System Based on a Deterministic Plus Stochastic Decomposition}, journal = {Computer Music Journal}, volume = {14}, number = {4}, pages = {12--24}, year = {1990}, publisher = {The MIT Press}, issn = {01489267, 15315169}, url = {http://www.jstor.org/stable/3680788}, urldate = {2022-08-16} }