AudioLabs - Publications

Publications

Gaznepoglu, Ünal Ege and Peters, Nils
Deep learning-based F0 synthesis for speaker anonymization
In Proc. EUSIPCO, 2023 (to appear).

% Conference paper. The publication status is kept in the note field so that
% the year field stays a plain four-digit value that styles can sort and label on.
@inproceedings{gaznepoglu_deep_2023,
  author    = {Gaznepoglu, Ünal Ege and Peters, Nils},
  title     = {Deep learning-based {F0} synthesis for speaker anonymization},
  booktitle = {Proc. EUSIPCO},
  month     = oct,
  year      = {2023},
  note      = {to appear},
  copyright = {All rights reserved},
}

Sharma, Neeraj Kumar, Gaznepoglu, Ünal Ege, Robotham, Thomas, and Habets, Emanuël
Two congruent cues are better than one: Impact of ITD-ILD combinations on reaction time for sound lateralization
2023. DOI

% Journal article: the venue name is stored in the journal field of an article
% entry instead of the publisher field of a misc entry, so styles print it.
% NOTE(review): volume, number and pages are not present in this record; add
% them once confirmed against the publisher page.
@article{sharma_congruent_2023,
  author  = {Sharma, Neeraj Kumar and Gaznepoglu, Ünal Ege and Robotham, Thomas and Habets, Emanuël},
  title   = {Two congruent cues are better than one: Impact of {ITD}-{ILD} combinations on reaction time for sound lateralization},
  journal = {Journal of the Acoustical Society of America Express Letters},
  year    = {2023},
  doi     = {10.1121/10.0017936},
  url     = {https://doi.org/10.1121/10.0017936},
}

Gaznepoglu, Ünal Ege, Leschanowsky, Anna, and Peters, Nils
VoicePrivacy 2022 System Description: Speaker Anonymization with Feature-matched F0 Trajectories
arXiv:2210.17338 [cs, eess], also submitted to VoicePrivacy Challenge, 2022. DOI

% arXiv preprint record; bibliographic fields first, identifiers next,
% descriptive metadata (keywords, abstract, copyright) last.
@misc{gaznepoglu_voiceprivacy_2022,
  author     = {Gaznepoglu, Ünal Ege and Leschanowsky, Anna and Peters, Nils},
  title      = {{VoicePrivacy} 2022 {System} {Description}: {Speaker} {Anonymization} with {Feature}-matched {F0} {Trajectories}},
  shorttitle = {{VoicePrivacy} 2022 {System} {Description}},
  publisher  = {arXiv},
  month      = oct,
  year       = {2022},
  note       = {arXiv:2210.17338 [cs, eess], also submitted to VoicePrivacy Challenge},
  doi        = {10.48550/arXiv.2210.17338},
  url        = {http://arxiv.org/abs/2210.17338},
  urldate    = {2022-11-02},
  keywords   = {Computer Science - Cryptography and Security, Computer Science - Sound, Electrical Engineering and Systems Science - Audio and Speech Processing},
  abstract   = {We introduce a novel method to improve the performance of the VoicePrivacy Challenge 2022 baseline B1 variants. Among the known deficiencies of x-vector-based anonymization systems is the insufficient disentangling of the input features. In particular, the fundamental frequency (F0) trajectories, which are used for voice synthesis without any modifications. Especially in cross-gender conversion, this situation causes unnatural sounding voices, increases word error rates (WERs), and personal information leakage. Our submission overcomes this problem by synthesizing an F0 trajectory, which better harmonizes with the anonymized x-vector. We utilized a low-complexity deep neural network to estimate an appropriate F0 value per frame, using the linguistic content from the bottleneck features (BN) and the anonymized x-vector. Our approach results in a significantly improved anonymization system and increased naturalness of the synthesized voice. Consequently, our results suggest that F0 extraction is not required for voice anonymization.},
  copyright  = {All rights reserved},
}

Gaznepoglu, Ünal Ege and Peters, Nils
Exploring the Importance of F0 Trajectories for Speaker Anonymization using X-vectors and Neural Waveform Models
In Workshop on Machine Learning in Speech and Language Processing (MLSLP), 2021. DOI

% Workshop paper record; bibliographic fields first, identifiers next,
% descriptive metadata (language, abstract, copyright) last.
@inproceedings{gaznepoglu_exploring_2021,
  author    = {Gaznepoglu, Ünal Ege and Peters, Nils},
  title     = {Exploring the {Importance} of {F0} {Trajectories} for {Speaker} {Anonymization} using {X}-vectors and {Neural} {Waveform} {Models}},
  booktitle = {Workshop on Machine Learning in Speech and Language Processing (MLSLP)},
  address   = {Virtual},
  month     = oct,
  year      = {2021},
  doi       = {10.48550/arXiv.2110.06887},
  url       = {https://arxiv.org/abs/2110.06887v1},
  urldate   = {2022-03-17},
  language  = {en},
  abstract  = {Voice conversion for speaker anonymization is an emerging field in speech processing research. Many state-of-the-art approaches are based on the resynthesis of the phoneme posteriorgrams (PPG), the fundamental frequency (F0) of the input signal together with modified X-vectors. Our research focuses on the role of F0 for speaker anonymization, which is an understudied area. Utilizing the VoicePrivacy Challenge 2020 framework and its datasets we developed and evaluated eight low-complexity F0 modifications prior resynthesis. We found that modifying the F0 can improve speaker anonymization by as much as 8\% with minor word-error rate degradation.},
  copyright = {All rights reserved},
}

All publications as BibTeX

International Audio Laboratories Erlangen

Publications