% ICASSP 2025 conference paper: emotion preservation in disentanglement-based speaker anonymization.
@inproceedings{gaznepoglu_why_2025,
  author    = {Gaznepoglu, Ünal Ege and Peters, Nils},
  title     = {Why disentanglement-based speaker anonymization systems fail at preserving emotions?},
  booktitle = {Proc. {IEEE} Intl. Conf. on Acoustics, Speech and Signal Processing ({ICASSP})},
  month     = apr,
  year      = {2025},
  copyright = {All rights reserved},
}
% Interspeech 2025 conference paper: linguistic-content-based attacks on VoicePrivacy systems.
@inproceedings{gaznepoglu_you_2025,
  author    = {Gaznepoglu, Ünal Ege and Leschanowsky, Anna and Aloradi, Ahmad and Singh, Prachi and Tenbrinck, Daniel and Habets, Emanuël A. P. and Peters, Nils},
  title     = {You Are What You Say: Exploiting Linguistic Content for {VoicePrivacy} Attacks},
  booktitle = {Proc. Interspeech Conf.},
  month     = aug,
  year      = {2025},
}
% Journal article (2023): effect of ITD-ILD cue combinations on reaction time in sound lateralization.
% The last author is written exactly as in the other entries of this file so that
% name-based grouping treats them as the same person.
@article{sharma_congruent_2023,
  author  = {Sharma, Neeraj Kumar and Gaznepoglu, Ünal Ege and Robotham, Thomas and Habets, Emanuël A. P.},
  title   = {Two congruent cues are better than one: Impact of {ITD}-{ILD} combinations on reaction time for sound lateralization},
  journal = {{JASA} Express Letters},
  year    = {2023},
  doi     = {10.1121/10.0017936},
  url     = {https://doi.org/10.1121/10.0017936},
}
% SPSC 2023 symposium paper: resynthesis quality of the VoicePrivacy Challenge Baseline B1.
% Both year (classic BibTeX) and date (biblatex) are kept so the entry works with either toolchain.
@inproceedings{gaznepoglu_evaluation_2023,
  author     = {Gaznepoglu, Ünal Ege and Peters, Nils},
  title      = {Evaluation of the Speech Resynthesis Capabilities of the {VoicePrivacy} Baseline {B1}},
  booktitle  = {Proc. 3rd Symp. on Security and Privacy in Speech Communication},
  eventtitle = {3rd Symposium on Security and Privacy in Speech Communication},
  publisher  = {{ISCA}},
  pages      = {60--64},
  year       = {2023},
  date       = {2023-08-19},
  doi        = {10.21437/SPSC.2023-10},
  url        = {https://www.isca-speech.org/archive/spsc_2023/gaznepoglu23_spsc.html},
  urldate    = {2023-09-20},
  langid     = {english},
  abstract   = {Speaker anonymization systems continue to improve their ability to obfuscate the original speaker characteristics in a speech signal, but often create processing artifacts and unnatural sounding voices as a tradeoff. Many of those systems stem from the {VoicePrivacy} Challenge ({VPC}) Baseline B1, using a neural vocoder to synthesize speech from an F0, x-vectors and bottleneck features-based speech representation. Inspired by this, we investigate the reproduction capabilities of the aforementioned baseline, to assess how successful the shared methodology is in synthesizing human-like speech. We use four objective metrics to measure speech quality, waveform similarity, and F0 similarity. Our findings indicate that both the speech representation and the vocoder introduces artifacts, causing an unnatural perception. A {MUSHRA}-like listening test on 18 subjects corroborate our findings, motivating further research on the analysis and synthesis components of the {VPC} Baseline B1.},
}
% EUSIPCO 2023 conference paper: deep-learning-based F0 synthesis for speaker anonymization.
% The venue acronym is brace-protected, like the acronyms in the other booktitle fields of this file.
@inproceedings{gaznepoglu_deep_2023,
  author    = {Gaznepoglu, Ünal Ege and Peters, Nils},
  title     = {Deep learning-based {F0} synthesis for speaker anonymization},
  booktitle = {Proc. European Signal Proc. Conf. ({EUSIPCO})},
  month     = sep,
  year      = {2023},
  copyright = {All rights reserved},
}
% arXiv preprint (Oct 2022): VoicePrivacy 2022 system description using feature-matched F0 trajectories.
% Only the proper noun and the F0 symbol are brace-protected in the title; casing of the
% remaining words is left to the bibliography style. The arXiv identifier is stored in the
% eprint fields rather than in the free-text note.
@misc{gaznepoglu_voiceprivacy_2022,
  author        = {Gaznepoglu, Ünal Ege and Leschanowsky, Anna and Peters, Nils},
  title         = {{VoicePrivacy} 2022 System Description: Speaker Anonymization with Feature-matched {F0} Trajectories},
  shorttitle    = {{VoicePrivacy} 2022 System Description},
  publisher     = {arXiv},
  month         = oct,
  year          = {2022},
  eprint        = {2210.17338},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2210.17338},
  url           = {https://arxiv.org/abs/2210.17338},
  urldate       = {2022-11-02},
  note          = {Also submitted to VoicePrivacy Challenge},
  keywords      = {Computer Science - Cryptography and Security, Computer Science - Sound, Electrical Engineering and Systems Science - Audio and Speech Processing},
  copyright     = {All rights reserved},
  abstract      = {We introduce a novel method to improve the performance of the VoicePrivacy Challenge 2022 baseline B1 variants. Among the known deficiencies of x-vector-based anonymization systems is the insufficient disentangling of the input features. In particular, the fundamental frequency (F0) trajectories, which are used for voice synthesis without any modifications. Especially in cross-gender conversion, this situation causes unnatural sounding voices, increases word error rates (WERs), and personal information leakage. Our submission overcomes this problem by synthesizing an F0 trajectory, which better harmonizes with the anonymized x-vector. We utilized a low-complexity deep neural network to estimate an appropriate F0 value per frame, using the linguistic content from the bottleneck features (BN) and the anonymized x-vector. Our approach results in a significantly improved anonymization system and increased naturalness of the synthesized voice. Consequently, our results suggest that F0 extraction is not required for voice anonymization.},
}
% MLSLP 2021 workshop paper: role of F0 trajectories in x-vector-based speaker anonymization.
% Only F0 and X-vectors are brace-protected in the title; casing of the remaining words is
% left to the bibliography style.
@inproceedings{gaznepoglu_exploring_2021,
  author    = {Gaznepoglu, Ünal Ege and Peters, Nils},
  title     = {Exploring the Importance of {F0} Trajectories for Speaker Anonymization using {X-vectors} and Neural Waveform Models},
  booktitle = {Workshop on Machine Learning in Speech and Language Processing (MLSLP)},
  address   = {Virtual},
  month     = oct,
  year      = {2021},
  doi       = {10.48550/arXiv.2110.06887},
  url       = {https://arxiv.org/abs/2110.06887v1},
  urldate   = {2022-03-17},
  language  = {en},
  copyright = {All rights reserved},
  abstract  = {Voice conversion for speaker anonymization is an emerging field in speech processing research. Many state-of-the-art approaches are based on the resynthesis of the phoneme posteriorgrams (PPG), the fundamental frequency (F0) of the input signal together with modified X-vectors. Our research focuses on the role of F0 for speaker anonymization, which is an understudied area. Utilizing the VoicePrivacy Challenge 2020 framework and its datasets we developed and evaluated eight low-complexity F0 modifications prior resynthesis. We found that modifying the F0 can improve speaker anonymization by as much as 8\% with minor word-error rate degradation.},
}