Publications

  1. Gaznepoglu, Ünal Ege and Peters, Nils
    Why disentanglement-based speaker anonymization systems fail at preserving emotions?
    In Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP), 2025.
    @inproceedings{gaznepoglu_why_2025,
      author    = {Gaznepoglu, Ünal Ege and Peters, Nils},
      title     = {Why disentanglement-based speaker anonymization systems fail at preserving emotions?},
      booktitle = {Proc. {IEEE} Intl. Conf. on Acoustics, Speech and Signal Processing ({ICASSP})},
      month     = apr,
      year      = {2025},
      copyright = {All rights reserved},
    }
  2. Gaznepoglu, Ünal Ege, Leschanowsky, Anna, Aloradi, Ahmad, Singh, Prachi, Tenbrinck, Daniel, Habets, Emanuël A. P., and Peters, Nils
    You Are What You Say: Exploiting Linguistic Content for VoicePrivacy Attacks
    In Proc. Interspeech Conf., 2025.
    @inproceedings{gaznepoglu_you_2025,
      author    = {Gaznepoglu, Ünal Ege and Leschanowsky, Anna and Aloradi, Ahmad and Singh, Prachi and Tenbrinck, Daniel and Habets, Emanuël A. P. and Peters, Nils},
      title     = {You Are What You Say: Exploiting Linguistic Content for {VoicePrivacy} Attacks},
      booktitle = {Proc. Interspeech Conf.},
      month     = aug,
      year      = {2025},
    }
  3. Sharma, Neeraj Kumar, Gaznepoglu, Ünal Ege, Robotham, Thomas, and Habets, Emanuël A. P.
    Two congruent cues are better than one: Impact of ITD-ILD combinations on reaction time for sound lateralization
    Journal of the Acoustical Society of America Express Letters, 2023. DOI
    @article{sharma_congruent_2023,
      author  = {Sharma, Neeraj Kumar and Gaznepoglu, Ünal Ege and Robotham, Thomas and Habets, Emanuël A. P.},
      title   = {Two congruent cues are better than one: Impact of {ITD}-{ILD} combinations on reaction time for sound lateralization},
      journal = {Journal of the Acoustical Society of America Express Letters},
      year    = {2023},
      doi     = {10.1121/10.0017936},
      url     = {https://doi.org/10.1121/10.0017936},
    }
  4. Gaznepoglu, Ünal Ege and Peters, Nils
    Evaluation of the Speech Resynthesis Capabilities of the VoicePrivacy Baseline B1
    In Proc. 3rd Symp. on Security and Privacy in Speech Communication: 60–64, 2023. DOI
    @inproceedings{gaznepoglu_evaluation_2023,
      author     = {Gaznepoglu, Ünal Ege and Peters, Nils},
      title      = {Evaluation of the Speech Resynthesis Capabilities of the {VoicePrivacy} Baseline {B1}},
      booktitle  = {Proc. 3rd Symp. on Security and Privacy in Speech Communication},
      eventtitle = {3rd Symposium on Security and Privacy in Speech Communication},
      publisher  = {{ISCA}},
      pages      = {60--64},
      year       = {2023},
      date       = {2023-08-19},
      doi        = {10.21437/SPSC.2023-10},
      url        = {https://www.isca-speech.org/archive/spsc_2023/gaznepoglu23_spsc.html},
      urldate    = {2023-09-20},
      langid     = {english},
      abstract   = {Speaker anonymization systems continue to improve their ability to obfuscate the original speaker characteristics in a speech signal, but often create processing artifacts and unnatural sounding voices as a tradeoff. Many of those systems stem from the {VoicePrivacy} Challenge ({VPC}) Baseline B1, using a neural vocoder to synthesize speech from an F0, x-vectors and bottleneck features-based speech representation. Inspired by this, we investigate the reproduction capabilities of the aforementioned baseline, to assess how successful the shared methodology is in synthesizing human-like speech. We use four objective metrics to measure speech quality, waveform similarity, and F0 similarity. Our findings indicate that both the speech representation and the vocoder introduces artifacts, causing an unnatural perception. A {MUSHRA}-like listening test on 18 subjects corroborate our findings, motivating further research on the analysis and synthesis components of the {VPC} Baseline B1.},
    }
  5. Gaznepoglu, Ünal Ege and Peters, Nils
    Deep learning-based F0 synthesis for speaker anonymization
    In Proc. European Signal Proc. Conf. (EUSIPCO), 2023.
    @inproceedings{gaznepoglu_deep_2023,
      author    = {Gaznepoglu, Ünal Ege and Peters, Nils},
      title     = {Deep learning-based {F0} synthesis for speaker anonymization},
      booktitle = {Proc. European Signal Proc. Conf. ({EUSIPCO})},
      month     = sep,
      year      = {2023},
      copyright = {All rights reserved},
    }
  6. Gaznepoglu, Ünal Ege, Leschanowsky, Anna, and Peters, Nils
    VoicePrivacy 2022 System Description: Speaker Anonymization with Feature-matched F0 Trajectories
    arXiv:2210.17338 [cs, eess], also submitted to VoicePrivacy Challenge, 2022. DOI
    @misc{gaznepoglu_voiceprivacy_2022,
      author     = {Gaznepoglu, Ünal Ege and Leschanowsky, Anna and Peters, Nils},
      title      = {{VoicePrivacy} 2022 System Description: Speaker Anonymization with Feature-matched {F0} Trajectories},
      shorttitle = {{VoicePrivacy} 2022 System Description},
      publisher  = {arXiv},
      month      = oct,
      year       = {2022},
      doi        = {10.48550/arXiv.2210.17338},
      url        = {https://arxiv.org/abs/2210.17338},
      urldate    = {2022-11-02},
      note       = {arXiv:2210.17338 [cs, eess], also submitted to VoicePrivacy Challenge},
      keywords   = {Computer Science - Cryptography and Security, Computer Science - Sound, Electrical Engineering and Systems Science - Audio and Speech Processing},
      copyright  = {All rights reserved},
      abstract   = {We introduce a novel method to improve the performance of the VoicePrivacy Challenge 2022 baseline B1 variants. Among the known deficiencies of x-vector-based anonymization systems is the insufficient disentangling of the input features. In particular, the fundamental frequency (F0) trajectories, which are used for voice synthesis without any modifications. Especially in cross-gender conversion, this situation causes unnatural sounding voices, increases word error rates (WERs), and personal information leakage. Our submission overcomes this problem by synthesizing an F0 trajectory, which better harmonizes with the anonymized x-vector. We utilized a low-complexity deep neural network to estimate an appropriate F0 value per frame, using the linguistic content from the bottleneck features (BN) and the anonymized x-vector. Our approach results in a significantly improved anonymization system and increased naturalness of the synthesized voice. Consequently, our results suggest that F0 extraction is not required for voice anonymization.},
    }
  7. Gaznepoglu, Ünal Ege and Peters, Nils
    Exploring the Importance of F0 Trajectories for Speaker Anonymization using X-vectors and Neural Waveform Models
    In Workshop on Machine Learning in Speech and Language Processing (MLSLP), 2021. DOI
    @inproceedings{gaznepoglu_exploring_2021,
      author    = {Gaznepoglu, Ünal Ege and Peters, Nils},
      title     = {Exploring the Importance of {F0} Trajectories for Speaker Anonymization using {X-vectors} and Neural Waveform Models},
      booktitle = {Workshop on Machine Learning in Speech and Language Processing (MLSLP)},
      address   = {Virtual},
      month     = oct,
      year      = {2021},
      doi       = {10.48550/arXiv.2110.06887},
      url       = {https://arxiv.org/abs/2110.06887v1},
      urldate   = {2022-03-17},
      language  = {en},
      copyright = {All rights reserved},
      abstract  = {Voice conversion for speaker anonymization is an emerging field in speech processing research. Many state-of-the-art approaches are based on the resynthesis of the phoneme posteriorgrams (PPG), the fundamental frequency (F0) of the input signal together with modified X-vectors. Our research focuses on the role of F0 for speaker anonymization, which is an understudied area. Utilizing the VoicePrivacy Challenge 2020 framework and its datasets we developed and evaluated eight low-complexity F0 modifications prior resynthesis. We found that modifying the F0 can improve speaker anonymization by as much as 8\% with minor word-error rate degradation.},
    }
All publications as BibTeX