header

Evaluating the Influence of Phoneme-Dependent Dynamic Speaker Directivity of Embodied Conversational Agents’ Speech


Jonathan Ehret, Jonas Stienen, Chris Brozdowski, Andrea Bönsch, Irene Mittelberg, Michael Vorländer, Torsten Wolfgang Kuhlen
20th ACM International Conference on Intelligent Virtual Agents 2020 (IVA'20)
pubimg

Generating natural embodied conversational agents within virtual spaces crucially depends on speech sounds and their directionality. In this work, we simulated directional filters to not only add directionality, but also directionally adapt each phoneme. We therefore mimic reality where changing mouth shapes have an influence on the directional propagation of sound. We conducted a study (n = 32) evaluating naturalism ratings, preference and distinguishability of omnidirectional speech auralization compared to static and dynamic, phoneme-dependent directivities. The results indicated that participants cannot distinguish dynamic from static directivity. Furthermore, participants’ preference ratings aligned with their naturalism ratings. There was no unanimity, however, with regards to which auralization is the most natural.

» Show BibTeX

@inproceedings{10.1145/3383652.3423863,
author = {Ehret, Jonathan and Stienen, Jonas and Brozdowski, Chris and B\"{o}nsch, Andrea and Mittelberg, Irene and Vorl\"{a}nder, Michael and Kuhlen, Torsten W.},
title = {Evaluating the Influence of Phoneme-Dependent Dynamic Speaker Directivity of Embodied Conversational Agents' Speech},
year = {2020},
isbn = {9781450375863},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3383652.3423863},
doi = {10.1145/3383652.3423863},
abstract = {Generating natural embodied conversational agents within virtual spaces crucially depends on speech sounds and their directionality. In this work, we simulated directional filters to not only add directionality, but also directionally adapt each phoneme. We therefore mimic reality where changing mouth shapes have an influence on the directional propagation of sound. We conducted a study (n = 32) evaluating naturalism ratings, preference and distinguishability of omnidirectional speech auralization compared to static and dynamic, phoneme-dependent directivities. The results indicated that participants cannot distinguish dynamic from static directivity. Furthermore, participants' preference ratings aligned with their naturalism ratings. There was no unanimity, however, with regards to which auralization is the most natural.},
booktitle = {Proceedings of the 20th ACM International Conference on Intelligent Virtual Agents},
articleno = {17},
numpages = {8},
keywords = {phoneme-dependent directivity, directional 3D sound, speech, embodied conversational agents, virtual acoustics},
location = {Virtual Event, Scotland, UK},
series = {IVA '20}
}




Disclaimer Home Visual Computing institute RWTH Aachen University