% Conference paper in the CVIT 2025 proceedings (publisher: SPIE, volume 13796).
% Author/editor names use the "Last, First" form so BibTeX splits them unambiguously.
@inproceedings{f242c6cf260b441494e8f2cfec260896,
title = "Enhancing human-machine interaction: a novel approach to emotion-controlled speech-to-animation",
abstract = "This paper presents a novel framework for emotion-controlled speech-to-animation, addressing the issue of emotional mismatches between speech and facial expressions in existing methods. Our approach synchronises emotional expression across audio and facial animations using State-of-the-Art (SOTA) pretrained models, eliminating the need for costly custom training while ensuring adaptability. A key contribution of our framework is the creation of a novel Speech-to-Speech (S2S) pipeline for emotional control over generated speech. In addition, we introduce a novel evaluation metric, the Emotion Distribution Divergence (EDD), to assess our model's ability to modify the emotions in the original videos. Experimental results demonstrate significant improvements in emotional expressiveness and realism over existing methods, establishing our approach as a major advancement in human-machine interaction, virtual assistants, and emotion-aware IoT applications.",
keywords = "Computer science and informatics",
author = "Mobbs, Rebecca and Makris, Dimitrios and Lappas, Demetris and Argyriou, Vasileios",
year = "2025",
month = sep,
day = "19",
doi = "10.1117/12.3078304",
language = "English",
isbn = "9781510694729",
volume = "13796",
series = "International Conference on Computer Vision and Information Technology",
publisher = "SPIE",
editor = "Ma, Jixin",
booktitle = "Sixth International Conference on Computer Vision and Information Technology ({CVIT} 2025)",
address = "United States",
note = "2025 6th International Conference on Computer Vision and Information Technology (CVIT 2025) ; Conference date: 20-06-2025 Through 22-06-2025",
}