% Journal article: multimodal (speech + text) emotion recognition for empathetic
% dialogue. Two authors, volume 16, issue 1, pages 13--22, January 2026.
% Field notes:
%   author   - each person is listed exactly once, in Japanese script.
%   yomi     - romanized forms of the same two authors, kept so the Latin
%              spelling is not lost. NOTE(review): Japanese BibTeX styles
%              (jBibTeX/pBibTeX family) are expected to use this as the name
%              reading for sorting; standard styles ignore it. Confirm against
%              the style in use.
%   number   - the issue within the volume (the field standard styles read).
%   month    - bare predefined macro, so the style decides how it is printed.
%   abstract - Japanese abstract followed by the English abstract. Kept out of
%              the note field because styles print note text verbatim in the
%              reference list.
@article{oai:ipsj.ixsq.nii.ac.jp:02006884,
 author   = {平野, 悠人 and 森本, 尚之},
 yomi     = {Hirano, Yuto and Morimoto, Naoyuki},
 title    = {共感的対話におけるマルチモーダル感情認識：音声とテキストを用いた統合的アプローチ},
 journal  = {情報処理学会論文誌コンシューマ・デバイス＆システム（CDS）},
 volume   = {16},
 number   = {1},
 pages    = {13--22},
 month    = jan,
 year     = {2026},
 abstract = {近年のコンピュータ性能とAI・機械学習技術の進展により，人間と高度に対話可能なAIシステムの研究が進んでいる．特に医療や教育など感情に配慮した対話では，相手の感情を理解し，共感的応答を生成する能力が重要である．本研究では，日本語共感対話データセットを用いて，音声・テキスト各モーダルの特徴を活かした感情認識モデルを構築・比較し，さらにそれらを統合したマルチモーダルモデルを提案する．各モデルの比較により，得意とする感情の傾向が異なることを確認し，統合によって分類性能が向上することを示した．また，実環境を想定し，音声からのテキスト文字起こしや特徴量抽出を行うエッジデバイスを実装し，感情認識IoTシステムを構築した．さらに，テキスト中の記号情報が感情推定において重要であることを示した．, Recent advancements in computational power and AI/machine learning technologies have accelerated research into AI systems capable of engaging in sophisticated interactions with humans. In emotionally sensitive domains such as healthcare and education, it is crucial for AI to understand the emotions of the other party and generate empathetic responses. In this study, we construct and compare emotion recognition models that leverage features from speech and text modalities using a Japanese empathetic dialogue dataset, and further propose a multimodal model that integrates both. The comparison revealed that each modality tends to perform better for different emotional categories, and that integration improves overall classification performance. Furthermore, assuming real-world applications, we implemented an edge device capable of transcribing speech and extracting features, thereby constructing an emotion recognition IoT system. In addition, we demonstrated that symbolic information in text contributes significantly to emotion estimation.}
}