{"links":{},"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:02006884","sets":["934:6391:1765269693937:1765269730334"]},"path":["1765269730334"],"owner":"80578","recid":"2006884","title":["共感的対話におけるマルチモーダル感情認識:音声とテキストを用いた統合的アプローチ"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2026-01-28"},"_buckets":{"deposit":"e3f335b7-1eb0-4750-90af-44474d8bb457"},"_deposit":{"id":"2006884","pid":{"type":"depid","value":"2006884","revision_id":0},"owners":[80578],"status":"published","created_by":80578},"item_title":"共感的対話におけるマルチモーダル感情認識:音声とテキストを用いた統合的アプローチ","author_link":[],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"共感的対話におけるマルチモーダル感情認識:音声とテキストを用いた統合的アプローチ","subitem_title_language":"ja"},{"subitem_title":"Multimodal Emotion Recognition in Empathetic Dialogue: An Integrated Approach Using Speech and Text","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[コンシューマ・システム論文] 共感性対話,感情認識,マルチモーダル","subitem_subject_scheme":"Other"}]},"item_type_id":"3","publish_date":"2026-01-28","item_3_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"三重大学"},{"subitem_text_value":"三重大学"}]},"item_3_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Mie University","subitem_text_language":"en"},{"subitem_text_value":"Mie University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/2006884/files/IPSJ-TCDS1601003.pdf","label":"IPSJ-TCDS1601003.pdf"},"date":[{"dateType":"Available","dateValue":"2028-01-28"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-TCDS1601003.pdf","filesize":[{"value":"3.3 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"47"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"a7771f8a-9571-43f2-b001-02ba19729ab4","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2026 by the Information Processing Society of Japan"}]},"item_3_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"平野,悠人"}]},{"creatorNames":[{"creatorName":"森本,尚之"}]}]},"item_3_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yuto Hirano","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Naoyuki Morimoto","creatorNameLang":"en"}]}]},"item_3_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA12628043","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_3_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2186-5728","subitem_source_identifier_type":"ISSN"}]},"item_3_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"近年のコンピュータ性能とAI・機械学習技術の進展により,人間と高度に対話可能なAIシステムの研究が進んでいる.特に医療や教育など感情に配慮した対話では,相手の感情を理解し,共感的応答を生成する能力が重要である.本研究では,日本語共感対話データセットを用いて,音声・テキスト各モーダルの特徴を活かした感情認識モデルを構築・比較し,さらにそれらを統合したマルチモーダルモデルを提案する.各モデルの比較により,得意とする感情の傾向が異なることを確認し,統合によって分類性能が向上することを示した.また,実環境を想定し,音声からのテキスト文字起こしや特徴量抽出を行うエッジデバイスを実装し,感情認識IoTシステムを構築した.さらに,テキスト中の記号情報が感情推定において重要であることを示した.","subitem_description_type":"Other"}]},"item_3_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Recent advancements in computational power and AI/machine learning technologies have accelerated research into AI systems capable of engaging in sophisticated interactions with humans. In emotionally sensitive domains such as healthcare and education, it is crucial for AI to understand the emotions of the other party and generate empathetic responses. In this study, we construct and compare emotion recognition models that leverage features from speech and text modalities using a Japanese empathetic dialogue dataset, and further propose a multimodal model that integrates both. The comparison revealed that each modality tends to perform better for different emotional categories, and that integration improves overall classification performance. Furthermore, assuming real-world applications, we implemented an edge device capable of transcribing speech and extracting features, thereby constructing an emotion recognition IoT system. In addition, we demonstrated that symbolic information in text contributes significantly to emotion estimation.","subitem_description_type":"Other"}]},"item_3_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"22","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌コンシューマ・デバイス&システム(CDS)"}],"bibliographicPageStart":"13","bibliographicIssueDates":{"bibliographicIssueDate":"2026-01-28","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicVolumeNumber":"16"}]},"relation_version_is_last":true,"weko_creator_id":"80578"},"updated":"2026-01-28T01:53:50.340123+00:00","created":"2026-01-22T00:43:10.383759+00:00","id":2006884}