{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00090263","sets":["581:7002:7080"]},"path":["7080"],"owner":"11","recid":"90263","title":["音声情報案内システムにおけるBag-of-Wordsを用いた無効入力の棄却"],"pubdate":{"attribute_name":"公開日","attribute_value":"2013-02-15"},"_buckets":{"deposit":"2d05b78b-ca3d-41d8-913b-8c620f2b419a"},"_deposit":{"id":"90263","pid":{"type":"depid","value":"90263","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"音声情報案内システムにおけるBag-of-Wordsを用いた無効入力の棄却","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"音声情報案内システムにおけるBag-of-Wordsを用いた無効入力の棄却"},{"subitem_title":"Invalid Input Rejection Using Bag-of-Words for Speech-oriented Guidance System","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[特集:音声ドキュメント処理] 音声情報案内システム,無効入力棄却,Bag-of-Words","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2013-02-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"岡山大学"},{"subitem_text_value":"統計数理研究所"},{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Okayama University","subitem_text_language":"en"},{"subitem_text_value":"The Institute of Statistical Mathematics","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/90263/files/IPSJ-JNL5402003.pdf"},"date":[{"dateType":"Available","dateValue":"2015-02-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL5402003.pdf","filesize":[{"value":"744.0 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"8ddad43a-0975-494d-a83e-6fa2fd12b263","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2013 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"真嶋温佳"},{"creatorName":"藤田, 洋子"},{"creatorName":"トーレスラファエル"},{"creatorName":"川波, 弘道"},{"creatorName":"原, 直"},{"creatorName":"松井, 知子"},{"creatorName":"猿渡, 洋"},{"creatorName":"鹿野, 清宏"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Haruka, Majima","creatorNameLang":"en"},{"creatorName":"Yoko, Fujita","creatorNameLang":"en"},{"creatorName":"Rafael, Torres","creatorNameLang":"en"},{"creatorName":"Hiromichi, Kawanami","creatorNameLang":"en"},{"creatorName":"Sunao, Hara","creatorNameLang":"en"},{"creatorName":"Tomoko, Matsui","creatorNameLang":"en"},{"creatorName":"Hiroshi, Saruwatari","creatorNameLang":"en"},{"creatorName":"Kiyohiro, Shikano","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"実環境における音声認識を用いた情報案内システムでは,雑音等の非音声やユーザ同士の背景会話など,システムへの入力として不適切な入力が存在する.これらの入力はシステムの誤作動・誤認識の原因となるので,システムへの入力として適切な入力(有効入力)と不適切な入力(無効入力)の識別を行い,無効入力を棄却することにより,無効入力に対する応答処理を行わないことが重要である.従来,有効入力と無効入力との識別には,メル周波数ケプストラム係数などの音響的特徴量によるGMM(Gaussian Mixture Model)が用いられる.しかし,入力データの音声認識結果から得られる言語的な情報を使うことにより,システムのタスクを考えたうえで有効入力と無効入力の識別が可能になると考えられる.そこで本論文では,音響特徴量にBag-of-Words(BOW)を言語的特徴量として併用した無効入力の識別を検討した.識別手法としては,サポートベクタマシン(SVM)および最大エントロピー法を用いた.実験には実環境音声情報案内システム「たけまるくん」の入力データを用いた.SVMによる識別結果では,GMMによる音響尤度のみを用いた場合に比べて,BOWを用いた場合,F尺度を82.19%から85.41%に改善することができた.さらに,GMMによる音響尤度,発話時間,SNRを組み合わせた特徴量にBOWを追加することで,F尺度を86.58%まで改善することができた.詳細な分析の結果,BOWは特に無効入力の誤受理を減らす効果があることが示された.","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"On a real environment speech-oriented information guidance system, a valid and invalid input discrimination is important as invalid inputs such as noise, laugh, cough and utterances between users lead to unpredictable system responses. Generally, acoustic features such as MFCC (Mel-Frequency Cepstral Coefficient) are used for discrimination. Comparing acoustic likelihoods of GMMs (Gaussian Mixture Models) from speech data and noise data is one of the typical methods. In addition to that, using linguistic features, such as speech recognition result, is considered to improve discrimination accuracy as it reflects the task-domain of invalid inputs and meaningless recognition results from noise inputs. In this paper, we introduce Bag-of-Words (BOW) as a feature to discriminate between valid and invalid inputs. Support Vector Machine (SVM) and Maximum Entropy method (ME) are also employed to realize robust classification. We experimented the methods using real environment data obtained from the guidance system “Takemaru-kun.” By applying BOW on SVM, the F-measure is improved to 85.09%, from 82.19% when using GMMs. In addition, experiments using features combining BOW with acoustic likelihoods from GMMs, Duration and SNR were conducted, improving the F-measure to 86.58%.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"451","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"443","bibliographicIssueDates":{"bibliographicIssueDate":"2013-02-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicVolumeNumber":"54"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"updated":"2025-01-21T16:05:41.920276+00:00","created":"2025-01-18T23:39:52.566961+00:00","links":{},"id":90263}