{"created":"2025-01-19T01:12:47.814187+00:00","updated":"2025-01-19T17:41:01.829615+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00211654","sets":["581:10433:10439"]},"path":["10439"],"owner":"44499","recid":"211654","title":["咽喉マイクを用いた大語彙音声認識のための特徴マッピングによるデータ拡張と知識蒸留"],"pubdate":{"attribute_name":"公開日","attribute_value":"2021-06-15"},"_buckets":{"deposit":"8717e82a-fcea-49e5-ba96-8f068cab1e8b"},"_deposit":{"id":"211654","pid":{"type":"depid","value":"211654","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"咽喉マイクを用いた大語彙音声認識のための特徴マッピングによるデータ拡張と知識蒸留","author_link":["538139","538133","538135","538131","538134","538138","538130","538137","538136","538132"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"咽喉マイクを用いた大語彙音声認識のための特徴マッピングによるデータ拡張と知識蒸留"},{"subitem_title":"Feature Mapping-based Data Augmentation and Knowledge Distillation for Large Vocabulary Speech Recognition Using Throat Microphone","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[一般論文] 咽喉マイク，音声認識，データ拡張，知識蒸留","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2021-06-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"静岡大学"},{"subitem_text_value":"産業技術総合研究所"},{"subitem_text_value":"静岡大学"},{"subitem_text_value":"静岡大学"},{"subitem_text_value":"静岡大学"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Shizuoka University","subitem_text_language":"en"},{"subitem_text_value":"National Institute of Advanced Industrial Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Shizuoka University","subitem_text_language":"en"},{"subitem_text_value":"Shizuoka University","subitem_text_language":"en"},{"subitem_text_value":"Shizuoka University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/211654/files/IPSJ-JNL6206004.pdf","label":"IPSJ-JNL6206004.pdf"},"date":[{"dateType":"Available","dateValue":"2023-06-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL6206004.pdf","filesize":[{"value":"1.5 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"1f2e61fd-f79b-46c8-8a2e-e989a3ddd6cd","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2021 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"鈴木, 貴仁"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"緒方, 淳"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"綱川, 隆司"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"西田, 昌史"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"西村, 雅史"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Takahito, Suzuki","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Jun, Ogata","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Takashi, Tsunakawa","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Masafumi, Nishida","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Masafumi, Nishimura","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"咽喉マイクは接話マイクのような一般的なマイクよりも外部雑音に頑健であるが，一般的なマイクとの音響ミスマッチが大きく，通常の音声認識システムでは認識精度が低下する．また，大量の音声データが利用可能という状況にもない．本研究では接話マイクと咽喉マイクで同時収録した小規模パラレルデータを活用した咽喉マイク音声認識のための学習手法を提案する．提案手法では，まず既存の大規模音声データベースから抽出した接話マイク特徴量を咽喉マイクの特徴量空間にマッピングし，咽喉マイク用音響モデル（DNN-HMM）の学習データを拡張する．このとき特徴マッピングはパラレルデータを用いてLSTMによって学習する．続いて，特徴マッピングによって得た特徴量でDNN-HMMを初期学習し，これを生徒モデルとする．そして，大量の接話マイク特徴量で学習したDNN-HMMを教師モデルとし，知識蒸留に基づき生徒モデルの再学習を行う．読み上げ音声を用いた評価の結果，提案法は咽喉マイク音声のみで学習したDNN-HMMと比べて約36.5%の文字誤り率の削減を達成した．","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Throat microphones are more robust against external noise than conventional acoustic microphones such as close-talk. However, automatic speech recognition (ASR) performance is degraded when throat microphone speech signals are simply input to a general (clean) ASR system due to large acoustic mismatches. Moreover, the amount of throat microphone speech data is not enough to train accurate ASR systems. In this study, we propose a training approach for throat microphone ASR utilizing a small parallel corpus simultaneously recorded by close-talk and throat microphones. As a data-augmentation process, existing large-amount close-talk microphone features are transformed to a throat microphone feature space with the LSTM-based feature mapping which is trained from the parallel corpus. The DNN-HMM is then pre-trained with the mapped features, and fine-tuned by knowledge distillation from a DNN-HMM trained with a large amount of close-talk microphone speech data. Experimental results using read speech data showed that the proposed approach achieved 36.5% relative improvement of character error rate compared to the DNN-HMM trained only with throat microphone speech data.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"1381","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"1373","bibliographicIssueDates":{"bibliographicIssueDate":"2021-06-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"6","bibliographicVolumeNumber":"62"}]},"relation_version_is_last":true,"item_2_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.20729/00211548","subitem_identifier_reg_type":"JaLC"}]},"weko_creator_id":"44499"},"id":211654,"links":{}}