{"links":{},"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00234667","sets":["1164:5064:11558:11626"]},"path":["11626"],"owner":"44499","recid":"234667","title":["Contextual Biasingを用いた日本語End-to-End音声認識向け語彙登録の検討"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-06-07"},"_buckets":{"deposit":"d153178b-f0b1-4bea-a572-81e7c376cfed"},"_deposit":{"id":"234667","pid":{"type":"depid","value":"234667","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"Contextual Biasingを用いた日本語End-to-End音声認識向け語彙登録の検討","author_link":["639740","639741","639738","639736","639737","639743","639735","639739","639742","639734"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Contextual Biasingを用いた日本語End-to-End音声認識向け語彙登録の検討"},{"subitem_title":"Exploring Keyword Enrollment for Japanese End-to-End Automatic Speech Recognition using Contextual Biasing","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"ポスターセッション2","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2024-06-07","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"三菱電機株式会社情報技術総合研究所"},{"subitem_text_value":"三菱電機株式会社情報技術総合研究所"},{"subitem_text_value":"Mitsubishi Electric Research Laboratories／現在，Apple"},{"subitem_text_value":"Mitsubishi Electric Research Laboratories"},{"subitem_text_value":"三菱電機株式会社情報技術総合研究所"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Mitsubishi Electric Research Laboratories / Presently with Apple","subitem_text_language":"en"},{"subitem_text_value":"Mitsubishi Electric Research 
Laboratories","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/234667/files/IPSJ-MUS24140055.pdf","label":"IPSJ-MUS24140055.pdf"},"date":[{"dateType":"Available","dateValue":"2026-06-07"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-MUS24140055.pdf","filesize":[{"value":"1.2 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"21"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"33f9ba4b-7f7c-4ff5-8a04-93b8e8850c2a","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"三井, 祥幹"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"相原, 龍"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"堀, 貴明"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"ルルー, ジョナトン"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"田口, 進也"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yoshiki, Mitsui","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Ryo, Aihara","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Takaaki, 
Hori","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Jonathan, Le Roux","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Shinya, Taguchi","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10438388","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8752","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"深層学習の発展に伴い登場した end-to-end (E2E) 音声認識は，従来の階層型音声認識と比較し，総合的に高い性能を発揮する．しかし，階層型音声認識で容易に実現できていた，特定ドメイン向けの語彙登録が困難である欠点を抱えている．E2E 音声認識向けの語彙登録手法として，contextual biasing を用いる方法が提案されているが，特に日本語音声認識では，学習データに現れない表記を含む語彙を登録する場合に，十分な認識性能を得られない．これを解消するため，本稿では，語彙の登録にカタカナ・ひらがな等の表音文字による表記を利用し，音声認識結果テキストに対する後処理で，登録に用いた表記を，元の表記へと戻す改良手法を提案する．更に，表音文字による語彙の表記と，入力音声より得られる音響特徴量との結びつきを強めるため，E2E 音声認識モデルを学習させる際に，学習用テキストの一部の単語を，ランダムに表音文字表記へ置換する改良学習手法を併せて提案する．提案手法により，5 モーラ以上からなる語彙の登録タスクにおいて，元表記を利用し語彙を登録する従来手法よりも高い語彙登録性能が得られることを確認した．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"End-to-end (E2E) automatic speech recognition (ASR), which has emerged with the development of deep learning, exhibits generally higher performance than conventional modular ASR methods. 
However, E2E ASR has the drawback that it is difficult to enroll keywords for specific domains, which was easily realized in conventional ASR. Contextual biasing has been proposed for keyword enrollment methods for E2E ASR, but, for Japanese ASR, the performance is not sufficient when we enroll keywords which do not appear in the training data. To overcome this problem, we propose an updated keyword enrollment method where we use phonetic letter notations such as katakana or hiragana to recognize enrolled keywords, converting them back to their original notations in a postprocessing step. Additionally we propose an improved E2E ASR model training method to strengthen the connection between acoustic features obtained from input speech and phonetic letter notations by replacing some words from original notation to phonetic letter notation. We observed higher keyword enrollment performance for keywords longer than five moras by using the proposed methods.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"7","bibliographic_titles":[{"bibliographic_title":"研究報告音楽情報科学（MUS）"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-06-07","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"55","bibliographicVolumeNumber":"2024-MUS-140"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"created":"2025-01-19T01:36:31.525679+00:00","updated":"2025-01-19T09:43:56.882982+00:00","id":234667}