{"links":{},"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00222604","sets":["1164:4179:10952:11075"]},"path":["11075"],"owner":"44499","recid":"222604","title":["次世代Kaldiフレームワーク上の日本語音声認識システム"],"pubdate":{"attribute_name":"公開日","attribute_value":"2022-11-22"},"_buckets":{"deposit":"161720c5-c6d5-46fd-8af0-e72667b6482d"},"_deposit":{"id":"222604","pid":{"type":"depid","value":"222604","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"次世代Kaldiフレームワーク上の日本語音声認識システム","author_link":["584220","584217","584218","584219"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"次世代Kaldiフレームワーク上の日本語音声認識システム"},{"subitem_title":"A Japanese Automatic Speech Recognition System on the Next-Gen Kaldi Framework","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"音声認識(2)","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2022-11-22","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"電気通信大学"},{"subitem_text_value":"電気通信大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"The University of Electro-Communications","subitem_text_language":"en"},{"subitem_text_value":"The University of Electro-Communications","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/222604/files/IPSJ-NL22254024.pdf","label":"IPSJ-NL22254024.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL22254024.pdf","filesize":[{"value":"1.3 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"b9616fd1-d6ab-4dc5-86be-e57f66d96fa0","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2022 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"テオ, ウェンシェン"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"南, 泰浩"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Wen, Shen Teo","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yasuhiro, Minami","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8779","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"次世代 Kaldi と呼ばれる最先端の音声処理ツールキットが Kaldi の後継モデルとして 2021 年に利用されるようになった.本報告では,この次世代 Kaldi のモジュールを利用して,日本語話し言葉コーパス (CSJ) で学習したストリーミング型 RNN-Transducer 日本語音声認識システムについて述べる.この E2E モデルは,音響モデルと言語モデルを同時に学習する.本モデルで音声認識実験を行った結果,膨大なデータから学習する言語モデルを利用する他の最先端フレームワークとの比較では,その性能は十分でなかったが,Kaldi と比べた場合は Kaldi よりも低い文字誤り率 (CER) の改善が実現できた.本報告では,最初に,次世代 Kaldi の実現設定を説明し,その後実施した実験結果を示す.最後に,E2E 音声認識システムの改善を図るため,音声認識モデルの精度が低下したいくつかの理由を考察する.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"2021 saw the introduction of the cutting-edge successor to the Kaldi speech processing toolkit, known as Next-Gen Kaldi. Leveraging on the Next-Gen Kaldi family of modules in this work, we built a streaming RNN-Transducer Japanese ASR system, trained on the Corpus of Spontaneous Japanese (CSJ). Our E2E model shows a definitive Character Error Rate (CER) improvement over that of Kaldi, but still fall short when compared to state-of-the-art benchmarks from other frameworks enhanced by external language models trained on huge language data. In this paper, we first explain our experiment setups and present our results. Then, in the pursuit of an end-to-end ASR system, we raise several points of discussion where the performance of our ASR model suffered.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告自然言語処理(NL)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2022-11-22","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"24","bibliographicVolumeNumber":"2022-NL-254"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"created":"2025-01-19T01:22:33.269313+00:00","updated":"2025-01-19T13:40:46.860179+00:00","id":222604}