{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00183616","sets":["581:8997:9007"]},"path":["9007"],"owner":"11","recid":"183616","title":["音声認識のための回帰木に基づく複数の変換行列の重み付けによる特徴量空間の適応"],"pubdate":{"attribute_name":"公開日","attribute_value":"2017-09-15"},"_buckets":{"deposit":"f8d74a67-7138-4cb4-822a-9c8eab5ef1c7"},"_deposit":{"id":"183616","pid":{"type":"depid","value":"183616","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"音声認識のための回帰木に基づく複数の変換行列の重み付けによる特徴量空間の適応","author_link":["403561","403560","403565","403564","403562","403558","403563","403559"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"音声認識のための回帰木に基づく複数の変換行列の重み付けによる特徴量空間の適応"},{"subitem_title":"Feature-space Adaptation with a Weighted Sum of Multiple Transformation Matrices Based on Regression Tree for Automatic Speech Recognition","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[一般論文] 自動音声認識,適応,特徴量変換,ディープ・ニューラルネットワーク","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2017-09-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"三菱電機株式会社情報技術総合研究所"},{"subitem_text_value":"三菱電機株式会社情報技術総合研究所"},{"subitem_text_value":"Mitsubishi Electric Research Laboratories"},{"subitem_text_value":"三菱電機株式会社情報技術総合研究所"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Information Technology R&D Center, Mitsubishi Electric Corporation","subitem_text_language":"en"},{"subitem_text_value":"Information Technology R&D Center, Mitsubishi Electric Corporation","subitem_text_language":"en"},{"subitem_text_value":"Mitsubishi Electric Research Laboratories","subitem_text_language":"en"},{"subitem_text_value":"Information Technology R&D Center, Mitsubishi Electric 
Corporation","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/183616/files/IPSJ-JNL5809018.pdf","label":"IPSJ-JNL5809018.pdf"},"date":[{"dateType":"Available","dateValue":"2019-09-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL5809018.pdf","filesize":[{"value":"1.1 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"0cab0045-5a46-4dd0-9696-4bf163f00bc4","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2017 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"金川, 裕紀"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"太刀岡, 勇気"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"渡部, 晋治"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"石井, 純"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Hiroki, Kanagawa","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yuuki, Tachioka","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Shinji, Watanabe","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Jun, 
Ishii","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"音声認識では適応が重要である.特徴量空間での適応(fMLLR)は,特徴量ベクトル系列に単一の変換行列を乗算することで実現されるため,デコーディング処理とは独立な,特徴量に関する前処理として実装できる.このためガウス混合分布(GMM)と同様にディープ・ニューラルネットワーク(DNN)の音響モデルに対しても適用できる.一方でモデル空間の適応は,回帰木に基づき複数の変換行列を用いることで,単一の変換行列を用いるfMLLRよりも高い精度で適応が可能である.しかしこの手法には2つの課題がある.1つ目は適応とデコードに同じ生成モデル(例:GMM)の音響モデルを共有しなければならず,DNNの音響モデルには適用できないこと,2つ目は変換行列の数が多くなると,変換行列の推定が過学習しやすいことである.本論文では,1パスの状態アラインメント情報を用いてフレームごとに対応する複数の変換行列を対応付け,それらを用いて重み付け線形和で表現される変換行列で特徴量変換を行う手法を提案する.さらに2つ目の課題に対し,構造的な事前確率の導入により変換行列をMAP推定する,特徴量空間における構造的事後確率最大線形(fSMAPLR)を提案する.実験より,提案するfSMAPLRはfMLLRの性能を上回った.","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In automatic speech recognition, an adaptation is important. Feature-space maximum-likelihood linear regression (fMLLR) transforms acoustic features to adapted ones by a multiplication operation with a single transformation matrix. This property realizes an efficient adaptation performed within a pre-processing, which is independent of a decoding process, and this type of adaptation can be applied to deep neural network (DNN). On the other hand, model-space adaptations (i.e., CMLLR) improve the performance of fMLLR because it can use multiple transformation matrices based on a regression tree. 
However, there are two problems in the model-space adaptations: first, these types of adaptation cannot be applied to DNN because adaptation and decoding must share the same generative model, e.g., Gaussian mixture model (GMM). Second, transformation matrices tend to be over-fitted when the number of transformation matrices is large. This paper proposes to use multiple transformation matrices within a feature-space adaptation framework. The proposed method first estimates multiple transformation matrices in the GMM framework according to the first-pass decoding results and the alignments, and then takes a weighted sum of these matrices to obtain a single feature transformation matrix frame-by-frame. In addition, to address the second problem, we propose feature-space structural maximum a posteriori linear regression (fSMAPLR), which introduces hierarchical prior distributions to regularize the MAP estimation. Experimental results show that the proposed fSMAPLR outperformed fMLLR.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"1564","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"1555","bibliographicIssueDates":{"bibliographicIssueDate":"2017-09-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"9","bibliographicVolumeNumber":"58"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":183616,"updated":"2025-01-20T03:36:28.199997+00:00","links":{},"created":"2025-01-19T00:51:07.485921+00:00"}