{"links":{},"id":57553,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00057553","sets":["1164:5159:5216:5217"]},"path":["5217"],"owner":"1","recid":"57553","title":["Phonetic Tied - Mixtureモデルを用いた大語彙連続音声認識"],"pubdate":{"attribute_name":"公開日","attribute_value":"1999-12-20"},"_buckets":{"deposit":"82fd9d94-dfec-473a-85b5-dbbb1f04919e"},"_deposit":{"id":"57553","pid":{"type":"depid","value":"57553","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"Phonetic Tied - Mixtureモデルを用いた大語彙連続音声認識","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Phonetic Tied - Mixtureモデルを用いた大語彙連続音声認識"},{"subitem_title":"Phonetic Tied - Mixture Model for LVCSR","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"1999-12-20","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"京都大学情報学研究科"},{"subitem_text_value":"京都大学情報学研究科"},{"subitem_text_value":"名古屋大学工学研究科"},{"subitem_text_value":"奈良先端科学技術大学院大学情報科学研究科"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"Nagoya University","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/57553/files/IPSJ-SLP99029008.pdf"},"date":[{"dateType":"Available","dateValue":"2001-12-20"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP99029008.pdf","filesize":[{"value":"585.4 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"74ea89e8-f0f2-47c6-aa53-7276d5f0e7da","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 1999 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"李晃伸"},{"creatorName":"河原, 達也"},{"creatorName":"武田, 一哉"},{"creatorName":"鹿野, 清宏"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Akinobu, Lee","creatorNameLang":"en"},{"creatorName":"Tatsuya, Kawahara","creatorNameLang":"en"},{"creatorName":"Kazuya, Takeda","creatorNameLang":"en"},{"creatorName":"Kiyohiro, Shikano","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical 
report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"大語彙連続音声認識のための新たなphonetic tied-mixture (PTM)モデルを提案する．このモデルはmonophoneモデルの各状態が持つ数十個のガウス分布集合をtriphoneの対応する状態に割り当て，重みのみを変えて共有することで合成する．通常の状態共有triphoneに比べて音響空間を効率よく表現でき，また巨大なコードブックを要する従来のtied-mixtureモデルよりも学習が容易である．JNASの2万語の新聞記事読み上げタスクにおいて評価した結果，triphoneでの最大性能と同等の7.0%の単語誤り率をより少ないパラメータ数で達成した．また処理効率の面においても，音響スコア計算に用いるガウス分布を上位3%にまで削減しても精度がほとんど低下しなかった．いくつかのガウス分布の足切り計算(Gaussian pruning)手法を提案および比較した結果，最終的に音響尤度計算を約5分の1にまで削減できた．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"A phonetic tied-mixture (PTM) model for efficient large vocabulary continuous speech recognition is presented. It is synthesized from context-independent phone models with 64 mixture components per state by assigning different mixture weights according to the shared states of triphones. Mixtures are then re-estimated for optimization. The model achieves a word error rate of 7.0% at 20k-word dictation of newspaper corpus, which is comparable to the best figure by the triphone of much higher resolutions. Compared with conventional PTMs that share Gaussians by all states, the proposed model is easily trained and reliably estimated. Furthermore, the model enables the decoder to perform efficient Gaussian pruning. It is found out that computing only two out of 64 components does not cause any loss of accuracy. 
Several methods for the pruning are proposed and compared, and the best one reduced the computation to about 20%.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"48","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告音声言語情報処理（SLP）"}],"bibliographicPageStart":"43","bibliographicIssueDates":{"bibliographicIssueDate":"1999-12-20","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"108(1999-SLP-029)","bibliographicVolumeNumber":"1999"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"created":"2025-01-18T23:20:43.900936+00:00","updated":"2025-01-22T04:26:19.505276+00:00"}