{"created":"2025-01-18T22:45:09.959044+00:00","updated":"2025-01-23T03:00:53.473154+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00010142","sets":["581:612:613"]},"path":["613"],"owner":"1","recid":"10142","title":["発話速度と言語的特徴による変動を考慮した音素持続時間モデルを用いた音声認識"],"pubdate":{"attribute_name":"公開日","attribute_value":"2006-12-15"},"_buckets":{"deposit":"2ba4bc58-f68b-4b05-b5d4-3d83f0ead10e"},"_deposit":{"id":"10142","pid":{"type":"depid","value":"10142","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"発話速度と言語的特徴による変動を考慮した音素持続時間モデルを用いた音声認識","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"発話速度と言語的特徴による変動を考慮した音素持続時間モデルを用いた音声認識"},{"subitem_title":"A Phoneme Duration Model Considering Speaking-rate and Linguistic Features for Speech Recognition","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"論文","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2006-12-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東北大学大学院教育情報学研究部"},{"subitem_text_value":"東北大学大学院工学研究科"},{"subitem_text_value":"東北大学大学院工学研究科"},{"subitem_text_value":"東北大学大学院工学研究科"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Research Division, Graduate School of Educational Informatics, Tohoku University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Engineering, Tohoku University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Engineering, Tohoku University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Engineering, Tohoku University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/10142/files/IPSJ-JNL4712029.pdf"},"date":[{"dateType":"Available","dateValue":"2008-12-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL4712029.pdf","filesize":[{"value":"983.2 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"d1b60063-3114-48f6-8190-f056ceb3a4c8","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2006 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"大河雄一"},{"creatorName":"伊藤, 彰則"},{"creatorName":"鈴木, 基之"},{"creatorName":"牧野, 正三"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yuichi, Ohkawa","creatorNameLang":"en"},{"creatorName":"Akinori, Ito","creatorNameLang":"en"},{"creatorName":"Motoyuki, Suzuki","creatorNameLang":"en"},{"creatorName":"Shozo, Makino","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本論文では,音声認識により生じる認識誤りのうち,持続時間が本来の長さと極端に異なるものを抑制する手段として,音素持続時間のモデルを用いる方法の検討を行った.そして,発話速度や言語的要因によってもたらされる持続時間の変動を考慮した,音素持続時間モデル化法と,その音声認識への適用手法の提案を行う.従来,音声合成の分野を中心として様々な音素持続時間の生成法が提案されているが,音声認識を目的として,発話速度の影響と音素の文中での位置や品詞などの言語的特徴の影響の双方を考慮に入れた音素持続時間のモデル化法や認識手法はなかった.本論文では,言語的特徴などを質問として用いた決定木により求められるクラスを単位とし,音素の持続時間と発話速度と相関の高い局所平均母音長の2 次元正規分布として持続時間のモデル化を行うことで,様々な要因により変化する音素持続時間を高精度に推定を行う方法を提案する.また得られた持続時間の分布を,音声認識結果のN-best のリスコアリングに利用することで,認識率の改善が得られることを述べる.","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In this paper, we proposed a method of phoneme duration modeling for speech recognition. There was no usual method of duration modeling for speech recognition considering change by both speaking-rate and linguistic feature (phoneme location in sentence, part-of-speech et al.) Therefore, we modeled influence of speaking-rate by 2 dimension normal distribution of phoneme duration and local average of vowel duration. Each normal distribution is determined by tree based clustering with various question which include linguistic feature. We acquired 4.7% reduction of phoneme error rate by re-scoring of N-best hypothesis with proposed duration model.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"3391","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"3380","bibliographicIssueDates":{"bibliographicIssueDate":"2006-12-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"12","bibliographicVolumeNumber":"47"}]},"relation_version_is_last":true,"item_2_alternative_title_2":{"attribute_name":"その他タイトル","attribute_value_mlt":[{"subitem_alternative_title":"音声言語"}]},"weko_creator_id":"1"},"id":10142,"links":{}}