{"updated":"2025-01-20T05:53:08.652319+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00176409","sets":["1164:5159:8497:9012"]},"path":["9012"],"owner":"11","recid":"176409","title":["高い雑音耐性と推定精度を両立する基本周波数推定法の提案と評価"],"pubdate":{"attribute_name":"公開日","attribute_value":"2016-12-13"},"_buckets":{"deposit":"ff6ce779-3b42-4113-9fe6-2bb50d5d6e64"},"_deposit":{"id":"176409","pid":{"type":"depid","value":"176409","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"高い雑音耐性と推定精度を両立する基本周波数推定法の提案と評価","author_link":["371150","371149"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"高い雑音耐性と推定精度を両立する基本周波数推定法の提案と評価"},{"subitem_title":"Proposal of a robust and high-performance F0 estimator and its evaluation","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"音声分析,特徴抽出","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2016-12-13","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"山梨大学大学院総合研究部"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Interdisciplinary Graduate School, University of Yamanashi","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/176409/files/IPSJ-SLP16114023.pdf","label":"IPSJ-SLP16114023.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP16114023.pdf","filesize":[{"value":"474.6 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"b1605df5-e692-4271-b81d-1e3967d66035","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2016 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"森勢, 将雅"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Masanori, Morise","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"基本周波数 (F0,最近は FO と表記することもあるが本稿では F0 に統一する) は,周期的に生じる声帯振動間隔の最も短いものの逆数として定義され,知覚する音声の高さに概ね対応する音声の主要なパラメータである.F0 は様々な音声処理に利用されるパラメータであり,例えば Channel vocoder の考えに基づいた高品質音声合成では,音声から F0 を可能な限り高い精度で推定することが要求される.筆者らは,これまで高 SNR の音声を対象とした実時間処理が可能な推定法について検討し,SNR が 30 dB 以上であれば実時間処理が可能であり,かつ最新の方法と比較しても遜色ない性能が達成可能な方法を提案してきた.一方,例えば統計的音声合成では,学習に必要な音声パラメータは事前に分析しておけば良いため,実時間性よりも高い精度と雑音に対する頑健性を備えた方法が望ましいといえる.本稿では,計算速度ではなく,高い耐雑音性と推定精度にフォーカスを絞った F0 推定法 Harvest を提案する.Harvest は,音声スペクトルが調波構造を持つことに着目し,基本波に相当するピークを検出する方法を採用している. まず,高調波と低域雑音を除去するため,様々な中心周波数のバンドパスフィルタによるフィルタリングを実施し,得られた多チャネル信号から F0 の可能性がある候補を全て選定する.その後,選定された候補を瞬時周波数を用いて補正し,時系列の連続性を考えて接続することで最終的な F0 軌跡を生成する.本稿では,音声データベースを用いた評価,および筆者らが 2016 年に提案した耐雑音性評価法により提案法の有効性を示す.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Fundamental frequency (F0) is related with the perceived pitch of the periodic signal and is one of the most important parameters for various kinds of speech processing. Modern channel vocoders for high-quality speech synthesis generally require high-performance estimators in speech parameters including F0. We have proposed a rapid and reliable F0 estimator for real-time applications. On the other hand, other applications such as statistical parametric speech synthesis require a robust estimator rather than computational cost. This paper presents a robust and high-performance F0 estimator named Harvest for high-quality speech synthesis. The proposed estimator consists of three steps : multi-channel band-pass filtering with different center frequencies, calculation of F0 candidates and connection on the basis of the continuity of the F0 contour. Two evaluations in estimation performance and noise robustness were carried out compared with other modern estimators. The result showed that the proposed estimator was superior to others in both estimation performance and noise robustness.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2016-12-13","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"23","bibliographicVolumeNumber":"2016-SLP-114"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-19T00:46:03.517718+00:00","id":176409,"links":{}}