{"id":146173,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00146173","sets":["1164:5159:7884:8382"]},"path":["8382"],"owner":"11","recid":"146173","title":["Deep Neural Networkを用いた話者空間基底への射影による声質変換"],"pubdate":{"attribute_name":"公開日","attribute_value":"2015-11-25"},"_buckets":{"deposit":"cd94f937-8996-4bac-828e-68f9a75c89e3"},"_deposit":{"id":"146173","pid":{"type":"depid","value":"146173","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"Deep Neural Networkを用いた話者空間基底への射影による声質変換","author_link":["227269","227271","227264","227266","227265","227267","227270","227268"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Deep Neural Networkを用いた話者空間基底への射影による声質変換"},{"subitem_title":"Voice Conversion based on Projection to Speaker Space Bases constructed by Deep Neural Network","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"声質変換","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2015-11-25","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学大学院工学系研究科"},{"subitem_text_value":"東京大学大学院工学系研究科"},{"subitem_text_value":"東京大学大学院情報理工学系研究科"},{"subitem_text_value":"東京大学大学院工学系研究科"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Grad. School of Engineering, The Univ. of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Grad. School of Engineering, The Univ. of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Grad. School of Information Science and Technology, The Univ. of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Grad. School of Engineering, The Univ. 
of Tokyo","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/146173/files/IPSJ-SLP15109001.pdf","label":"IPSJ-SLP15109001.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP15109001.pdf","filesize":[{"value":"415.7 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"bcd66d0c-1537-462c-8846-029f8c8f73b6","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2015 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"橋本, 哲弥"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"柏木, 陽佑"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"齋藤, 大輔"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"峯松, 信明"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Tetsuya, Hashimoto","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yousuke, Kashiwagi","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Daisuke, Saito","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Nobuaki, 
Minematsu","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本研究では，入出力に任意話者を用いることのできる柔軟な声質変換を目的とし，Deep Neural Network(DNN) と Eigenvoice GMMs (EVGMM) の枠組みを組み合わせた変換手法を提案する．初めに大規模話者コーパスを用いて EVGMM の学習を行い，GMM の話者空間の基底ベクトル群を得る．EVGMM においては，この基底ベクトル群に対して目的話者に固有の重みベクトルを掛けることで目的話者の GMM 平均ベクトルを決定する．提案手法では，重みベクトルとして 1-of-K 表現ベクトルを用いることで，話者空間を張る基底話者群の GMM を近似する．近似した GMM によって大規模コーパス中の各話者の特徴量を基底話者群の特徴量へ分解することができる．これらを用いることで DNN によって 「ある話者の特徴量から基底話者群の特徴量への変換」 と 「基底話者群の特徴量から目的話者への変換」 の学習をそれぞれ行う．提案手法に対して，適応データ数に対する未知話者への変換精度の客観評価を行った結果，既存手法である EVGMM を上回る精度が得られた．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"This paper describes a novel approach to construct a voice conversion (VC) system using deep neural networks (DNN) and Eigenvoice GMMs (EVGMM) with the final goal to realize conversion to arbitrary speakers. At first, we train EVGMM with multiple parallel datasets consisting of utterance pairs of a single speaker (reference speaker) and many pre-stored speakers and construct bases of a speaker space based on GMM supervectors. 
In our proposed method, 1-of-K coding is used instead of speaker-dependent weight parameters in EVGMM to divide input features into components in each basis of a speaker space. Then, input features are converted to those components in each basis of a speaker space, and these components are converted to target speakers' features. These two steps are technically implemented by using DNN. The objective evaluation demonstrates that the proposed architecture improves the performance of target-speaker-open voice conversion.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理（SLP）"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2015-11-25","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicVolumeNumber":"2015-SLP-109"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"updated":"2025-01-20T18:03:44.751184+00:00","created":"2025-01-19T00:21:40.107121+00:00","links":{}}