{"updated":"2025-01-20T18:04:18.644550+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00146191","sets":["1164:5159:7884:8382"]},"path":["8382"],"owner":"11","recid":"146191","title":["多層双方向 LSTM リカレントニューラルネットワークに基づく複数話者音声合成と話者適応"],"pubdate":{"attribute_name":"公開日","attribute_value":"2015-11-25"},"_buckets":{"deposit":"bcb5ccc0-b0a3-4e8c-8604-39d26db5f305"},"_deposit":{"id":"146191","pid":{"type":"depid","value":"146191","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"多層双方向 LSTM リカレントニューラルネットワークに基づく複数話者音声合成と話者適応","author_link":["227382","227387","227385","227384","227383","227386"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"多層双方向 LSTM リカレントニューラルネットワークに基づく複数話者音声合成と話者適応"},{"subitem_title":"Multi-speaker speech synthesis and speaker adaptation based on deep bidirectional long short-term memory recurrent neural network","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"音声合成","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2015-11-25","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学大学院工学系研究科"},{"subitem_text_value":"東京大学大学院工学系研究科"},{"subitem_text_value":"東京大学大学院情報理工学系研究科"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Grad. School of Engineering, The Univ. of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Grad. School of Engineering, The Univ. of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Grad. School of Information Science and Technology, The Univ. 
of Tokyo","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/146191/files/IPSJ-SLP15109019.pdf","label":"IPSJ-SLP15109019.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP15109019.pdf","filesize":[{"value":"382.9 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"d1e5b2f0-1d47-4d10-9017-8058c30e139e","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2015 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"チョウ, イ"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"峯松, 信明"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"齋藤, 大輔"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yi, Zhao","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Nobuaki, Minematsu","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Daisuke, 
Saito","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"論文では,データが限られた話者に対する音声合成の質の向上を目的とした,多層双方向 LSTM リカレントニューラルネットワークに基づく複数話者音声合成モデルを提案する.提案モデルは,話者非依存のネットワーク (SIN) と話者依存のネットワーク (SDN) で構成されており,SIN は複数話者のデータで学習され,SDN はターゲット話者のデータで学習される.さらに,性別コードと話者コード,i-vector を導入することで,SIN 内部において,話者の識別性をより高めることが期待される.データ数が限られたデータベースを用いた音声合成実験により,提案法は,多層ニューラルネットワークと多層双方向 LSTM リカレントニューラルネットワークに基づく複数話者音声合成と比較して,合成音声の品質の向上を確認することができた.さらに,提案した複数話者モデルに話者適応を導入可能であり,実験的に新話者に対する音声合成の自然性と話者性を向上することができた.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In this paper, a deep bidirectional long short-term memory recurrent neural network (DBLSTM-RNN) based multi-speaker synthesis model is proposed to improve the synthesis quality for a target speaker whose corpus is limited. This model consists of speaker independent network (SIN) and speaker dependent network (SDN), where SIN is jointly trained by multiple speakers and SDN is designed for each of the target speakers. 
In particular, gender code as well as speaker code or i-vector are prepared as augmented input information to help SIN realize better distinction among different target speakers. Experimental results show that our proposed model improves the synthesis performance with a fairly small database for each speaker, compared with DNN-based multi-speaker TTS and conventional DBLSTM-RNN based TTS. In addition, this multi-speaker model can also be used to perform speaker adaptation, and is experimentally shown to be capable of achieving good quality speech of a new speaker in terms of naturalness and speaker identity.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2015-11-25","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"19","bibliographicVolumeNumber":"2015-SLP-109"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-19T00:21:41.042594+00:00","id":146191,"links":{}}