{"updated":"2025-01-19T22:10:02.522632+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00197892","sets":["1164:5159:9712:9831"]},"path":["9831"],"owner":"44499","recid":"197892","title":["深層ニューラルネットワークを用いた波形接続型感情音声合成のための感情制御法"],"pubdate":{"attribute_name":"公開日","attribute_value":"2019-06-15"},"_buckets":{"deposit":"9a16914d-8610-44a8-a7f7-dfe4fb53d1c7"},"_deposit":{"id":"197892","pid":{"type":"depid","value":"197892","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"深層ニューラルネットワークを用いた波形接続型感情音声合成のための感情制御法","author_link":["475638","475639","475640"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"深層ニューラルネットワークを用いた波形接続型感情音声合成のための感情制御法"},{"subitem_title":"Emotion manipulation for unit-selection-based speech synthesis using deep neural network","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"ポスターセッション1","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2019-06-15","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"株式会社エーアイ"},{"subitem_text_value":"株式会社エーアイ"},{"subitem_text_value":"株式会社エーアイ"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/197892/files/IPSJ-SLP19127039.pdf","label":"IPSJ-SLP19127039.pdf"},"date":[{"dateType":"Available","dateValue":"2021-06-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP19127039.pdf","filesize":[{"value":"1.3 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"936a56df-643b-4431-8063-2c84b7b2293b","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2019 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"大谷, 大和"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"松永, 悟之"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"平井, 啓之"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本稿では深層学習を用いた波形接続型感情音声合成のための感情制御法について述べる.従来の波形接続型感情音声合成では,1) 素片単位での混合が困難であるため,中間的な感情表現が乏しい,2) 
入力された感情強度に従い素片の感情の種類を切り替えるため,感情による声質の変化が不連続になるといった問題があった.これらの問題を解決するために,提案手法では深層ニューラルネットワーク (DNN) を用いて,平静音声のスペクトル特徴量と感情強度から感情音声と平静音声の差分スペクトルを予測し,これを平静の素片に畳み込むことで所望の感情強度の感情素片を生成する.また,入力感情強度に応じた差分スペクトル特徴量を予測可能にするため,データ拡張により感情強度に対応した差分スペクトル特徴量を生成し,これらを学習に用いることで所望の制御則を DNN に埋め込む.実験的評価では,従来手法と比較して滑らかな感情制御ができていることを確認した.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"This paper describes a novel emotion manipulation method for unit-selection-based speech synthesis (USS) using a deep neural network. Our conventional unit-selection-based emotional speech synthesis (USES) has two weaknesses: 1) it is poor at intermediate emotional expression because interpolated units are difficult to generate, and 2) variation in emotional voice quality is discontinuous because the emotional unit set is switched according to the input emotion intensity. To solve these problems, the proposed method predicts spectral differentials between emotional and neutral speech from the input emotion intensity and neutral spectral features using a deep neural network (DNN). The emotional units are then generated by convolving the neutral units with the predicted spectral differentials. Moreover, to make the predicted spectral differentials correspond to the input emotion intensity, we introduce a data augmentation technique into DNN training. Experimental results show that the proposed method achieves smoother manipulation of emotion intensity than the conventional USES.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2019-06-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"39","bibliographicVolumeNumber":"2019-SLP-127"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"created":"2025-01-19T01:02:18.387779+00:00","id":197892,"links":{}}
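The abstract in this record describes the core of the proposed method: a DNN takes neutral spectral features plus an emotion-intensity value and predicts the differential spectrum between emotional and neutral speech, which is then applied to the neutral unit (convolution in the time domain, i.e. addition in the log-spectral domain). The sketch below illustrates that idea only; it is not the authors' implementation. The network shape, the feature dimension `DIM`, the intensity range, and all function names are assumptions, the randomly initialised weights merely stand in for a model trained on the intensity-augmented differential features mentioned in the abstract, and the data-augmentation and unit-selection steps are omitted.

```python
# Hypothetical sketch of differential-spectrum prediction for emotion control
# (assumed shapes and names; not the code from the paper).
import numpy as np

rng = np.random.default_rng(0)

DIM = 40       # assumed spectral feature dimension per frame
HIDDEN = 128   # assumed hidden-layer width

# Randomly initialised weights stand in for a trained DNN.
W1 = rng.normal(scale=0.1, size=(DIM + 1, HIDDEN))  # +1 input for emotion intensity
b1 = np.zeros(HIDDEN)
W2 = rng.normal(scale=0.1, size=(HIDDEN, DIM))
b2 = np.zeros(DIM)

def predict_differential(neutral_frame: np.ndarray, intensity: float) -> np.ndarray:
    """Predict the emotional-minus-neutral differential log-spectrum for one frame."""
    x = np.concatenate([neutral_frame, [intensity]])
    h = np.tanh(x @ W1 + b1)
    return h @ W2 + b2

def apply_emotion(neutral_log_spec: np.ndarray, intensity: float) -> np.ndarray:
    """Add the predicted differential to each neutral frame.

    Addition in the log-spectral domain corresponds to filtering (convolution)
    of the neutral unit's waveform in the time domain.
    """
    return np.stack([
        frame + predict_differential(frame, intensity)
        for frame in neutral_log_spec
    ])

# Toy usage: 100 frames of neutral log-spectra, emotion intensity in [0, 1].
neutral = rng.normal(size=(100, DIM))
emotional = apply_emotion(neutral, intensity=0.5)
print(emotional.shape)  # (100, 40)
```

Because intensity is a continuous input to the predictor rather than a switch between unit sets, varying it from 0 to 1 changes the generated voice quality gradually, which is the smooth-control property the abstract claims over the conventional USES.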