{"links":{},"id":232527,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00232527","sets":["1164:5159:11541:11549"]},"path":["11549"],"owner":"44499","recid":"232527","title":["x-vectorの話者空間を利用した2話者間の中間話者音声合成"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-02-22"},"_buckets":{"deposit":"a1cb80ef-f52c-49c8-a8c2-6a4d7c433068"},"_deposit":{"id":"232527","pid":{"type":"depid","value":"232527","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"x-vectorの話者空間を利用した2話者間の中間話者音声合成","author_link":["629584","629583","629582","629580","629585","629579","629586","629581"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"x-vectorの話者空間を利用した2話者間の中間話者音声合成"},{"subitem_title":"Intermediate speaker speech synthesis between two speakers using x-vector speaker space","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"ポスターセッション2 SP/SLP","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2024-02-22","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"豊橋技術科学大学"},{"subitem_text_value":"豊橋技術科学大学"},{"subitem_text_value":"豊橋技術科学大学"},{"subitem_text_value":"豊橋技術科学大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Toyohashi University of Technology","subitem_text_language":"en"},{"subitem_text_value":"Toyohashi University of Technology","subitem_text_language":"en"},{"subitem_text_value":"Toyohashi University of Technology","subitem_text_language":"en"},{"subitem_text_value":"Toyohashi University of Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/232527/files/IPSJ-SLP24151057.pdf","label":"IPSJ-SLP24151057.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP24151057.pdf","filesize":[{"value":"1.9 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"61f6d760-2ada-4d2b-8158-9139d007b8e8","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"細井, 颯太"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"木内, 貴浩"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"若林, 佑幸"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"北岡, 教英"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Sota, Hosoi","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Takahiro, Kinouchi","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yukoh, Wakabayashi","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Norihide, Kitaoka","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, 
etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"近年,複数話者の声質や話し方について学習している音声合成器を用いて,学習セットに含まれていない話者の音声を新たに合成する話者生成手法が考案されている.本研究では,複数話者音声合成に用いる話者空間に x-vector を採用し,二人の話者の中間に位置するような中間音声を合成する手法を提案する.まず提案手法では,話者の音声から x-vector を抽出するときに,損失関数として話者間の類似度を考慮する.そして,それらの x-vector の内分点をとることで話者間の中間に位置するような中間話者 x-vector を生成する.最終的には,テキストと中間話者 x-vector を結合した特徴量を用いて中間音声を生成する.提案手法により,基本周波数と発話区間長において,合成された音声が元話者の中間に位置していることが示された.また,x-vector およびユークリッドノルムに関して正規化された x-vector を用いた手法と比較したところ,基本周波数と発話区間長に関する客観評価,中間音声を選択する主観評価実験の両方で同等程度の品質であることが示された.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Recent advancements in speech synthesis technologies have enabled the synthesis of speeches of speakers not in the training set using multi-speaker speech synthesizers that learn diverse speech qualities and speaking styles. This study proposes a novel speaker generation approach to synthesize intermediate speech between two speakers by incorporating x-vectors in the speaker space used for multi-speaker speech synthesis. The proposed method constructs x-vectors from multi-speakers’ speeches with a loss function that accounts for speaker similarity. Intermediate speaker x-vectors are then generated by calculating the internal division points between these x-vectors. An intermediate speech is generated using a combination of text and the intermediate speaker x-vector. 
Experimental results indicate that the synthesized intermediate speech appropriately aligns with the intermediate position of the original speeches in terms of fundamental frequency and the length of voice active frames. The proposed approach demonstrates comparable quality in objective and subjective evaluation experiments compared to methods using original x-vectors and normalized x-vectors with an Euclidean norm.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-02-22","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"57","bibliographicVolumeNumber":"2024-SLP-151"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"created":"2025-01-19T01:33:26.807541+00:00","updated":"2025-01-19T10:25:04.434804+00:00"}