{"id":232521,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00232521","sets":["1164:5159:11541:11549"]},"path":["11549"],"owner":"44499","recid":"232521","title":["テキストのない音声に対する自己教師あり学習モデルによる音声合成の分析 ~多言語活用を中心に~"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-02-22"},"_buckets":{"deposit":"30bc49da-2b02-4070-ae25-cfb07ff94ea1"},"_deposit":{"id":"232521","pid":{"type":"depid","value":"232521","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"テキストのない音声に対する自己教師あり学習モデルによる音声合成の分析 ~多言語活用を中心に~","author_link":["629533","629536","629532","629531","629534","629535"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"テキストのない音声に対する自己教師あり学習モデルによる音声合成の分析 ~多言語活用を中心に~"},{"subitem_title":"Analysis of Speech Synthesis of Text-Free Audio using a Self-Supervised Learning Model","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"ポスターセッション2 SP/SLP","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2024-02-22","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学大学院電気系工学専攻"},{"subitem_text_value":"東京大学大学院電気系工学専攻"},{"subitem_text_value":"東京大学大学院電気系工学専攻"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Department of Electrical Engineering and Information Systems, Graduate School of Engineering, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Department of Electrical Engineering and Information Systems, Graduate School of Engineering, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Department of Electrical Engineering and Information Systems, Graduate School of Engineering, The University of 
Tokyo","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/232521/files/IPSJ-SLP24151051.pdf","label":"IPSJ-SLP24151051.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP24151051.pdf","filesize":[{"value":"1.1 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"898135a6-bbbc-49c0-9f57-790c4b0e8b47","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"朴, 浚鎔"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"齋藤, 大輔"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"峯松, 信明"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Joonyong, Park","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Daisuke, Saito","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Nobuaki, 
Minematsu","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"深層学習ベースの音声合成のパフォーマンスは,ここ数年で急速に向上した.しかし,韻律や声調の推論による自然さの向上など,音声合成を実現するにはまだいくつかの課題が残っている.この問題の解決策として,従来の文字記号よりも豊富な情報を入力表現に工夫することで,情報を保存する研究が進められている.本研究では,教師あり学習モデルにより音声情報から得られた入力表現を分析し,これまでに利用されてきたテキスト入力表現と比較する.具体的には,英語と日本語の合成音声を評価し,以下の入力表現について分析検討を行った.その結果,自然言語テキストの使用は口語や内容的な保存には優れている一方,自己教師あり学習モデルの離散表現の使用は音響信号の保存や再合成に優れていることが経験的に示されている.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"The performance of deep learning-based speech synthesis has improved rapidly over the past few years. However, there are still some challenges remaining in the field, such as improving naturalness by predicting prosody and tone in the speech. As a solution to this problem, research for preserving these information is being carried out by devising input expressions to include more abundant information than conventional text symbols. In this research, input expressions obtained from speech information by self-supervised learning are analysed and compared with text input expressions used in conventional system. 
Specifically, the speech synthesized in English and Japanese are evaluated as a experimental result, and analytical examinations of following input expressions have been carried out. The result has been empirically shown that using natural language text is superior for preserving spoken language and content, while using the discrete representation of the self-supervised learning model is superior for preservation or resynthesis of acoustic signal.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-02-22","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"51","bibliographicVolumeNumber":"2024-SLP-151"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"updated":"2025-01-19T10:25:13.156003+00:00","created":"2025-01-19T01:33:26.259364+00:00","links":{}}