{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00232528","sets":["1164:5159:11541:11549"]},"path":["11549"],"owner":"44499","recid":"232528","title":["潜在変数と観測データにガンマ分布を仮定したVAEによる音声振幅スペクトル表現名"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-02-22"},"_buckets":{"deposit":"c4bacdef-8dde-41c9-9836-8c0591f6866f"},"_deposit":{"id":"232528","pid":{"type":"depid","value":"232528","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"潜在変数と観測データにガンマ分布を仮定したVAEによる音声振幅スペクトル表現名","author_link":["629587","629589","629588","629590"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"潜在変数と観測データにガンマ分布を仮定したVAEによる音声振幅スペクトル表現名"},{"subitem_title":"Speech representation based on VAE assuming gamma distribution for latent variables and observation","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"ポスターセッション2 SP/SLP","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2024-02-22","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"電気通信大学"},{"subitem_text_value":"電気通信大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"The University of Electro-communications","subitem_text_language":"en"},{"subitem_text_value":"The University of Electro-communications","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/232528/files/IPSJ-SLP24151058.pdf","label":"IPSJ-SLP24151058.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP24151058.pdf","filesize":[{"value":"1.9 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"4bb8f7e2-4a6c-4f8b-886c-1e06d6c3b7e8","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"今市, 夏菜子"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"中鹿, 亘"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Nanako, Imaichi","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Toru, Nakashika","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical 
report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"近年,データの生成において複雑な関係を表現できる深層生成モデルが注目されており,その例として変分オートエンコーダ (Variational Auto Encoder:VAE) が挙げられる.通常 VAE は,エンコーダとデコーダともにガウス分布を仮定することが多いが,VAE の入力特徴量として適している振幅スペクトルは必ず正の値となることから,負の値も考慮するガウス分布は適切ではないと考えられる.そこで,正の値のみを考慮する確率分布の一つであるガンマ分布を仮定し再構成の精度の比較実験を行った.その結果,ガンマ分布を仮定した VAE において通常の VAE よりも良い精度の結果を得ることができ,VAE を用いて振幅スペクトルをモデル化するという点においてはガンマ分布が適していることが分かった.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Recently, deep generative models that can represent complex relationships in data generation have been attracting attention, and VAE is an example of such a model. However, since the amplitude spectrum suitable for VAE input features is always positive, a Gaussian distribution that also takes negative values into account is not appropriate. Therefore, we conducted an experiment to compare the accuracy of reconstruction by assuming a gamma distribution, a probability distribution that considers only positive values. 
As a result, we found that the VAE assuming the gamma distribution gave better accuracy results than the normal VAE, indicating that the gamma distribution is suitable for modeling amplitude spectra using VAE.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-02-22","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"58","bibliographicVolumeNumber":"2024-SLP-151"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":232528,"updated":"2025-01-19T10:25:03.370968+00:00","links":{},"created":"2025-01-19T01:33:26.898579+00:00"}