{"id":232917,"updated":"2025-01-19T10:16:41.283365+00:00","links":{},"created":"2025-01-19T01:34:03.721260+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00232917","sets":["1164:5305:11555:11556"]},"path":["11556"],"owner":"44499","recid":"232917","title":["Self-Playを用いた深層強化学習におけるスコア分布予測型モデルの提案"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-03-01"},"_buckets":{"deposit":"fe8b41ab-c729-4d35-b0c2-25b76f7228c9"},"_deposit":{"id":"232917","pid":{"type":"depid","value":"232917","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"Self-Playを用いた深層強化学習におけるスコア分布予測型モデルの提案","author_link":["631534","631533","631532"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Self-Playを用いた深層強化学習におけるスコア分布予測型モデルの提案"},{"subitem_title":"A Proposal of Score Distribution Predictive Model in Self-Play Deep Reinforcement Learning","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2024-03-01","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"北海道大学"},{"subitem_text_value":"北海道大学"},{"subitem_text_value":"北海道大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"The University of Hokkaido","subitem_text_language":"en"},{"subitem_text_value":"The University of Hokkaido","subitem_text_language":"en"},{"subitem_text_value":"The University of Hokkaido","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/232917/files/IPSJ-GI24051029.pdf","label":"IPSJ-GI24051029.pdf"},"date":[{"dateType":"Available","dateValue":"2026-03-01"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-GI24051029.pdf","filesize":[{"value":"2.8 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"18"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"5e769bb9-5dfe-4ab5-9631-2664399d49ce","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"神子島, 一弥"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"坂地, 泰紀"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"野田, 五十樹"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11362144","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8736","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本稿ではゲーム AI で用いられる Self-Play による深層強化学習において,スコアの確率分布を予測するモデルを提案する.提案モデルでは,一般に用いられているスコアの期待値の代わりに,スコアの確率分布を求める.それを直接用いることによって,スコア学習における性能低下問題を解決する.既存モデルと比較した評価実験により,性能低下問題が解決されることが分かった.更にスコアに対してより精密な操作を可能とする結果も得られた.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"We propose a model for predicting the probability distribution of score in Self-Play deep reinforcement learning, which is used in game AI. In the proposed model, the probability distribution of score is obtained instead of expected value of score that is commonly used. By using it directly, the performance degradation problem in score learning is solved. Evaluation experiments comparing the proposed model with existing models show that the performance degradation problem is solved. Furthermore, the proposed model allowed more precise manipulation of score.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"8","bibliographic_titles":[{"bibliographic_title":"研究報告ゲーム情報学(GI)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-03-01","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"29","bibliographicVolumeNumber":"2024-GI-51"}]},"relation_version_is_last":true,"weko_creator_id":"44499"}}