{"id":91353,"updated":"2025-01-21T15:32:19.271173+00:00","links":{},"created":"2025-01-18T23:40:38.892030+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00091353","sets":["6164:6165:6210:7138"]},"path":["7138"],"owner":"10","recid":"91353","title":["方策勾配法による静的局面評価関数の強化学習についての一考察"],"pubdate":{"attribute_name":"公開日","attribute_value":"2012-11-09"},"_buckets":{"deposit":"96770ba1-bd43-48e4-92d1-bbd22aebef29"},"_deposit":{"id":"91353","pid":{"type":"depid","value":"91353","revision_id":0},"owners":[10],"status":"published","created_by":10},"item_title":"方策勾配法による静的局面評価関数の強化学習についての一考察","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"方策勾配法による静的局面評価関数の強化学習についての一考察"},{"subitem_title":"Learning Static Evaluation Functions Based on Policy Gradient Reinforcement Learning","subitem_title_language":"en"}]},"item_type_id":"18","publish_date":"2012-11-09","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_18_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"芝浦工業大学工学部情報工学科"},{},{"subitem_text_value":"(株) コスモ・ウェブ"}]},"item_18_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Shibaura Institute of Technology","subitem_text_language":"en"},{"subitem_text_language":"en"},{"subitem_text_value":"Cosmoweb Co., Ltd.","subitem_text_language":"en"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/91353/files/IPSJ-GPWS2012017.pdf"},"date":[{"dateType":"Available","dateValue":"2013-03-22"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-GPWS2012017.pdf","filesize":[{"value":"443.0 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"402fd8a3-2785-4892-802e-68b76260a5e6","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2012 by the Information Processing Society of Japan"}]},"item_18_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"五十嵐, 治一"},{"creatorName":"森岡, 祐一"},{"creatorName":"山本, 一将"}],"nameIdentifiers":[{}]}]},"item_18_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Harukazu, Igarashi","creatorNameLang":"en"},{"creatorName":"Yuichi, Morioka","creatorNameLang":"en"},{"creatorName":"Kazumasa, Yamamoto","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference paper"}]},"item_18_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本論文では強化学習の一手法である方策勾配法をコンピュータ将棋に適用する際に，全leaf 局面の静的局面評価値をその局面への遷移確率値で重み付けた期待値を用いた指し手評価方式を提案する．探索木の各ノードにおける指し手の選択としてBoltzmann 分布に基づく確率的戦略を採用すると静的局面評価関数に含まれるパラメータの学習則が再帰的に計算できる．しかしながら，処理対象とするleaf 局面数が大幅に増加するのでいくつかの近似解法も考案した．","subitem_description_type":"Other"}]},"item_18_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"This paper applies policy gradient reinforcement learning to shogi. We propose a move’s evaluation function, which is defined by the expectation of the values of all leaf nodes produced by the move in a search tree, that is weighted by the transition probabilities to the leaf nodes from the root node produced by the move. Boltzmann distribution function gives the probabilities of taking branches in a search tree instead of the minimax strategy. The learning rules of the parameters in the static evaluation function of the states can be calculated recursively. Since the number of leaf nodes for evaluation increases substantially, we also consider approximation methods to reduce the computation time.","subitem_description_type":"Other"}]},"item_18_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"121","bibliographic_titles":[{"bibliographic_title":"ゲームプログラミングワークショップ2012論文集"}],"bibliographicPageStart":"118","bibliographicIssueDates":{"bibliographicIssueDate":"2012-11-09","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"6","bibliographicVolumeNumber":"2012"}]},"relation_version_is_last":true,"weko_creator_id":"10"}}