{"updated":"2025-01-21T14:47:27.808714+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00092727","sets":["1164:5305:7084:7204"]},"path":["7204"],"owner":"11","recid":"92727","title":["方策勾配法による局面評価関数とシミュレーション方策の学習"],"pubdate":{"attribute_name":"公開日","attribute_value":"2013-06-21"},"_buckets":{"deposit":"fe02cef2-3d01-48bb-a807-3e28d940b327"},"_deposit":{"id":"92727","pid":{"type":"depid","value":"92727","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"方策勾配法による局面評価関数とシミュレーション方策の学習","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"方策勾配法による局面評価関数とシミュレーション方策の学習"},{"subitem_title":"Learning Positional Evaluation Functions and Simulation Policies by Policy Gradient Algorithm","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"学習","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2013-06-21","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"芝浦工業大学工学部情報工学科"},{"subitem_text_value":";;"},{"subitem_text_value":"株式会社コスモ・ウェブ"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Shibaura Institute of Technology","subitem_text_language":"en"},{"subitem_text_value":";;","subitem_text_language":"en"},{"subitem_text_value":"Cosmoweb Co., Ltd.","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/92727/files/IPSJ-GI13030006.pdf","label":"IPSJ-GI13030006"},"date":[{"dateType":"Available","dateValue":"2015-06-21"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-GI13030006.pdf","filesize":[{"value":"876.7 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"18"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"5ae49b9e-a009-4282-b981-127c35bbef90","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2013 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"五十嵐, 治一"},{"creatorName":"森岡, 祐一"},{"creatorName":"山本, 一将"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Harukazu, Igarashi","creatorNameLang":"en"},{"creatorName":"Yuichi, Morioka","creatorNameLang":"en"},{"creatorName":"Kazumasa, Yamamoto","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11362144","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本論文では強化学習の一手法である方策勾配法をコンピュータ将棋に適用する方法を考察した.方策勾配法は,報酬や方策にマルコフ性の制限なく自由に設計することができるという大きなメリットがある.本論文では,最初に全 leaf 局面の局面評価値をその局面への遷移確率値で重み付けた期待値を用いた指し手評価方式を提案する.これをベースに,探索木の各ノードにおける指し手の選択法として Boltzmann 分布に基づくソフトマックス戦略を採用した場合の局面評価関数に含まれるパラメータの学習則を導出した.しかし,探索や学習時の計算量が膨大となるため,3 つの近似計算法を考案した.次に,探索時にシミュレーション方策を用いてモンテカルロ探索を行う場合や,探索の深さを制御する場合のために,局面評価関数とシミュレーション方策の両者を同時に学習する学習則を方策勾配法により導出した.さらに,この方策勾配の計算法を利用すると,局面ごとに正解手が既知の場合の教師付学習も可能であることを示し,実際に学習則を導出した.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"This paper applies policy gradient reinforcement learning to shogi, a traditional Japanese board game that resembles chess. First, we propose a move evaluation function, which is defined by the expectation of the values of all leaf nodes produced by the move in a search tree that is weighted by the transition probabilities to the leaf nodes from the root node produced by the move. Since policy gradient reinforcement learning does not require Markovian properties of reward functions and policies, system designers can create the rewards functions and policies more freely than when using other reinforcement learning methods that must be applied in Markov decision processes. The learning rules of the parameters in the positional evaluation function can be calculated recursively when the Boltzmann distribution function gives the probabilities of taking branches in a search tree. We also consider three approximation methods to reduce the computation time for tree searching and parameter learning. Second, we derived the learning rules for both positional evaluation functions and simulation policies for Monte-Carlo simulation search and controlling the search depth by the policy gradient algorithm. This approach can also be applied to supervised learning problems of a teacher's moves in a given position.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"8","bibliographic_titles":[{"bibliographic_title":"研究報告ゲーム情報学(GI)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2013-06-21","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"6","bibliographicVolumeNumber":"2013-GI-30"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-18T23:41:31.876975+00:00","id":92727,"links":{}}