{"updated":"2025-01-20T03:30:25.286342+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00183837","sets":["6164:6165:6210:9269"]},"path":["9269"],"owner":"11","recid":"183837","title":["花札のこいこいにおける方策勾配法とNeural Fitted Q Iterationの適用"],"pubdate":{"attribute_name":"公開日","attribute_value":"2017-11-03"},"_buckets":{"deposit":"05cd39b9-c611-429f-961a-57da13c0f32d"},"_deposit":{"id":"183837","pid":{"type":"depid","value":"183837","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"花札のこいこいにおける方策勾配法とNeural Fitted Q Iterationの適用","author_link":["404573","404571","404572","404574"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"花札のこいこいにおける方策勾配法とNeural Fitted Q Iterationの適用"},{"subitem_title":"Applying Policy Gradient method and Neural Fitted Q Iteration for Hanafuda Koi-Koi game player","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"強いAI","subitem_subject_scheme":"Other"},{"subitem_subject":"不完全情報ゲーム","subitem_subject_scheme":"Other"},{"subitem_subject":"花札","subitem_subject_scheme":"Other"},{"subitem_subject":"強化学習","subitem_subject_scheme":"Other"},{"subitem_subject":"方策勾配法","subitem_subject_scheme":"Other"},{"subitem_subject":"Deep Q network","subitem_subject_scheme":"Other"}]},"item_type_id":"18","publish_date":"2017-11-03","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_18_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"北陸先端科学技術大学院大学"},{"subitem_text_value":"北陸先端科学技術大学院大学"}]},"item_18_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Japan Advanced Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Japan Advanced Institute of Science and Technology","subitem_text_language":"en"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/183837/files/IPSJ-GPWS2017010.pdf","label":"IPSJ-GPWS2017010.pdf"},"date":[{"dateType":"Available","dateValue":"2017-11-03"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-GPWS2017010.pdf","filesize":[{"value":"4.7 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"18"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"cf6def5d-8b90-4e2c-8a5e-d8e98262f900","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2017 by the Information Processing Society of Japan"}]},"item_18_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"佐藤, 直之"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"池田, 心"}],"nameIdentifiers":[{}]}]},"item_18_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Naoyuki, Sato","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Kokolo, Ikeda","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference paper"}]},"item_18_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"花札の「こいこい」ゲームは交互2人零和不完全情報ゲームの一種で,様々な媒体で多くの人に遊ばれているが研究例が少なく,人間の上級者に匹敵する人工プレイヤが開発されたという話も聞かない.そのため我々は強化学習の方策勾配法とNeural Fitted Q Iterationを用いて強い「こいこい」プレイヤの実装を試みた.それぞれ盤面の低級な特徴量268個を入力に用いた人工ニューラルネットワークを状態行動価値の推定に用い,簡単なルールベース人工プレイヤとの反復対戦を通じて適切なパラメータの学習を行った.その結果それぞれ対戦相手から搾取した平均スコアは-0.3点と0.5点となった.","subitem_description_type":"Other"}]},"item_18_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Koi-koi game, which is played using Hanafuda playing cards, is a Japanese traditional card game classified as two players turn based imperfect information zero sum game. There are few research article focusing on this game even though this game is popular in Japan. Therefore, we tried to make strong Koi-koi game player by applying two types of reinforcement learning methods. We applied policy gradient method and neural fitted Q iteration. Each player played games against an artificial player which we constructed making its decision in a simple rule based manner. Over 1,000 times game, policy gradient player gained -0.3 score per game and neural fitted Q iteration player gained 0.5 scores in average.","subitem_description_type":"Other"}]},"item_18_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"71","bibliographic_titles":[{"bibliographic_title":"ゲームプログラミングワークショップ2017論文集"}],"bibliographicPageStart":"64","bibliographicIssueDates":{"bibliographicIssueDate":"2017-11-03","bibliographicIssueDateType":"Issued"},"bibliographicVolumeNumber":"2017"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-19T00:51:19.768470+00:00","id":183837,"links":{}}