{"created":"2025-01-19T01:11:52.734443+00:00","updated":"2025-01-19T18:03:40.653040+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00210673","sets":["581:10433:10437"]},"path":["10437"],"owner":"44499","recid":"210673","title":["Developing Value Networks for Game 2048 with Reinforcement Learning"],"pubdate":{"attribute_name":"公開日","attribute_value":"2021-04-15"},"_buckets":{"deposit":"6777d5d9-9b70-444e-a412-a4fbc8267eab"},"_deposit":{"id":"210673","pid":{"type":"depid","value":"210673","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"Developing Value Networks for Game 2048 with Reinforcement Learning","author_link":["533965","533966"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Developing Value Networks for Game 2048 with Reinforcement Learning"},{"subitem_title":"Developing Value Networks for Game 2048 with Reinforcement Learning","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[一般論文] game 2048, neural network, reinforcement learning, stochastic game, single-player game","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2021-04-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"School of Information, Kochi University of Technology"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"School of Information, Kochi University of Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/210673/files/IPSJ-JNL6204021.pdf","label":"IPSJ-JNL6204021.pdf"},"date":[{"dateType":"Available","dateValue":"2023-04-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL6204021.pdf","filesize":[{"value":"978.6 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"2474ee74-906e-4658-9b1f-af751c1b8a05","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2021 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kiminori, Matsuzaki"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kiminori, Matsuzaki","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"The game 2048 is a stochastic single-player game and several computer players have been developed in not only research work but also student projects. Among them, the most successful approach is based on N-tuple networks trained by reinforcement learning methods. Though there have been several works on computer players with deep neural networks, their performance were not as good in most cases. In our previous work, we designed policy networks and applied supervised learning, which resulted in an average score of 215,802. In this study, we tackle the problem with value networks and reinforcement learning methods, since value networks are important to combine with game-tree search methods. We investigate the training methods in several aspects, including batches of training, use of symmetry, network structures, and use of game-specific tricks. We then conduct a training for 240 hours with the best configuration. With the best value network obtained, we achieved an average score of 228,100 with the greedy (1-ply search) play, and furthermore an average score of 406,927 by combining it with the 3-ply expectimax search.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.29(2021) (online)\nDOI http://dx.doi.org/10.2197/ipsjjip.29.336\n------------------------------","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"The game 2048 is a stochastic single-player game and several computer players have been developed in not only research work but also student projects. Among them, the most successful approach is based on N-tuple networks trained by reinforcement learning methods. Though there have been several works on computer players with deep neural networks, their performance were not as good in most cases. In our previous work, we designed policy networks and applied supervised learning, which resulted in an average score of 215,802. In this study, we tackle the problem with value networks and reinforcement learning methods, since value networks are important to combine with game-tree search methods. We investigate the training methods in several aspects, including batches of training, use of symmetry, network structures, and use of game-specific tricks. We then conduct a training for 240 hours with the best configuration. With the best value network obtained, we achieved an average score of 228,100 with the greedy (1-ply search) play, and furthermore an average score of 406,927 by combining it with the 3-ply expectimax search.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.29(2021) (online)\nDOI http://dx.doi.org/10.2197/ipsjjip.29.336\n------------------------------","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicIssueDates":{"bibliographicIssueDate":"2021-04-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"4","bibliographicVolumeNumber":"62"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":210673,"links":{}}