{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00213450","sets":["6164:6165:6210:10734"]},"path":["10734"],"owner":"44499","recid":"213450","title":["外部記憶を用いた部分観測環境における教師なし強化学習"],"pubdate":{"attribute_name":"公開日","attribute_value":"2021-11-06"},"_buckets":{"deposit":"3b356829-cd49-405c-afd8-a90ea7d8445d"},"_deposit":{"id":"213450","pid":{"type":"depid","value":"213450","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"外部記憶を用いた部分観測環境における教師なし強化学習","author_link":["546197","546198","546196","546195"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"外部記憶を用いた部分観測環境における教師なし強化学習"},{"subitem_title":"Unsupervised Reinforcement Learning for Partially Observable Environments Using External Memories","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"深層強化学習","subitem_subject_scheme":"Other"},{"subitem_subject":"部分観測環境","subitem_subject_scheme":"Other"},{"subitem_subject":"教師なし強化学習","subitem_subject_scheme":"Other"},{"subitem_subject":"外部記憶","subitem_subject_scheme":"Other"}]},"item_type_id":"18","publish_date":"2021-11-06","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_18_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学工学部電子情報工学科"},{"subitem_text_value":"東京大学大学院情報理工学系研究科電子情報学専攻"}]},"item_18_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Department of Information and Communication Engineer-ing, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Science and Technology,The University of Tokyo","subitem_text_language":"en"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/213450/files/IPSJ-GPWS2021029.pdf","label":"IPSJ-GPWS2021029.pdf"},"date":[{"dateType":"Available","dateValue":"2021-11-06"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-GPWS2021029.pdf","filesize":[{"value":"2.5 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"18"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"4c66136b-0555-40d4-839f-b67875654ef7","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2021 by the Information Processing Society of Japan"}]},"item_18_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"中本, 光彦"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"鶴岡, 慶雅"}],"nameIdentifiers":[{}]}]},"item_18_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Mitsuhiko, Nakamoto","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yoshimasa, Tsuruoka","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference 
paper"}]},"item_18_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"部分観測環境における深層強化学習の適用は困難である.また,複雑なタスクにおいては適切な報酬関数を設計することも難しいとされている.本研究では,これらの課題を解決するために,部分観測環境における教師なし強化学習のアルゴリズムを提案する.部分観測性に対処するためにエージェントに外部の記憶機構を与え,外部報酬を用いる代わりに相互情報量に基づいた内発的報酬を提案する.提案する内発的報酬は,エージェントに観測情報が非常に限られている状態空間を優先的に探索しながら,有効な記憶を学習させることを可能にする.実験では,HalfCheetah エージェントに限られた観測だけで,外部報酬を一切使用せずに,前後に走ることを習得させることができた.","subitem_description_type":"Other"}]},"item_18_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Deep reinforcement learning (RL) is difficult when the environment is partially observable and has no reward function. In this paper, we propose an unsupervised RL algorithm to tackle these problems. We provide the agent with external memory to deal with partial observability, and propose a novel mutual information-based intrinsic reward for unsupervised exploration. The proposed intrinsic reward encourages the agent to explore the state space with strict partial observability, and at the same time, obtain an informative memory. In the experiments, our algorithm enables a HalfCheetah agent to run forward and backward with limited observations and without receiving any external rewards.","subitem_description_type":"Other"}]},"item_18_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"165","bibliographic_titles":[{"bibliographic_title":"ゲームプログラミングワークショップ2021論文集"}],"bibliographicPageStart":"160","bibliographicIssueDates":{"bibliographicIssueDate":"2021-11-06","bibliographicIssueDateType":"Issued"},"bibliographicVolumeNumber":"2021"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":213450,"updated":"2025-01-19T17:09:27.748350+00:00","links":{},"created":"2025-01-19T01:14:19.659115+00:00"}