{"created":"2025-01-18T23:52:13.646407+00:00","updated":"2025-01-21T08:05:42.869074+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00109849","sets":["6504:6739:7816"]},"path":["7816"],"owner":"6748","recid":"109849","title":["複数の報酬とゲート機構を用いたモジュール型強化学習アルゴリズム"],"pubdate":{"attribute_name":"公開日","attribute_value":"2012-03-06"},"_buckets":{"deposit":"fdf16dcf-1020-4c49-bf3f-45adde85a1a8"},"_deposit":{"id":"109849","pid":{"type":"depid","value":"109849","revision_id":0},"owners":[6748],"status":"published","created_by":6748},"item_title":"複数の報酬とゲート機構を用いたモジュール型強化学習アルゴリズム","author_link":["25001","25000","25002"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"複数の報酬とゲート機構を用いたモジュール型強化学習アルゴリズム"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"人工知能と認知科学","subitem_subject_scheme":"Other"}]},"item_type_id":"22","publish_date":"2012-03-06","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_22_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"早大"},{"subitem_text_value":"早大"},{"subitem_text_value":"早大"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/109849/files/IPSJ-Z74-5R-2.pdf"},"date":[{"dateType":"Available","dateValue":"2014-12-18"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-Z74-5R-2.pdf","filesize":[{"value":"550.5 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"39edc50b-2732-4908-a7f2-9e907fd226f1","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2012 by the Information Processing Society of Japan"}]},"item_22_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"吉田裕昭"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"中村真吾"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"橋本周司"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference paper"}]},"item_22_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00349328","subitem_source_identifier_type":"NCID"}]},"item_22_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"強化学習を用いて入力数が多く複雑なシステムの最適な制御器を獲得しようとすると、状態空間が指数関数的に拡大し、膨大な学習時間が必要となってしまう。この問題の解決策として、複数の単純な制御器を用意し、系の制御方法を学習するモジュール型強化学習が提案されている。しかし、いずれかの制御器を選択するだけの従来手法では、制御モジュールの組み合わせが生じるような状況に対応することができない。そこで、本研究では複数の制御器とゲート機構を用意し、制御機構ごとに報酬を与えることで、状態空間の爆発を抑えつつ複雑な系の制御器を獲得する強化学習アルゴリズムを提案する。実験では、テレビゲームのキャラクタ操作制御に提案手法を適用し、その有用性を確認した。","subitem_description_type":"Other"}]},"item_22_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"294","bibliographic_titles":[{"bibliographic_title":"第74回全国大会講演論文集"}],"bibliographicPageStart":"293","bibliographicIssueDates":{"bibliographicIssueDate":"2012-03-06","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicVolumeNumber":"2012"}]},"relation_version_is_last":true,"weko_creator_id":"6748"},"id":109849,"links":{}}