{"created":"2025-01-18T23:02:25.661587+00:00","updated":"2025-01-22T15:36:04.746554+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00033638","sets":["1164:2735:2796:2797"]},"path":["2797"],"owner":"1","recid":"33638","title":["特徴構成法を用いたQ学習の効率改善"],"pubdate":{"attribute_name":"公開日","attribute_value":"1998-11-26"},"_buckets":{"deposit":"1bd1f2bd-5e18-4649-99d7-2e74e4ec7109"},"_deposit":{"id":"33638","pid":{"type":"depid","value":"33638","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"特徴構成法を用いたQ学習の効率改善","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"特徴構成法を用いたQ学習の効率改善"},{"subitem_title":"Improving the Effectiveness of Q - Learning by Using Feature Construction","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"1998-11-26","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"神戸大学工学部情報知能工学科"},{"subitem_text_value":"神戸大学工学部情報知能工学科"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Department of Computer and Systems Engineering, Faculty of Engineering, Kobe University","subitem_text_language":"en"},{"subitem_text_value":"Department of Computer and Systems Engineering, Faculty of Engineering, Kobe University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/33638/files/IPSJ-MPS98022010.pdf"},"date":[{"dateType":"Available","dateValue":"2000-11-26"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-MPS98022010.pdf","filesize":[{"value":"690.9 
kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"17"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"e65a8258-c228-4e4a-bf85-ccb3f6426273","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 1998 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"宮本, 行庸"},{"creatorName":"上原, 邦昭"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yukinobu, Miyamoto","creatorNameLang":"en"},{"creatorName":"Kuniaki, Uehara","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10505667","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本稿では,特徴構成法を用いた強化学習システムFCQLについて述べる.従来の強化学習では,対象とする環境の各状態を識別する適切な属性が,学習の前段階であらかじめ準備されていることを仮定している.現実には,学習システムが状態を識別するのに充分な入力系を持っているとは限らず,領域に固有の特徴を適宜構成していく機能が必要とされる.本稿では,構成的帰納学習に用いられる特徴構成法を,強化学習の一手法であるQ学習と統合し,有限離散時間環境における適切な内部表現と評価関数を学習する手法を提案する.結果として,単位時間における期待報酬値を最大化するのみでなく,収束までに費やす状態数の大幅な削減が実現できた.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In 
this paper, we describe a new reinforcement learning system called FCQL (Feature Constructive Q-Learning). Usually, reinforcement learning methods assume that they can identify each state before learning. In a real-world domain, the learner only has limited sensors, so the ability to construct new features is required. This paper describes an approach integrating feature construction with Q-learning to learn efficient internal state representation and a decision policy simultaneously in a finite, deterministic environment. The result shows that FCQL can not only maximize the long-term discounted reward per unit time, but also reduce the number of states needed to converge.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"62","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告数理モデル化と問題解決(MPS)"}],"bibliographicPageStart":"57","bibliographicIssueDates":{"bibliographicIssueDate":"1998-11-26","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"105(1998-MPS-022)","bibliographicVolumeNumber":"1998"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"id":33638,"links":{}}