{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00199590","sets":["581:9633:9642"]},"path":["9642"],"owner":"44499","recid":"199590","title":["環境変化をともなう経路選択問題における強化学習"],"pubdate":{"attribute_name":"公開日","attribute_value":"2019-09-15"},"_buckets":{"deposit":"c7125f34-7ea1-4127-8828-67972372b888"},"_deposit":{"id":"199590","pid":{"type":"depid","value":"199590","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"環境変化をともなう経路選択問題における強化学習","author_link":["483262","483260","483261","483263","483259","483264"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"環境変化をともなう経路選択問題における強化学習"},{"subitem_title":"Reinforcement Learning in Routing Problems with Environment Shifts","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[一般論文] 強化学習,環境変化,経路計画問題,重み付きサンプリング","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2019-09-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"株式会社豊田中央研究所"},{"subitem_text_value":"株式会社豊田中央研究所"},{"subitem_text_value":"株式会社豊田中央研究所"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Toyota Central R&D Labs., Inc.","subitem_text_language":"en"},{"subitem_text_value":"Toyota Central R&D Labs., Inc.","subitem_text_language":"en"},{"subitem_text_value":"Toyota Central R&D Labs., Inc.","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/199590/files/IPSJ-JNL6009031.pdf","label":"IPSJ-JNL6009031.pdf"},"date":[{"dateType":"Available","dateValue":"2021-09-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL6009031.pdf","filesize":[{"value":"1.4 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"2d5ddb1b-3144-49a8-a356-0c07c3e0a06d","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2019 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"大滝, 啓介"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"西, 智樹"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"吉村, 貴克"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Keisuke, Otaki","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Tomoki, Nishi","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Takayoshi, Yoshimura","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"強化学習は,ある環境においてエージェントが取るべき行動を経験から学習する手法であり,行動は特定の環境から得られる経験を用いて学習される.そのため環境自体が変化した場合には,新しい環境に対して一から,または以前学習した結果を再利用し,行動の修正が必要な状態に対して学習をやり直す必要がある.我々は経路選択問題において,目的地までの距離の変化に基づいて,再学習が必要となる状態を絞り込むことで学習を高速化する手法を提案する.本稿では格子世界を用いた実験を行い,環境変化の構造的情報を利用することで,再学習が効率的に進むことを確認した.","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Reinforcement learning involves learning a policy. The learned policy must be adjusted when the environment shifts from a source domain to another domain. Typical approaches use learned parameters of the policy as initial parameters. We propose to use knowledge of the shifts additionally to adjust the policy. The knowledge is represented by weights on states representing the degree of changes in distances from the states to an absorbing goal. Our method uses these weights to sample states, wherein an agent updates the policy. Numerical experiments on Gridworlds indicate that the knowledge about the shifts is helpful for efficient learning, particularly at an early stage.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"1579","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"1572","bibliographicIssueDates":{"bibliographicIssueDate":"2019-09-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"9","bibliographicVolumeNumber":"60"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"updated":"2025-01-19T21:40:14.611397+00:00","created":"2025-01-19T01:03:29.609554+00:00","links":{},"id":199590}