@techreport{oai:ipsj.ixsq.nii.ac.jp:00209715,
  author        = {Xu, Zefeng and Moriyama, Koichi and Matsui, Tohgoroh and Mutoh, Atsuko and Inuzuka, Nobuhiro},
  title         = {Generating Intrinsic Rewards by Random Recurrent Network Distillation},
  institution   = {{Information Processing Society of Japan}},
  number        = {15},
  month         = feb,
  year          = {2021},
  abstract      = {Exploration in sparse reward environments pose significant challenges for many reinforcement learning algorithms. Rather than solely relying on extrinsic rewards provided by environments, many state-of-the-art methods generate intrinsic rewards to encourage the agent explore the environments. However, we found that existing models fall short in some environments, where the agent must visit a same state more than once. Thus, we improve an existing model to propose a novel type of intrinsic exploration bonus which will reward the agent when a new sequence is discovered. The intrinsic reward is the error of a recurrent neural network predicting features of the sequences given by a fixed randomly initialized recurrent neural network. Our approach performs well in some Atari games where conditions must be fulfilled to develop stories.},
  internal-note = {Cleaned OAI export: de-duplicated author list and abstract; name order normalized to "Last, First"; institution inferred from the IPSJ repository key -- confirm against the published report},
}