@article{oai:ipsj.ixsq.nii.ac.jp:00079533,
 author    = {Matsui, Tohgoroh and Goto, Takashi and Izumi, Kiyoshi and Chen, Yu},
 authorja  = {松井, 藤五郎 and 後藤卓 and 和泉, 潔 and 陳ユ},
 title     = {複利型強化学習の枠組みと応用},
 journal   = {情報処理学会論文誌},
 volume    = {52},
 number    = {12},
 pages     = {3300--3308},
 month     = dec,
 year      = {2011},
 abstract  = {本論文では,強化学習において複利リターンを最大化する複利型強化学習の枠組みを示し,ファイナンス分野のタスクへの応用例を示す.複利型強化学習は,報酬の代わりにリターンがマルコフ性を満たすリターン型MDPを対象とする.複利型強化学習では,二重指数的割引と投資比率の概念を導入し,対数をとることによって従来の強化学習と同様の方法で割引複利リターンを最大化する.続いて,従来の強化学習のアルゴリズムであるQ学習とOnPSを複利型に拡張した複利型Q学習と複利型OnPSのアルゴリズムを示す.また,3本腕バンディット問題に対する実験結果と日本国債取引問題への応用例を示し,複利型強化学習の有効性を確認する., This paper describes an extended framework of reinforcement learning, called compound reinforcement learning, which maximizes the compound return and shows its application to financial tasks. Compound reinforcement learning is designed for return-based MDP in which an agent observes the return instead of the rewards. We introduce double exponential discounting and betting fraction into the framework and then we can write the logarithm of double-exponentially discounted compound return as the sum of a polynomially discounted logarithm of simple gross return. In this paper, we show two algorithms of compound reinforcement learning: compound Q-learning and compound OnPS. We also show the experimental results using 3-armed bandit and an application to a financial task: Japanese government bond trading.},
}