% Technical report by a single author. The Japanese name is the canonical
% `author`; its romanized spelling (family name Tajima, given name Yasuhiro)
% is kept in the non-standard `author_en` field, which bibliography styles
% ignore, so the person is not printed twice as two different authors.
% The Japanese and English abstracts live in `abstract` rather than `note`,
% because `note` is printed verbatim in the reference list.
% NOTE(review): the `institution` field required for a technical report is
% not present in the source record. Judging by the citation key it is
% presumably IPSJ -- TODO confirm against the publisher record and add it.
% NOTE(review): `number` was exported as `issue`; confirm that 8 is the
% report number within its series.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00069716,
  author    = {但馬, 康宏},
  author_en = {Tajima, Yasuhiro},
  title     = {強化学習による評価関数の獲得における報酬設定について},
  number    = {8},
  month     = jun,
  year      = {2010},
  abstract  = {ゲームの評価関数を強化学習を用いて獲得する場合，一般的には終了局面における勝敗を報酬とし，途中局面の報酬を 0 とする手法が知られている．本研究では途中局面に対する報酬をその局面におけるランダムシミュレーションの勝率とし，終了局面における勝敗の報酬の大きさを変化させた場合の違いを検証する．さらにオセロゲーム Zebra において利用されている盤面パターンの評価重みを本手法により学習し，実験的評価とする． Reinforcement learning for an evaluation function of games is applied with zero-reward for intermediate positions and win/lose reward for the terminal position, in general. In this paper, we propose some reward setting methods for intermediate positions and compare them with each other. Then, we evaluate our methods by experiments on othello game Zebra's pattern check parameters.},
}