% Conference paper in the Game Programming Workshop 2016 proceedings,
% published by the Information Processing Society of Japan (情報処理学会).
% `yomi` carries the romanized reading of the two authors (used for sorting
% by jBibTeX/pBibTeX styles); `abstract` / `abstract_en` hold the Japanese
% and English abstracts and are ignored by standard styles.
@inproceedings{oai:ipsj.ixsq.nii.ac.jp:00175342,
  author      = {水上, 直紀 and 鶴岡, 慶雅},
  yomi        = {Mizukami, Naoki and Tsuruoka, Yoshimasa},
  title       = {強化学習を用いた効率的な和了を行う麻雀プレイヤ},
  booktitle   = {ゲームプログラミングワークショップ2016論文集},
  publisher   = {情報処理学会},
  volume      = {2016},
  pages       = {81--88},
  year        = {2016},
  month       = oct,
  abstract    = {本論文では強化学習を用いた麻雀プレイヤを構築する方法について述べる．初めに手牌から和了点数を予測するモデルを生成した牌譜から学習する．このモデルの結果と期待最終順位を用いて効率的な和了を行う手をプログラムは選択する．得られたプログラムは高い点数を和了する技術を獲得したものの，自己対戦の結果は元のプログラムに勝ち越すことはできなかった．},
  abstract_en = {This paper describes a method for building a mahjong program using reinforcement learning. We train models that predict winning scores of a player's hands using game records that are generated by our program. Our program decides moves based on the outputs of the prediction models and the expected final ranks. The program has obtained a skill for winning with high scores, but the evaluation results of self-play is lower than those of the original program.},
}