@techreport{oai:ipsj.ixsq.nii.ac.jp:00149701, author = {稲元, 勉 and 樋上, 喜信 and 小林, 真也 and Tsutomu, Inamoto and Yoshinobu, Higami and Shin-ya, Kobayashi}, issue = {29}, month = {Feb}, note = {本稿では,無限期間マルコフ決定過程のための動的計画法を GPU 上で実行する際の計算時間を削減するため,動的計画法が収束したか否かを判定する処理を効率化する技法を提案し,小規模な問題に対する評価結果を示すことを目的とする.提案技法は,GPGPU の枠組みとして NVIDIA 社の CUDA を用いることを前提とし,状態価値の変化量の最大値が閾値以下であるか否かとして収束判定を行う際に,GPU 側で部分状態空間に対応するスレッドブロック単位で変化量の最大値を算出しておき,この値の全部分状態空間にわたる最大値をホスト側で算出するという素朴なものである.計算結果では,animat 問題,mountain-car 問題を対象とし,提案技法を用いた場合の計算時間を示す., In this paper, we propose a technique to decrease the computational time of the value iteration program which is implemented on a GPU to solve infinite-stage Markov decision process. In order to judge whether the value iteration has been converged or not, the difference between state values before and after a sweep, which represents the computation to update all state values, is calculated for each state, then the the maximum of such differences over the whole state space is calculated. Here, it is obvious that taking such maximum over whole state space is equal to taking the maximum over values each of which is the maximum difference of state values over a sub state space. The proposed technique premises to use the CUDA to implement the value iteration for NVIDIA's GPU, and considers a thread block in the CUDA as a sub state space, then conducts computations to take the maximum difference of a sub state space in a corresponding thread block on a GPU. The effectiveness of the proposed technique is examined in terms of computational times through its applications to the animat problem and the mountain-car problem.}, title = {無限期間動的計画法のGPU実装における収束判定の処理時間削減に向けた検討}, year = {2016} }