% Technical report (2014) on a multi-GPU thread-mapping scheme for MESI-CUDA.
% The citation key looks like an OAI identifier taken from a repository export.
%   author           - the five authors, listed once, in Japanese script to match the title.
%   author_romanized - non-standard field (ignored by bibliography styles) that keeps the
%                      romanized spellings of the same five authors in "Family, Given" order.
%   number           - report/issue number within the series.
%   abstract         - Japanese abstract followed by the English abstract; not printed by
%                      standard styles.
% TODO(review): this entry type requires an institution field, which the exported record
% does not provide. Presumably IPSJ, judging by the citation key - confirm and add.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00102318,
  author           = {丸山, 剛寛 and 田中, 宏明 and 水谷, 洋輔 and 神谷, 智晴 and 大野, 和彦},
  title            = {{GPGPU}フレームワーク{MESI-CUDA}におけるマルチ{GPU}へのスレッドマッピング機構},
  number           = {44},
  month            = jul,
  year             = {2014},
  abstract         = {近年，GPU 上で汎用計算を実行する GPGPU が注目されている．現在主流な開発環境である CUDA では，高級言語で記述することが可能だが，GPU の複雑なメモリ構造を意識してプログラミングする必要がある．これに対し，我々は単純なメモリ構造モデルでプログラミング可能な MESI-CUDA を提案している．しかし，現在の MESI-CUDA は単一の GPU 環境を想定しており GPU1 基を使用するコードしか生成できないため，複数の GPU を搭載していても性能を発揮できない．一方，CUDA では複数の GPU を利用できるが，個々の GPU をユーザーが直接制御する必要がある．そこで，我々は複数の GPU へ自動的に処理を振り分けるスレッドマッピング機構を提案する．処理を振り分ける方法として静的／動的な方法が考えられるが，前者は静的解析のみで最適な振り分けをすることが困難である．一方，後者の場合，負荷の均等化のためには振り分けの粒度を細かくすることが望ましいが，その結果として並列度が下がり GPU の利用効率が低下する可能性がある．そこで本稿では，静的解析の結果を用いてある程度の処理を静的に振り分けた後，動的振り分けに切り替えるハイブリッド型の手法を提案する．, The performance of Graphics Processing Units (GPU) is improving rapidly. Thus, General Purpose computation on Graphics Processing Units (GPGPU) is expected as an important method for high-performance computing. CUDA, one of the major developing environment, enables GPU programming using C/C++, but the user must handle the complicated memory architecture. Therefore, we are developing a new programming framework named MESI-CUDA, which provides a simple memory architecture model automatically generating low-level CUDA code. However, the current implementation of MESI-CUDA only supports single GPU and cannot utilize multiple GPUs. Although CUDA supports multi-GPU, the user must directly control each device. Thus, we propose a scheme which automatically maps GPU threads to multiple GPUs. Although static mapping causes small runtime overhead, the result may not be optimal. On the other hand, the efficiency of dynamic mapping is largely influenced by the granularity of thread grouping; large grouping disturbs optimal load-balancing, while small grouping cannot utilize large number of GPU cores. Therefore, we propose a hybrid scheme combining static and dynamic mapping methods.},
  author_romanized = {Maruyama, Takanori and Tanaka, Hiroaki and Mizutani, Yousuke and Kamiya, Tomoharu and Ohno, Kazuhiko},
}