% Technical report by two authors; cleaned up from a repository auto-export.
% - author holds the Japanese names only; yomi holds their romanized readings
%   (read by Japanese BibTeX styles for sorting/labels, ignored by others).
% - abstract holds the Japanese abstract followed by the English one; it was
%   moved out of note so that styles do not print it in the reference list.
% - NOTE(review): institution is inferred from the repository host in the
%   citation key and number from the exported issue value; confirm both
%   against the report's cover page.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00226830,
  author      = {福原, 淳司 and 滝本, 宗宏},
  yomi        = {Fukuhara, Junji and Takimoto, Munehiro},
  title       = {{Subwarp Interleaving}を利用した{GPU}向け命令スケジューリング手法},
  institution = {情報処理学会},
  number      = {4},
  year        = {2023},
  month       = jul,
  abstract    = {画像処理装置（Graphics Processing Unit，以下，GPU）は，Single-Instruction Multiple-Thread 実行形式を採用しており，分岐命令があると両分岐先を実行する分岐発散を引き起こす場合がある．分岐発散が発生すると，両分岐先の実行コストがかかるので，GPU 向けプログラムの実行効率が低下する．近年，分岐発散が生じた際，一方の分岐先の命令を実行している間にストールした場合に，もう一方の分岐先の命令を割り込ませる Subwarp Interleaving（以下，SI）という手法が提案された．SI は分岐発散が生じている分岐内のストールを隠蔽できるので，プログラムの実行効率を改善する．しかし，SI は命令の並び順によっては効果的にストールを隠蔽できない問題があった．本稿では，SI を利用して，より効果的にストールを隠蔽するとともにスレッドレベル並列性を高める GPU 向け命令スケジューリング手法を提案する．実験ではサンプルプログラムに対する効果と本手法の将来的な拡張性を示す．
    Graphics Processing Units (GPUs) exploit the Single-Instruction Multiple-Thread (SIMT) execution model, which causes branch divergence when SIMT threads in a warp follow different execution paths. Once the divergence occurs, the execution of diverged paths is serialized, decreasing the performance of GPU programs. Subwarp Interleaving (SI), which allows GPUs to interleave the execution of some subwarps when one subwarp is stalled, has been proposed. Although SI can hide load-to-use stalls in divergent paths, the effectiveness is limited by the order of instructions in divergent branches. In this paper, we propose the novel instruction scheduling algorithm for GPUs to allow SI to hide load-to-use stalls more efficiently and improve performance. Our experimental results show that the effect on sample programs and the future expandability of the proposed method.},
}