% Journal article record; Japanese-language metadata with an English abstract.
% - `author` lists each of the five authors once, in kanji, as "Family, Given".
%   Their romanized forms are kept in the non-standard field `author_en`
%   (also "Family, Given"); unknown fields are ignored by bibliography styles.
% - The abstracts live in `abstract` (Japanese) and `abstract_en` (English)
%   instead of `note`, so they are not printed in the reference list.
%   Percent signs are escaped as \% so they cannot start a TeX comment.
% - `PGAS` and `XcalableACC` are brace-protected in `title` so that
%   sentence-casing styles do not lowercase them.
@article{oai:ipsj.ixsq.nii.ac.jp:00158051,
  author      = {田渕, 晶大 and 中尾, 昌広 and 村井, 均 and 朴, 泰祐 and 佐藤, 三久},
  author_en   = {Tabuchi, Akihiro and Nakao, Masahiro and Murai, Hitoshi and Boku, Taisuke and Sato, Mitsuhisa},
  title       = {演算加速機構を持つクラスタ向け{PGAS}言語{XcalableACC}の評価},
  journal     = {情報処理学会論文誌コンピューティングシステム（ACS）},
  volume      = {9},
  number      = {1},
  pages       = {17--29},
  year        = {2016},
  month       = mar,
  abstract    = {GPUやMICのような演算加速機構を持つクラスタが広く使われている．演算加速機構のプログラミングにOpenACCやOpenMP 4.0を用いてMPIと組み合わせることで，比較的簡易に演算加速機構を持つクラスタ向けのプログラムを記述できるようになったが，それでもなおMPIの記述が煩雑であるため生産性が低いという問題がある．そこで我々はPartitioned Global Address Space（PGAS）言語XcalableMPと演算加速機構プログラミングモデルOpenACCを統合したXcalableACC（XACC）を提案している．XACCでは逐次コードに指示文を追加することにより，演算加速機構を持つクラスタ向けのプログラミングが可能である．本稿では，XACCの通信指示文の一部をNVIDIA GPU向けに実装しベンチマークで性能評価を行った．MPI+OpenACCと比較してHimeno Benchmarkでは最大で97\%，NAS Parallel Benchmarks（NPB）CGでは最大で96\%の性能を達成した．また指示文による簡潔な記述によりMPI+OpenACCと比較してコード行数をHimeno Benchmarkでは51\%，NPB CGでは79\%に抑えられたことから，XACCは高い性能と生産性があるといえる．},
  abstract_en = {Clusters equipped with accelerators such as GPU and MIC are widely used. For these clusters, programmers can develop their applications relatively easily by combining MPI with OpenACC or OpenMP 4.0, but lower productivity due to complex MPI programming is still a problem. We have been proposing XcalableACC (XACC), which is an integration of a Partitioned Global Address Space (PGAS) language XcalableMP (XMP) and OpenACC. XACC enables programmers to develop applications for accelerator clusters just by adding directives to a serial version of the code. In this paper, we show the implementation of the XACC communication directives for NVIDIA GPU and evaluated their performance using two benchmarks. The performance of the XACC version against MPI+OpenACC version is up to 97\% for Himeno Benchmark and up to 96\% for NAS Parallel Benchmarks (NPB) CG. The code size of XACC version against MPI+OpenACC version is 51\% for Himeno Benchmark and 79\% for NPB CG. Therefore, XACC features fully high performance and productivity.},
}