% Conference paper with a bilingual (Japanese/English) abstract.
% `yomi` holds the romanized readings of the authors, in the same order as
% `author`; the abstract is kept in `abstract` (not `note`) so that
% bibliography styles do not print it.
@inproceedings{oai:ipsj.ixsq.nii.ac.jp:00163694,
  author    = {佐藤, 賢太 and 藤田, 典久 and 塙, 敏博 and 松本, 和也 and 朴, 泰祐 and Ibrahim, Khaled},
  yomi      = {Sato, Kenta and Fujita, Norihisa and Hanawa, Toshihiro and Matsumoto, Kazuya and Boku, Taisuke and Ibrahim, Khaled},
  title     = {密結合並列演算加速機構{TCA}による{GPU}対応{GASNet}の実装と評価},
  booktitle = {ハイパフォーマンスコンピューティングと計算科学シンポジウム論文集},
  volume    = {2016},
  pages     = {68--76},
  publisher = {情報処理学会},
  year      = {2016},
  month     = may,
  abstract  = {近年，GPU のような演算加速装置を用いたクラスタが HPC 分野で多く用いられるようになってきている．筑波大学計算科学研究センターでは，ノードを跨ぐ演算加速装置間での直接通信を実現するために，密結合並列演算加速機構 TCA（Tightly Coupled Accelerators） を提唱している．この TCA の実装として PEACH2（PCI Express Adaptive Communication Hub version 2） が開発されており，ノードを跨ぐ GPU 間での直接通信を行うことができる．しかしながら，TCA/PEACH2 を利用するためには独自の API を用いる必要があり，プログラミングコストが高く，既存のアプリケーションの移植も容易ではないという問題がある．本研究では，PGAS 言語を対象とした通信ライブラリである GASNet に注目し，これを GPU を対象とした PEACH2 に実装する．これによって，GASNet を介して各種のソフトウェアとの互換性が生じ，TCA/PEACH2 が広く利用できるようになると考えられる．既存の GASNet では CPU メモリを対象とした通信しか想定されておらず，GPU を対象とした拡張は現在開発段階にあり，本論文では現在進行中の GASNet の GPU 対応の拡張についても触れる．TCA/PEACH2 による GASNet のプロトタイプ実装において，ノードを跨ぐ GPU 間の通信性能は TCA/PEACH2 を直接使用した場合の性能と比較して，最小レイテンシの増大は 15\%程度に抑えられ，ソフトウェア支援によって最大バンド幅は 1.2 倍の性能向上を達成した．
Recently, PC clusters equipped with GPU as accelerators are widely spread and operated. We have been proposing Tightly Coupled Accelerators (TCA) architecture to realize inter-node direct communication among GPUs, and we developed PCI Express Adaptive Communication Hub version 2 (PEACH2) as a prototype of TCA implementation. However, currently non-standard unique API is required to use TCA/PEACH2, so that the programming cost is expensive and porting of existing applications is not easy. On the other hand, GASNet library developed by Lawrence Berkeley National Laboratory provides low-level communication layer for Partitioned Global Address Space (PGAS) languages such as Unified Parallel C (UPC), Co-Array-Fortran and XcalableMP (XMP), and so on. GASNet assumes only CPU memory as communication target, and extension for GPU-aware GASNet is work in progress now. Beside of GPU-aware GASNet development on commodity network such as InfiniBand, we implement it on TCA/PEACH2 to provide general programming and system software porting on this hardware in this paper. We also mention currently planned features of GPU-aware GASNet.
In the case of inter-node GPU communication using GASNet prototype implementation by TCA/PEACH2, the minimum latency is increased only 15 \% from the case with native API, and the maximum bandwidth is increased by 1.2 times of native API thanks to software support.},
}