{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00141751","sets":["6164:6165:6242:8250"]},"path":["8250"],"owner":"11","recid":"141751","title":["密結合並列演算加速機構TCAを用いたGPU間直接通信によるCollective通信の実装と性能評価"],"pubdate":{"attribute_name":"公開日","attribute_value":"2015-05-12"},"_buckets":{"deposit":"5e2795b6-b934-4026-9fa4-5b8eb0bab0d3"},"_deposit":{"id":"141751","pid":{"type":"depid","value":"141751","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"密結合並列演算加速機構TCAを用いたGPU間直接通信によるCollective通信の実装と性能評価","author_link":["208004","208009","208005","208008","208012","208006","208007","208011","208013","208010"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"密結合並列演算加速機構TCAを用いたGPU間直接通信によるCollective通信の実装と性能評価"},{"subitem_title":"Implementation and Performance Evaluation of Collective Communication with Proprietary Interconnect TCA for GPU Direct Communication","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"GPU","subitem_subject_scheme":"Other"}]},"item_type_id":"18","publish_date":"2015-05-12","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_18_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"筑波大学計算科学研究センター"},{"subitem_text_value":"東京大学情報基盤センター"},{"subitem_text_value":"筑波大学計算科学研究センター/筑波大学大学院システム情報工学研究科/現在,理化学研究所計算科学研究機構"},{"subitem_text_value":"筑波大学大学院システム情報工学研究科/現在,富士通ソフトウェアテクノロジーズ"},{"subitem_text_value":"筑波大学計算科学研究センター/筑波大学大学院システム情報工学研究科"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/141751/files/IPSJ-HPCS2015039.pdf"},"date":[{"dateType":"Available","dateValue":"2017-05-12"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-HPCS2015039.pdf","filesize":[{"value":"1.6 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"330","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"330","billingrole":"14"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"1d012339-c4cc-4dd9-a6b0-8ce39c0897a1","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2015 by the Information Processing Society of Japan"}]},"item_18_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"松本, 和也"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"塙, 敏博"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"児玉, 祐悦"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"藤井, 久史"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"朴, 泰祐"}],"nameIdentifiers":[{}]}]},"item_18_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kazuya, Matsumoto","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Toshihiro, Hanawa","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yuetsu, Kodama","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Hisafumi, Fujii","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Taisuke, Boku","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference paper"}]},"item_18_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"筑波大学計算科学研究センターでは,GPU クラスタにおけるノード間に跨る GPU 間通信のレイテンシ改善を目的とした密結合並列演算加速機構 TCA (Tightly Coupled Accelerators) を独自開発している.本稿では,Scatter,Reduce,Allgather,Allreduce の 4 つの Collective 通信の TCA による実装と,その性能を TCA 実証環境の GPU クラスタである HA-PACS/TCA において評価した結果を述べる.TCA による実装は通信レイテンシが問題となる小さめなサイズの Collective 通信において,MPI による Collective 通信と比べて高速にその通信処理を行うことが可能であることを示す.また,実装した Collective 通信を利用した Conjugate Gradient 法 (CG 法) の実装およびその性能について述べる.本研究で用いる CG 法の並列アルゴリズムは,Allgather と Allreduce をその通信部分に用いるものである.TCA による Collective 通信を用いた CG 法実装は,疎行列のサイズ (行数) が数千から数万の場合では MPI の Collective 通信を用いた実装よりも高い性能を達成できることを示す.","subitem_description_type":"Other"}]},"item_18_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"We have been developing a proprietary interconnect technology called Tightly Coupled Accelerators (TCA) architecture to improve communication latency and bandwidth between compute nodes on a GPU cluster. This paper presents the implementation and performance evaluation results of four different collective communication operations (scatter, reduce, allgather, allreduce). The performance measurements are conducted on HA-PACS/TCA, which is a proof-of-concept GPU cluster based on the TCA architecture. The implementation using TCA is faster than an MPI collective communication implementation in case collective communications for small sizes where the communication latency decides most of its performance. This paper also describes an implementation of Conjugate Gradient (CG) method utilizing the implemented collective communication and the performance. We use the parallel algorithm of CG method that utilizes the allgather and allreduce in the data communication. The CG method implementation using TCA outperforms the implementation using MPI for sparse matrices whose matrix size is thousands to tens of thousands.","subitem_description_type":"Other"}]},"item_18_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"128","bibliographic_titles":[{"bibliographic_title":"ハイパフォーマンスコンピューティングと計算科学シンポジウム論文集"}],"bibliographicPageStart":"120","bibliographicIssueDates":{"bibliographicIssueDate":"2015-05-12","bibliographicIssueDateType":"Issued"},"bibliographicVolumeNumber":"2015"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":141751,"updated":"2025-01-20T19:15:15.995190+00:00","links":{},"created":"2025-01-19T00:19:17.647942+00:00"}