{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00069960","sets":["1164:2240:6021:6143"]},"path":["6143"],"owner":"10","recid":"69960","title":["Segmented Scan法のCUDA向け最適化実装"],"pubdate":{"attribute_name":"公開日","attribute_value":"2010-07-27"},"_buckets":{"deposit":"da17413d-e506-4296-87d9-c111334fe686"},"_deposit":{"id":"69960","pid":{"type":"depid","value":"69960","revision_id":0},"owners":[10],"status":"published","created_by":10},"item_title":"Segmented Scan法のCUDA向け最適化実装","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Segmented Scan法のCUDA向け最適化実装"},{"subitem_title":"Optimized Implementation of Segmented Scan Method for CUDA","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"GPU 最適化","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2010-07-27","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学情報基盤センタースーパーコンピューティング研究部門"},{"subitem_text_value":"日立製作所中央研究所"},{"subitem_text_value":"東京大学情報基盤センタースーパーコンピューティング研究部門"},{"subitem_text_value":"東京大学情報基盤センタースーパーコンピューティング研究部門"},{"subitem_text_value":"愛媛大学大学院理工学研究科"},{"subitem_text_value":"東京大学情報基盤センタースーパーコンピューティング研究部門"},{"subitem_text_value":"日立超LSIシステムズ"},{"subitem_text_value":"東京大学情報基盤センタースーパーコンピューティング研究部門"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Supercomputing Research Devision, Information Technology Center, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Central Research Laboratory Hitachi, Ltd.","subitem_text_language":"en"},{"subitem_text_value":"Supercomputing Research Devision, Information Technology Center, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Supercomputing Research Devision, Information Technology Center, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Science 
and Engineering, Ehime University","subitem_text_language":"en"},{"subitem_text_value":"Supercomputing Research Devision, Information Technology Center, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Hitachi ULSI System Co., Ltd.","subitem_text_language":"en"},{"subitem_text_value":"Supercomputing Research Devision, Information Technology Center, The University of Tokyo","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/69960/files/IPSJ-HPC10126001.pdf"},"date":[{"dateType":"Available","dateValue":"2012-07-27"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-HPC10126001.pdf","filesize":[{"value":"357.6 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"14"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"7dc9459d-64e5-43ac-81cc-67e399b15076","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2010 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"大島, 聡史"},{"creatorName":"櫻井, 隆雄"},{"creatorName":"片桐, 孝洋"},{"creatorName":"中島, 研吾"},{"creatorName":"黒田, 久泰"},{"creatorName":"直野, 健"},{"creatorName":"猪貝, 光祥"},{"creatorName":"伊藤, 
祥司"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Satoshi, Ohshima","creatorNameLang":"en"},{"creatorName":"Takao, Sakurai","creatorNameLang":"en"},{"creatorName":"Takahiro, Katagiri","creatorNameLang":"en"},{"creatorName":"Kengo, Nakajima","creatorNameLang":"en"},{"creatorName":"Hisayasu, Kuroda","creatorNameLang":"en"},{"creatorName":"Ken, Naono","creatorNameLang":"en"},{"creatorName":"Mitsuyoshi, Igai","creatorNameLang":"en"},{"creatorName":"Shoji, Itoh","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10463942","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本稿では Segmented Scan 法を用いた疎行列ベクトル積の CUDA 向け最適化実装について述べる.我々は実装の再利用性に着目した自動チューニングインターフェース OpenATLib の提案を行い,また OpenATLib の提供する機能の一つである疎行列ベクトル積においては Segmented Scan 方式を元にスカラ計算機向けに改良を行った Branchless Segmented Scan 方式を提案している.本稿ではこれらの方式を元にして CUDA 向けの新たな Segmented Scan 方式を考案し実装した.GPU 上で高速実行可能なようにアルゴリズムの改良や各種の最適化を行った結果,偏りの大きな行列に対して NVIDIA GeForceGTX285 上で最大で 3.26GFLOPS の性能を達成した.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"We discuss about optimized implementation of sparse matrix vector multiplication for CUDA using Segmented Scan method. 
We proposed Auto-tuning interface OpenATLib and we also proposed Branchless Segmented Scan method based on Segmented Scan method for scalar computer as an important new feature of sparse matrix vector multiplication. In this paper, we proposed and implemented new Segmented Scan method for CUDA based on Segmented Scan method and Branchless Segmented Scan method. As a result of optimized implementation, we achieved 3.26GFLOPS on NVIDIA GeForceGTX285.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"7","bibliographic_titles":[{"bibliographic_title":"研究報告ハイパフォーマンスコンピューティング(HPC)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2010-07-27","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicVolumeNumber":"2010-HPC-126"}]},"relation_version_is_last":true,"weko_creator_id":"10"},"id":69960,"updated":"2025-01-21T23:39:59.014201+00:00","links":{},"created":"2025-01-18T23:29:17.780506+00:00"}