{"created":"2025-01-19T01:31:16.893203+00:00","updated":"2025-01-19T10:52:24.227892+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00231113","sets":["1164:2240:11176:11408"]},"path":["11408"],"owner":"44499","recid":"231113","title":["An Efficient Sparse Matrix Storage Format for Sparse Matrix-Vector Multiplication and Sparse Matrix-Transpose-Vector Multiplication on GPUs"],"pubdate":{"attribute_name":"公開日","attribute_value":"2023-11-28"},"_buckets":{"deposit":"d395b2b1-b820-40ab-8f41-fdb633ac96ac"},"_deposit":{"id":"231113","pid":{"type":"depid","value":"231113","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"An Efficient Sparse Matrix Storage Format for Sparse Matrix-Vector Multiplication and Sparse Matrix-Transpose-Vector Multiplication on GPUs","author_link":["623422","623423","623421","623420"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"An Efficient Sparse Matrix Storage Format for Sparse Matrix-Vector Multiplication and Sparse Matrix-Transpose-Vector Multiplication on GPUs"},{"subitem_title":"An Efficient Sparse Matrix Storage Format for Sparse Matrix-Vector Multiplication and Sparse Matrix-Transpose-Vector Multiplication on GPUs","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"アクセラレータ","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2023-11-28","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Japan Advanced Institute of Science and Technology"},{"subitem_text_value":"Japan Advanced Institute of Science and Technology"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Japan Advanced Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Japan Advanced Institute of Science and 
Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/231113/files/IPSJ-HPC23192035.pdf","label":"IPSJ-HPC23192035.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-HPC23192035.pdf","filesize":[{"value":"2.3 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"14"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"07db53cc-cce5-4a4a-9d34-ac2ac2570095","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2023 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Ryohei, Izawa"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yasushi, Inoguchi"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Ryohei, Izawa","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yasushi, Inoguchi","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10463942","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, 
symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8841","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"The utilization of sparse matrix storage formats is widespread across various fields, including scientific computing, machine learning, and statistics. Within these domains, there is a need to perform Sparse Matrix-Vector Multiplication (SpMV) and Sparse Matrix-Transpose-Vector Multiplication (SpMVT) iteratively within a single application. However, executing SpMV and SpMVT on GPUs using existing sparse matrix storage formats presents challenges in terms of memory usage, memory access and load balancing. In our study, we present a novel sparse matrix storage format named GCSB, designed specifically for optimizing SpMV and SpMVT operations on GPUs through the implementation of advanced memory compression techniques. Expanding upon the pre-existing CSB format compatible with CPU-based SpMV and SpMVT, we extend its functionality to the GPU environment. This adaptation enables quicker execution of SpMV and SpMVT in comparison to CSR, achieved by effectively utilizing the L1 cache and ensuring load balancing, while maintaining the theoretical memory usage equivalent to that of CSR. Through our experiments, we demonstrate that GCSB achieves comparable theoretical memory usage to CSR while outperforming CSR in terms of speed on various matrices sourced from the University of Florida Sparse Matrix Collection. GCSB achieves a speedup of up to 1.47 on TITAN RTX and 2.75 on A100. Furthermore, we show that GCSB reduces the L1 cache miss counts by strategically grouping and rearranging non-zero elements. 
Additionally, we conduct a qualitative assessment, affirming that GCSB exhibits superior performance, particularly when non-zero elements are widely dispersed throughout the matrix and the proportion of non-zero elements within the matrix is relatively high.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"The utilization of sparse matrix storage formats is widespread across various fields, including scientific computing, machine learning, and statistics. Within these domains, there is a need to perform Sparse Matrix-Vector Multiplication (SpMV) and Sparse Matrix-Transpose-Vector Multiplication (SpMVT) iteratively within a single application. However, executing SpMV and SpMVT on GPUs using existing sparse matrix storage formats presents challenges in terms of memory usage, memory access and load balancing. In our study, we present a novel sparse matrix storage format named GCSB, designed specifically for optimizing SpMV and SpMVT operations on GPUs through the implementation of advanced memory compression techniques. Expanding upon the pre-existing CSB format compatible with CPU-based SpMV and SpMVT, we extend its functionality to the GPU environment. This adaptation enables quicker execution of SpMV and SpMVT in comparison to CSR, achieved by effectively utilizing the L1 cache and ensuring load balancing, while maintaining the theoretical memory usage equivalent to that of CSR. Through our experiments, we demonstrate that GCSB achieves comparable theoretical memory usage to CSR while outperforming CSR in terms of speed on various matrices sourced from the University of Florida Sparse Matrix Collection. GCSB achieves a speedup of up to 1.47 on TITAN RTX and 2.75 on A100. Furthermore, we show that GCSB reduces the L1 cache miss counts by strategically grouping and rearranging non-zero elements. 
Additionally, we conduct a qualitative assessment, affirming that GCSB exhibits superior performance, particularly when non-zero elements are widely dispersed throughout the matrix and the proportion of non-zero elements within the matrix is relatively high.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告ハイパフォーマンスコンピューティング(HPC)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2023-11-28","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"35","bibliographicVolumeNumber":"2023-HPC-192"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":231113,"links":{}}