{"created":"2025-01-19T00:53:06.505258+00:00","updated":"2025-01-20T02:44:24.492491+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00186051","sets":["1164:2240:9411:9412"]},"path":["9412"],"owner":"11","recid":"186051","title":["Pushing the Limits for 2D Convolution Computation On CUDA-enabled GPUs"],"pubdate":{"attribute_name":"公開日","attribute_value":"2018-02-21"},"_buckets":{"deposit":"d30d650c-4354-4d1b-96df-3992c93bc6c4"},"_deposit":{"id":"186051","pid":{"type":"depid","value":"186051","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"Pushing the Limits for 2D Convolution Computation On CUDA-enabled GPUs","author_link":["415827","415828","415829","415826","415832","415830","415825","415831"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Pushing the Limits for 2D Convolution Computation On CUDA-enabled GPUs"},{"subitem_title":"Pushing the Limits for 2D Convolution Computation On CUDA-enabled GPUs","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"GPU","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2018-02-21","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Tokyo Institute of Technology／AIST-Tokyo Tech Real World Big-Data Computation Open Innovation Laboratory, National Institute of Advanced Industrial Science and Technology"},{"subitem_text_value":"National Institute of Advanced Industrial Science and Technology"},{"subitem_text_value":"AIST-Tokyo Tech Real World Big-Data Computation Open Innovation Laboratory, National Institute of Advanced Industrial Science and Technology"},{"subitem_text_value":"Tokyo Institute of Technology／AIST-Tokyo Tech Real World Big-Data Computation Open Innovation Laboratory, National Institute of Advanced Industrial Science and Technology"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Tokyo Institute of Technology / AIST-Tokyo Tech Real World Big-Data Computation Open Innovation Laboratory, National Institute of Advanced Industrial Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"National Institute of Advanced Industrial Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"AIST-Tokyo Tech Real World Big-Data Computation Open Innovation Laboratory, National Institute of Advanced Industrial Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Tokyo Institute of Technology / AIST-Tokyo Tech Real World Big-Data Computation Open Innovation Laboratory, National Institute of Advanced Industrial Science and Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/186051/files/IPSJ-HPC18163022.pdf","label":"IPSJ-HPC18163022.pdf"},"date":[{"dateType":"Available","dateValue":"2020-02-21"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-HPC18163022.pdf","filesize":[{"value":"4.1 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"14"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"890c063b-0a30-4500-ac59-28d08c1eeea3","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2018 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Peng, Chen"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Mohamed, Wahib"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Shinichiro, Takizawa"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Satoshi, Matsuoka"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Peng, Chen","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Mohamed, Wahib","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Shinichiro, Takizawa","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Satoshi, Matsuoka","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10463942","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8841","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"The 2D convolution operator is the computational bottleneck in a variety of image processing and machine learning applications. We propose an algorithm to compute convolution by employing register files to cache image data (known as register cache), rather than using the user-managed scratch-pad memory. We take advantage of CUDA's warp shuffle functions to accelerate the intra-warp communication of partial results. Unlike the GEMM-based, FFT-based or Winograd method, our algorithm executes the convolution computation without using any GPU memory as a workspace, and is general to all filter shapes. Our algorithm performs better than state-of-the-art 2D convolution implementations. Using a single TitanXp GPU, it is in average 4.7x faster than NPP (Nvidia Performance Primitives), and 1.8x faster than the highly-optimized ArrayFire library.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"The 2D convolution operator is the computational bottleneck in a variety of image processing and machine learning applications. We propose an algorithm to compute convolution by employing register files to cache image data (known as register cache), rather than using the user-managed scratch-pad memory. We take advantage of CUDA's warp shuffle functions to accelerate the intra-warp communication of partial results. Unlike the GEMM-based, FFT-based or Winograd method, our algorithm executes the convolution computation without using any GPU memory as a workspace, and is general to all filter shapes. Our algorithm performs better than state-of-the-art 2D convolution implementations. Using a single TitanXp GPU, it is in average 4.7x faster than NPP (Nvidia Performance Primitives), and 1.8x faster than the highly-optimized ArrayFire library.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"9","bibliographic_titles":[{"bibliographic_title":"研究報告ハイパフォーマンスコンピューティング（HPC）"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2018-02-21","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"22","bibliographicVolumeNumber":"2018-HPC-163"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":186051,"links":{}}