{"created":"2025-01-18T23:32:37.989943+00:00","updated":"2025-01-21T21:09:12.056723+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00075557","sets":["1164:2240:6352:6493"]},"path":["6493"],"owner":"11","recid":"75557","title":["SPECベンチマークプログラムのCUDAによる並列化の検討"],"pubdate":{"attribute_name":"公開日","attribute_value":"2011-07-20"},"_buckets":{"deposit":"0bf60dfd-daf4-4f39-a1a8-c491764bd5aa"},"_deposit":{"id":"75557","pid":{"type":"depid","value":"75557","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"SPECベンチマークプログラムのCUDAによる並列化の検討","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"SPECベンチマークプログラムのCUDAによる並列化の検討"},{"subitem_title":"Examination of Parallelization by CUDA in SPEC benchmark program","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"GPU","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2011-07-20","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"早稲田大学"},{"subitem_text_value":"早稲田大学"},{"subitem_text_value":"早稲田大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Waseda University","subitem_text_language":"en"},{"subitem_text_value":"Waseda University","subitem_text_language":"en"},{"subitem_text_value":"Waseda University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/75557/files/IPSJ-HPC11130016.pdf"},"date":[{"dateType":"Available","dateValue":"2013-07-20"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-HPC11130016.pdf","filesize":[{"value":"728.5 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"14"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"e8a1f0ae-895d-43d2-b592-c90a01353708","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2011 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"平, 勇樹"},{"creatorName":"木村, 啓二"},{"creatorName":"笠原, 博徳"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yuki, Taira","creatorNameLang":"en"},{"creatorName":"Keiji, Kimura","creatorNameLang":"en"},{"creatorName":"Hironori, Kasahara","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10463942","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"近年，GPU を汎用的な科学計算に用いる手法である GPGPU が注目されている．GPU は CPU と比べて高速な演算性能を持っているが，GPU の高い並列性を生かすためには並列性を持ったプログラムの選択と最適化が不可欠である．Doall のような単純な計算においては性能を出しやすいが，漸化計算 （Reduction） のような並列性が低くなる計算では最適化を行わなければ性能を生かし切ることが出来ない．本研究報告では，並列性の高い汎用 SMP 用ベンチマークである SPEC OMPL2001 331.art を評価対象とし，最適化によって GPU の性能がどれほど得られ，データサイズによってどれほど性能向上の差があるかを調査する．GPGPU のための並列アーキテクチャ CUDA を用いてベンチマークプログラム 331.art を最適化し評価を行ったところ，12 コアでの並列実行の総計算時間と比べて 1.90 倍の速度向上を得た．また，配列サイズが 200 万個以上の漸化計算であればデータ転送帯域を有効に使うことが出来，CPU での並列実行より高速に動作させられることが確認できた．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Recently, GPGPU which means a technique of General Purpose computing on GPU has attracted attention. GPU has a high-speed computing performance compared with CPU. Although in order to utilize a high parallelism that GPU have well, it is necessary to select a program with parallelism and optimize the program. It is easy to give high performance in the simple calculation such as Doall, but cannot make use of performance if you don't optimize a low parallelism compute such as Reduction. In this paper, we set a target for SPEC OMPL2001 331.art which has high parallelism and evaluate how much performance is provided by GPU optimize and evaluate how much difference will appear by changing data size of arrays. In this paper, we got speed-up of 1.90 times compared with the total calculation time of parallel execution in 12 cores. We can execute faster than parallel execution in 12 cores when we set a target as a Reduction which access to 2 Million data array.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告ハイパフォーマンスコンピューティング（HPC）"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2011-07-20","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"16","bibliographicVolumeNumber":"2011-HPC-130"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":75557,"links":{}}