{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00087831","sets":["6164:6165:6242:6971"]},"path":["6971"],"owner":"11","recid":"87831","title":["AVXを用いた倍々精度疎行列ベクトル積の高速化"],"pubdate":{"attribute_name":"公開日","attribute_value":"2013-01-08"},"_buckets":{"deposit":"127a083b-e081-45a5-b3b1-486dfa3d4cd7"},"_deposit":{"id":"87831","pid":{"type":"depid","value":"87831","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"AVXを用いた倍々精度疎行列ベクトル積の高速化","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"AVXを用いた倍々精度疎行列ベクトル積の高速化"},{"subitem_title":"AVX Acceleration of Sparse Matrix-Vector Multiplication in Double-Double","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"数値計算","subitem_subject_scheme":"Other"}]},"item_type_id":"18","publish_date":"2013-01-08","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_18_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"工学院大学情報学部"},{"subitem_text_value":"工学院大学情報学部"},{"subitem_text_value":"工学院大学情報学部"},{"subitem_text_value":"筑波大学図書館情報メディア系"}]},"item_18_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Faculty of Informatics, Kogakuin University","subitem_text_language":"en"},{"subitem_text_value":"Faculty of Informatics, Kogakuin University","subitem_text_language":"en"},{"subitem_text_value":"Faculty of Informatics, Kogakuin University","subitem_text_language":"en"},{"subitem_text_value":"Faculty of Library, Information and Media Science, University of Tsukuba","subitem_text_language":"en"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/87831/files/IPSJ-HPCS2013003.pdf"},"date":[{"dateType":"Available","dateValue":"2015-01-08"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-HPCS2013003.pdf","filesize":[{"value":"1.4 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"330","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"330","billingrole":"14"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"dbcf6eb1-fc12-43cf-ad84-f63d467694ac","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2013 by the Information Processing Society of Japan"}]},"item_18_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"菱沼, 利彰"},{"creatorName":"藤井, 昭宏"},{"creatorName":"田中, 輝雄"},{"creatorName":"長谷川, 秀彦"}],"nameIdentifiers":[{}]}]},"item_18_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Toshiaki, Hishinuma","creatorNameLang":"en"},{"creatorName":"Akihiro, Fujii","creatorNameLang":"en"},{"creatorName":"Teruo, Tanaka","creatorNameLang":"en"},{"creatorName":"Hidehiko, Hasegawa","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference paper"}]},"item_18_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"計算性能の向上に伴い，高精度による計算が多くの場面で可能となっている．4倍精度を効率良く実現する手法として，2つの倍精度変数で1つの4倍精度変数を表現する倍々精度演算がある．本研究では，疎行列とベクトルの演算に使われる基本演算をAVX命令を用いて高速化し，性能を決定するパラメタについて分析を行うことにより，倍々精度演算をAVXで行う際の効果を示した．AVX命令を用いた場合，同時演算数の増加，3オペランド化によるレジスタ退避，復元処理の減少などの効果が大きく，SSE2の性能と比べて，キャッシュに収まる範囲のベクトル間の演算では約1.7から2.3倍の性能となったが，キャッシュに収まらない場合は，キャッシュアクセス，メモリアクセスが大きなボトルネックになることがわかった．倍精度の疎行列と倍々精度のベクトルの積では，約1.1から1.9倍の性能となり，メモリアクセスはボトルネックとならず，疎行列の1行あたりの非零要素の数が性能に大きな影響を与えていることがわかった．これらの結果から，倍々精度の疎行列ベクトル積の性能を予測する1つの指標を導出した．","subitem_description_type":"Other"}]},"item_18_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"As computing performance is improved generation after generation, high precision computation becomes possible in many situations. One of the efficient methods to perform quadruple precision is to use Double-Double precision which uses two double precision variables for one quadruple precision variable. In this paper, the authors tuned basic operation kernels of sparse matrices and vectors in Double-Double precision using AVX, and analyzed their performance. The AVX speedup ratio of the Double-Double vector operations is from 1.7 to 2.3 when data stored in the cache. The reason of performance acceleration is number of operations in the same time and elimination of backup and recovery values on registers by three operands instruction, The AVX performance decreases when data not stored in the cache, because of cache hit ratio and memory bandwidth. The AVX speedup ratio of the product of Double precision sparse matrix and Double-Double precision vector is from 1.1 to 1.9. An average number of nonzero elements per row affects to the performance, but a memory bandwidth does not affect to the performance. The authors define one metric to forecast the AVX performance of the product of sparse matrix and vector in Double-Double.","subitem_description_type":"Other"}]},"item_18_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"31","bibliographic_titles":[{"bibliographic_title":"ハイパフォーマンスコンピューティングと計算科学シンポジウム論文集"}],"bibliographicPageStart":"23","bibliographicIssueDates":{"bibliographicIssueDate":"2013-01-08","bibliographicIssueDateType":"Issued"},"bibliographicVolumeNumber":"2013"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":87831,"updated":"2025-01-21T17:01:01.173944+00:00","links":{},"created":"2025-01-18T23:38:25.700830+00:00"}