% IPSJ journal article (Japanese, with an English abstract) on a vectorized and
% parallelized Block-ILU preconditioned iterative method, evaluated on an NEC SX-4.
%
% Field notes:
%  - author     : the five authors in Japanese script, family name first.
%  - author_en  : romanized forms of the same five authors, kept in a separate
%                 non-standard field so the entry is not rendered with ten authors;
%                 standard styles ignore unknown fields.
%                 NOTE(review): spellings are copied unchanged from the exported
%                 record, only the family/given order was corrected - verify the
%                 readings against the paper.
%  - number     : issue designation (was stored in "issue", which the standard
%                 BibTeX styles do not print).
%  - month      : predefined three-letter macro, so the style decides the rendering.
%  - abstract / abstract_en : Japanese and English abstracts, previously
%                 concatenated in "note", where styles would print them in the
%                 reference list. The garbled separator between numbers and units
%                 in the English text was restored to a plain space.
%  - title      : the acronym BILU is braced so sentence-casing styles keep it.
@article{oai:ipsj.ixsq.nii.ac.jp:00017880,
 author      = {襲田, 勉 and 丸山, 訓英 and 鷲尾, 巧 and 土肥, 俊 and 山田, 進},
 author_en   = {Osoda, Tsutomu and Maruyama, Kunieda and Washio, Takuma and Doi, Shun and Yamada, Susumu},
 title       = {非構造メッシュ用{BILU}前処理付き反復法のベクトル・並列化手法},
 journal     = {情報処理学会論文誌ハイパフォーマンスコンピューティングシステム（HPS）},
 volume      = {41},
 number      = {SIG08(HPS2)},
 pages       = {92--100},
 year        = {2000},
 month       = nov,
 abstract    = {共有メモリベクトル並列計算機の演算性能を最大限に引き出すような，ランダムスパース行列のためのBlock（ブロック）ILU前処理付き反復法のベクトル・並列化手法を提案し，その手法を並列ベクトル型スーパコンピュータSX-4（SRAM版，1CPUのピーク性能2Gflops）上で性能評価した結果を示す．ここでブロックとはある格子点上に定義された複数の未知数からなる集合とする．ベクトル・並列処理をすることが難しいとされるBILU前処理の前進・後退代入演算のベクトル・並列化のためにIDS-MJAD（InDependent Set Multiple Jagged Diagonal）形式を導入した．IDS-MJAD形式の導入によりCPU間の同期回数を低減した代入演算の実装が可能になる．3次元構造解析問題（GeoFEM Tiger V1.0）を用いた約100万自由度の評価例題を使った数値実験において，1CPUで1.0Gflops，8CPUで6.8Gflopsを達成した．},
 abstract_en = {In this paper, we propose techniques to extract vector and shared memory parallel performance of a parallel vector machine, and we evaluate the proposed techniques on an NEC super computer SX-4. As the linear solver, we have implemented the block ILU preconditioned iterative method, which is frequently used for many large sparse problems. Here, a block corresponds to unknowns on one node in a mesh or in a grid. As for the vectorization and parallelization technique, we propose IDS-MJAD (Independent set multiple jagged diagonal) format. With this technique, we can decrease the number of synchronizations. The numerical experimental results show that we achieved about 1.0 Gflops on 1 CPU and about 6.8 Gflops on 8 CPUs for some FEM problems.},
}