% Journal article (2007) on speeding up quadruple-precision multiply-add
% arithmetic on the SR11000 model J2.
%   - author    : the four authors in Japanese script, "Family, Given" form.
%   - author_en : the same four authors romanized, "Family, Given" form. This
%                 is not a standard field, so bibliography styles ignore it;
%                 it is kept for searching and reference only.
%   - abstract  : Japanese abstract followed by the English abstract. It is
%                 stored here rather than in note so that styles do not print
%                 it as part of the formatted reference.
%   - number    : the issue designation within volume 48.
@article{oai:ipsj.ixsq.nii.ac.jp:00018240,
  author    = {永井, 貴博 and 吉田, 仁 and 黒田, 久泰 and 金田, 康正},
  author_en = {Nagai, Takahiro and Yoshida, Hitoshi and Kuroda, Hisayasu and Kanada, Yasumasa},
  title     = {{SR11000} モデル {J2} における 4 倍精度積和演算の高速化},
  journal   = {情報処理学会論文誌コンピューティングシステム（ACS）},
  volume    = {48},
  number    = {SIG13(ACS19)},
  pages     = {214--222},
  month     = aug,
  year      = {2007},
  abstract  = {並列計算機の性能向上や数値計算法の進展は，大規模科学技術計算における大きな鍵となっている。特に浮動小数点数における演算においては、計算規模が増すに従ってより多くの計算量を必要とし、計算誤差も増大する。そのために、倍精度演算より有効桁数が多い 4 倍精度演算の必要性が高まってきており注目されている。4 倍精度数の表現には、倍精度浮動小数点数を 2 つ用いて表される 128 ビットデータ型があるが、SR11000 モデル J2 上の Hitachi 最適化コンパイラにおいて、4 倍精度演算は 2 つの倍精度データ型を用いてソフトウェアによって実現されており、倍精度演算に比べより多くの計算回数を必要とする。そこで本研究では、SR11000 モデル J2 上の Hitachi 最適化コンパイラを用いて 4 倍精度演算を定量的に解析し、FMA 命令 （Fused Multiply-Add） を用いて演算回数を削減することによって高速化を行い、最大で約 1.5 倍の高速な 4 倍精度積和演算を実現した。, It is important to develop the numerical computation and to increase the performance of parallel computers so that the large scale computation is required in the scientific research fields. Generally, floating point arithmetics generate rounding error because of its limited significant figures to need more complexity. Therefore, quadruple precision arithmetics has been paid more attention today. Quadruple precision arithmetics are emulated with a pair of 64-bit double precision representation with Hitachi optimizing compiler on SR11000/J2. In this paper, we introduce the quantitative analysis of quadruple precision arithmetics. We implemented and attained 1.5 times maximum speed up with FMA (Fused Multiply-Add) instruction by reducing the number of operations.},
}