% Conference paper (IPSJ, 2011) on a time-sharing virtualization mechanism for the LAPP array accelerator.
% author_en is an ignored helper field holding romanized author names; abstract holds the Japanese and English abstracts.
@inproceedings{oai:ipsj.ixsq.nii.ac.jp:00074355,
 author    = {岩上, 拓矢 and 吉村, 和浩 and 中田, 尚 and 中島, 康彦},
 author_en = {Iwakami, Takuya and Yoshimura, Kazuhiro and Nakada, Takashi and Nakashima, Yasuhiko},
 title     = {仮想化機構による演算器アレイ型アクセラレータの効率化},
 booktitle = {先進的計算基盤システムシンポジウム論文集},
 publisher = {情報処理学会},
 volume    = {2011},
 pages     = {136--143},
 year      = {2011},
 month     = may,
 abstract  = {我々は，一般的な機械語命令を演算器アレイに写像して高い効率で実行する線形アレイ型アクセラレータ LAPP（Linear Array Pipeline Processor） を提案している．LAPP は多数の演算器をアレイ状に配置し，プログラムの最内ループから演算器アレイのデータパスを構成し，必要最小限のユニットだけで実行することによって，高性能と低消費電力を両立している．しかしこの従来型の LAPP ではスループットを高めるために 1 演算器に 1 命令を固定して割り当てるという物理制約があり，演算器数を超える命令列を高速実行できなかった．本稿ではこのような物理演算器数を超えた命令列を高速実行するために時分割実行による仮想化機構を提案する．本機構は演算器アレイにおいて各演算器で複数命令を時分割実行することによって仮想的に大きな演算器アレイを構築し，物理演算器数を超える長い命令列の高速実行を可能にする．評価の結果，従来型の LAPP と比べ電力あたり性能は約 0.89 倍に低下するものの，面積あたり性能を約 1.15 倍に向上させることができた．, We have previously proposed Linear Array Pipeline Processor (LAPP), which can map an inner loop of conventional VLIW codes onto Function Unit (FU) array and use minimum required FUs to exploit performance per watt. However, under a fixed mapping, one FU will be specified to a certain instruction and the longest map-able data flow path in the loop kernel is thus limited by the physical depth of the FU array. To address this problem, we propose a virtualization mechanism to extend the mapping ability of the FU array in LAPP. Specifically, this mechanism virtually extended the depth of the FU array by time-sharing the FUs to execute multiple VLIW instructions inside the loop kernel. Our evaluation results indicate that the virtualization will introduce an 11\% performance per watt cost from the original LAPP, due the impact to working frequency. However, with virtualization, we can successfully use an 18-stage LAPP to substitute a baseline 36-stage LAPP. The performance per area is accordingly increased to 115\% of the baseline LAPP.}
}