@techreport{oai:ipsj.ixsq.nii.ac.jp:00214016,
  author = {Senoo, Takeshi and Jinguji, Akira and Kuramochi, Ryosuke and Nakahara, Hiroki},
  title  = {A Training Accelerator for Multilayer Perceptrons Using a Systolic Array},
  year   = {2021},
  month  = {Nov},
  issue  = {7},
  note   = {Neural networks are used in a wide range of applications, and demand is emerging for fast training on large amounts of data. For example, a network intrusion detection (NID) system must be trained in a short period to detect attacks from large volumes of traffic logs. To address this, we propose a training accelerator implemented as a systolic array on a Xilinx Alveo U50 FPGA card. Because the data dependencies of the parameter updates in backpropagation make parallelization difficult, we delay the weight updates so that the forward and backward passes can run simultaneously in a dataflow architecture, and we show that a multilayer perceptron still trains correctly, with accuracy nearly identical to conventional training. The accelerator is 3 times faster than an Intel Core i9 CPU and 2.5 times faster than an NVIDIA RTX 3090 GPU, with 11.5 times and 21.4 times better processing speed per unit of power consumption, respectively. From these results, we conclude that implementing a training accelerator on an FPGA as a systolic array can achieve high speed and high energy efficiency.}
}