% Technical report: tail-layer (final-layer) training to add a training capability to the Xilinx DPU.
% NOTE(review): techreport entries require an institution field, which this record does not
% supply; the key suggests the IPSJ repository, but confirm against the publisher record before adding it.
% The fields author-en and abstract-en are not read by standard styles; they preserve the
% romanized author names and the English abstract that the export had mixed into author and note.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00216069,
  author      = {高嶋, 優希 and 神宮司, 明良 and 中原, 啓貴},
  author-en   = {Takashima, Yuki and Jinguji, Akira and Nakahara, Hiroki},
  title       = {最終層学習による{DPU}の学習機能追加について},
  number      = {12},
  year        = {2022},
  month       = jan,
  abstract    = {近年，深層学習への需要が高まっており，多くのハードウェア実装が提案されてきた．Xilinx 社の提供するプラットフォームである Deep learning Processor Unit（DPU）はその一つである．しかし，推論を高速で行える一方で学習できないという問題があった．本研究ではConvolutional Neural Network（CNN）の最終層を独立させる最終層学習を提案する．この手法は最終層以外を DPU を用いて演算を行い，最終層は CPU で計算する．すなわち，モデルの大半を DPU で処理可能なため高速な計算速度を維持しつつ，最終層のみを CPU で実行して重みの変更や追加といった学習を可能とする．CNN を用いた画像分類は出力のニューロン数とクラス数が一致している必要があり，クラス数を追加する再学習などで効果を発揮する．最終層学習を行う場合，既存のクラスと追加するクラスのドメインの類似性が重要であることが分かった．そのため多クラスなデータセットでの学習には向かない．しかし，cifar10 のようなクラス数が少なくドメインが独立しているデータセットであればモデル全体を 10 クラス分全ての学習データで学習した際と，モデル全体を 8 クラス分で学習したのち最終層のみ 2 クラス追加して学習した際で 3 ポイント程度の精度低下に抑えることができた．また DPU と CPU のハイブリッドシステムの処理速度低下はほとんどないことを明らかにした．},
  abstract-en = {The demand for deep learning has been increasing, and many hardware implementations have been proposed. The Deep learning Processor Unit (DPU) was provided by Xilinx. Although it can perform inference at high speed, it cannot perform training. We propose a tail layer training that makes the tail layer of a Convolutional Neural Network (CNN) independent. All layers except the tail layer are computed using a DPU, and the tail layer is computed by a CPU. Since the number of neurons and classes in the output must be the same for image classification, it is effective for retraining to add the number of classes. The tail layer training, found that the relationship between the existing classes and the classes to be added is important. Therefore, it is not suitable for training on large number of classes. However, with a dataset such as cifar10, it is able to reduce the loss of accuracy by about 3 points between training the entire model with all 10 classes and training only the tail layer with 2 add classes after training the entire model with 8 classes.},
}