% Conference paper from the DA Symposium 2023 proceedings (repository export, cleaned up).
% NOTE(review): author names were re-ordered to "Family, Given" on the assumption that
% Sha, Liu, Meng, Yanagisawa and Shi are the family names -- confirm against the paper.
% The abstract was moved out of the note field (which styles print) into abstract,
% and the duplicated author list and duplicated abstract text were collapsed.
@inproceedings{oai:ipsj.ixsq.nii.ac.jp:00227386,
  author    = {Sha, Xingan and Liu, Zeqiu and Meng, Yuejie and Yanagisawa, Masao and Shi, Youhua},
  title     = {Energy-Efficient and Real-Time {FPGA}-based {YOLOv6} Accelerator for Object Detection},
  booktitle = {DAシンポジウム2023論文集},
  volume    = {2023},
  pages     = {129--134},
  publisher = {情報処理学会},
  month     = aug,
  year      = {2023},
  abstract  = {Convolutional neural networks (CNNs) are widely utilized in object detection due to their exceptional accuracy. However, it is challenging to deploy them in the power-constrained and resource-constrained edge applications for their large computational workload. Among different CNN algorithms, the YOLO [1] series offers a superior balance between speed and accuracy. Furthermore, field-programmable gate arrays (FPGAs) have advantages over Graphics Processing Units (GPUs) and Application-Specific Integrated Circuits (ASICs) in terms of cost, energy efficiency, reconfigurability, and short development cycles. Therefore, this study aims to develop a high-performance and energy-efficient FPGA-based YOLO accelerator, implemented on the VC707 FPGA board. In this paper, the state-of-the-art YOLOv6 [2] featuring compact and hardware friendly is deployed on FPGA, which achieves 84.9\% mean average precision (mAP) in PASCAL VOC2007 dataset far exceeding the accuracy of most existing YOLO accelerators. In addition, the proposed FPGA-based accelerator design adopting the output stationary dataflow and the double buffers scheme with a ping-pong mechanism can eliminate almost all energy-costive and time-consuming DRAM accesses. Experiments show that this hardware design achieves 364.5 frames per second (fps) and 18.46W on Virtex 7 VX485T FPGA under 150 MHz.}
}