@techreport{oai:ipsj.ixsq.nii.ac.jp:00242243,
  author        = {Deng, Qiaoyi and Ikehata, Satoshi and Sekikawa, Yusuke and Sato, Ikuro},
  title         = {Regularizing Image Encoders to Generate {Bird's-Eye View} Representations for Autonomous Driving Tasks},
  institution   = {Information Processing Society of Japan},
  number        = {17},
  month         = jan,
  year          = {2025},
  abstract      = {Bird's-Eye View (BEV) representations are critical for providing a unified spatial scene understanding to autonomous driving tasks. However, existing methods often struggle with a lack of transformation equivariance. This results in artifacts on BEV feature maps that degrade the performance of downstream tasks. To address this issue, we propose a regularization approach to enhance transformation equivariance through ego-vehicle and dynamic object motion transformations by aligning BEV features in the BEV coordinate system across consecutive frames and introduces a consistency loss to penalize feature misalignment. Experiments on the nuScenes dataset demonstrate that the proposed approach effectively reduces artifacts, stabilizes BEV representations, and improves the reliability of downstream tasks.},
  internal-note = {Cleaned auto-export: deduplicated author list, names normalized to "Last, First" order; duplicated abstract moved from note to abstract (single copy); issue -> number; month macro. NOTE(review): institution inferred from the OAI identifier (ipsj.ixsq.nii.ac.jp) -- verify against the original report.},
}