@comment{NOTE(review): cleaned auto-export from the IPSJ OAI repository.
  Authors were duplicated (Japanese script + romanized) and the romanized
  forms were in "First, Last" order; de-duplicated to three authors in the
  correct "Last, First" form, with the original Japanese-script forms kept
  in the ignored author-ja field. Nonstandard "issue" mapped to "number".
  institution is inferred from the OAI identifier (ipsj.ixsq.nii.ac.jp =
  Information Processing Society of Japan) -- verify against the report.}
@techreport{oai:ipsj.ixsq.nii.ac.jp:00209846,
  author      = {Zhu, Zhehao and Sugano, Yusuke and Sato, Yoichi},
  author-ja   = {シュ, テツコウ and 菅野, 裕介 and 佐藤, 洋一},
  title       = {クロスビュー・非局所ニューラルネットワークによる自己視点映像と固定視点映像間の共通特徴量の学習},
  institution = {{Information Processing Society of Japan}},
  number      = {48},
  month       = feb,
  year        = {2021},
  note        = {This paper introduces a cross-view non-local neural network to learn joint representations for understanding human activities from videos captured by wearable and fixed cameras. The key element is a non-local model to extract and enhance the global visual feature similarity across the views while reducing dissimilarity. The proposed method achieves a state-of-the-art performance on a cross-view action recognition benchmark dataset.},
}