@article{oai:ipsj.ixsq.nii.ac.jp:00211203,
  author   = {Nakatsuka, Takayuki and Yoshii, Kazuyoshi and Koyama, Yuki and Fukayama, Satoru and Goto, Masataka and Morishima, Shigeo},
  title    = {{MirrorNet}: A Deep Reflective Approach to {2D} Pose Estimation for Single-Person Images},
  journal  = {情報処理学会論文誌},
  volume   = {62},
  number   = {5},
  month    = may,
  year     = {2021},
  doi      = {10.2197/ipsjjip.29.406},
  abstract = {This paper proposes a statistical approach to 2D pose estimation from human images. The main problems with the standard supervised approach, which is based on a deep recognition (image-to-pose) model, are that it often yields anatomically implausible poses, and its performance is limited by the amount of paired data. To solve these problems, we propose a semi-supervised method that can make effective use of images with and without pose annotations. Specifically, we formulate a hierarchical generative model of poses and images by integrating a deep generative model of poses from pose features with that of images from poses and image features. We then introduce a deep recognition model that infers poses from images. Given images as observed data, these models can be trained jointly in a hierarchical variational autoencoding (image-to-pose-to-feature-to-pose-to-image) manner. The results of experiments show that the proposed reflective architecture makes estimated poses anatomically plausible, and the pose estimation performance is improved by integrating the recognition and generative models and also by feeding non-annotated images.},
  note     = {This is a preprint of an article intended for publication in Journal of Information Processing (JIP). This preprint should not be cited. This article should be cited as: Journal of Information Processing Vol.~29 (2021) (online)},
}