@comment{Two-author journal article. The author field holds the two authors in the script of the publication; their romanized forms (family name first) are kept in the ignored author-romanized field rather than being listed as additional authors. The Japanese and English abstracts are stored in the abstract field so that bibliography styles do not print them as a note.}
@article{oai:ipsj.ixsq.nii.ac.jp:00186818,
 author = {鈴木, 雅大 and 松尾, 豊},
 author-romanized = {Suzuki, Masahiro and Matsuo, Yutaka},
 title = {異なるモダリティ間の双方向生成のための深層生成モデル},
 journal = {情報処理学会論文誌},
 volume = {59},
 number = {3},
 pages = {859--873},
 year = {2018},
 month = mar,
 abstract = {本稿では，異なる種類のモダリティ間を双方向に生成できる深層生成モデルについて研究する．双方向とは，たとえば画像から対応する文書を生成するだけでなく，文書から画像も生成できるということである．近年，variational autoencoder（VAE）のような深層生成モデルで異なるモダリティを扱う研究が行われている．しかし，これらは条件づけられた従属的な関係しかモデル化していないため，あるモダリティから別のモダリティに1方向しか生成できない．双方向で生成するためには，すべてのモダリティ間の高レベルな概念をとらえるような共有表現を抽出し，それを通じて複数のモダリティを双方向に生成する必要がある．本研究では，各モダリティが共有表現に独立に条件づけられた下での全モダリティの同時分布をモデル化したjoint multimodal variational autoencoder（JMVAE）を提案する．一般的に，あるモダリティから別のモダリティを生成する際には，入力では生成先のモダリティは欠損させる必要がある．もし生成先のモダリティの次元が生成元のモダリティより大きい場合，推論した潜在変数や生成したモダリティが崩れてしまう可能性がある．本研究では，既存の欠損値補完の手法でも解決できないことを明らかにし，この問題を解決するために，JMVAE-klと階層的JMVAEという追加的な手法を提案する．実験から，これらの手法によって，欠損モダリティ問題が解決すること，すべてのモダリティを統合した適切な共有表現が獲得されること，従来の1方向しか生成できないモデルと比較して，同等以上の精度で双方向に生成できることを確認した．
 We investigate deep generative models that can exchange multiple modalities bi-directionally, e.g., generating images from corresponding texts and vice versa. Recently, some studies handle multiple modalities on deep generative models such as variational autoencoders (VAEs). However, these models typically assume that modalities are forced to have a conditioned relation, i.e., we can only generate modalities in one direction. To achieve our objective, we should extract a joint representation that captures high-level concepts among all modalities and through which we can exchange them bi-directionally. As described herein, we propose a joint multimodal variational autoencoder (JMVAE), in which all modalities are independently conditioned on joint representation. In other words, it models a joint distribution of modalities. In general, when generating another modality from one modality, the modality which we want to generate must be missing on input. If the missing modality is high-dimensional is larger in dimension than other modalities, then the inferred latent variable and generated samples might be collapsed. We found that this issue cannot prevent even using the conventional missing value complementation. In this study, we introduce two independent methods, JMVAE-kl and hierarchical JMVAE, which can prevent this issue. Our experiments showed the following results: our models can solve the missing modality problem; we can obtain appropriate joint representations which contain all modalities by our models; and our models can generate multiple modalities bi-directionally as same or better than the conventional models which can generate only one direction.}
}