% Technical report (2017): high-quality nonparallel voice conversion using CycleGAN.
% Record cleaned up from a repository-style export:
%   - author lists each of the three authors once, in "Family, Given" form.
%     The romanized spellings are kept in the non-standard field author-en
%     (ignored by bibliography styles), with family and given names in the
%     order BibTeX expects.
%   - The bilingual abstract is stored in abstract instead of note, so that
%     styles do not print it in the reference list. Its text is unchanged.
%   - issue was renamed to number, the field styles read for this entry type.
%     NOTE(review): confirm that 9 is the report number and not an issue
%     inside a volume that the export dropped.
%   - NOTE(review): institution is required for this entry type but was not
%     present in the export. Presumably the Information Processing Society of
%     Japan, judging by the OAI identifier used as the key; confirm and add.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00184865,
  author    = {房, 福明 and 山岸, 順一 and 越前, 功},
  author-en = {Fang, Fuming and Yamagishi, Junichi and Echizen, Isao},
  title     = {{CycleGAN}を用いた高品質なノンパラレル声質変換},
  number    = {9},
  year      = {2017},
  month     = dec,
  abstract  = {近年，機械学習の進展により声質変換の性能が大幅に向上した．しかし，学習データが対とならないノンパラレルの場合，ソース話者とターゲット話者の特徴を精密にマッチすることが難しい．ノンパラレル声質変換モデルの学習はまだ困難であり，変換性能はまだ低い問題がある．一方，画像変換分野ではペアなしの画像データベースから変換モデルを学習する方法として CycleGAN が注目されている．CycleGAN は GAN の一種であり，複数個の generator と discriminator を持つ．また，generator は入力データの一部の情報を維持しながら，discriminator との競争学習によりターゲットドメインへの変換ができる特徴がある．そこで，本研究はこのアイディアに基づいて CycleGAN をノンパラレル声質変換に適用する方法を提案する．提案手法では，ソース話者とターゲット話者の類似特徴を直接マッチするのではなく，ソース話者の一部の言語情報を維持しながら話者特徴をターゲット話者にできるだけ近付けるように変換モデルを学習する．被験者評価実験より，提案手法は標準の GAN に基づいたパラレル声質変換を上回ったことを示す．, Recently, voice conversion (VC) based on deep learning has achieved remarkable performance. However, it is still difficult to train a mapping model using nonparallel training samples. In this work, we propose a high-quality nonparallel VC training method based on CycleGAN. A CycleGAN is a kind of generative adversarial network (GAN) originally developed for unpaired image-to-image translation. This model can be learned by an approach that a part of input information is kept while the corresponding distribution of the input data can be converted into a target distribution without paired training samples. Experimental results show that the proposed method outperforms a standard GAN-based parallel VC system.},
}