% Technical report: GAN-based lip-sync for 2D cartoon animations (2021).
% Cleaned up from a repository export:
%   - the author list and the abstract were each present twice; deduplicated
%   - the abstract was stored in `note` (which styles print in the
%     bibliography); moved to `abstract`
%   - author names rewritten as "Family, Given"; the export appears to have
%     had the two parts swapped. NOTE(review): confirm against the paper
%   - NOTE(review): `institution` is required for a techreport entry but was
%     absent from the export; add it once confirmed
@techreport{oai:ipsj.ixsq.nii.ac.jp:00211693,
  author   = {Nakamoto, Mitsuhiko and Wang, Xueting and Yamasaki, Toshihiko},
  title    = {A {GAN} Based Approach to Lip-Sync {2D} Cartoon Animations without Requiring Raw Cartoon Dataset},
  year     = {2021},
  month    = jun,
  issue    = {1},
  abstract = {We present a generative adversarial networks (GAN) based approach to lip-sync 2D cartoon animations. Most of the previous works have worked on lip-sync for the real people talking videos. However, lip-sync for 2D cartoon animations was rarely discussed while the traditional workflow of creating 2D cartoon animations is highly time-consuming. The main problem of automatically lip-syncing a 2D cartoon animation, especially using a deep learning approach, is the lack of datasets which consist of well lip-synced cartoon animations. Therefore, In this paper we present a GAN-based approach to achieve 2D cartoon animation lip-sync with no need of collecting raw cartoon animation datasets. Alternatively, we construct a cartoon talking video dataset by applying CartoonGAN to transform real-life speaking videos into cartoon styles. The dataset after the style transfer was used to train a lip-synchronization model, Wav2Lip. Our approach can generate natural lip-synchronized cartoon animations. We also conduct a user study and the results demonstrate the effectiveness of our approach.},
}