@article{oai:ipsj.ixsq.nii.ac.jp:00189394,
  author  = {Dabre, Raj and Cromieres, Fabien and Kurohashi, Sadao},
  title   = {Exploiting Multilingual Corpora Simply and Efficiently in Neural Machine Translation},
  journal = {情報処理学会論文誌},
  volume  = {59},
  number  = {5},
  month   = {May},
  year    = {2018},
  note    = {In this paper, we explore a simple approach to “Multi-Source Neural Machine Translation” (MSNMT) that relies only on preprocessing an N-way multilingual corpus, without modifying the Neural Machine Translation (NMT) architecture or training procedure. We simply concatenate the source sentences to form a single, long multi-source input sentence, keep the target-side sentence as it is, and train an NMT system on this preprocessed corpus. We evaluate our method in resource-poor as well as resource-rich settings, show its effectiveness (up to 4 BLEU using 2 source languages and up to 6 BLEU using 5 source languages), and compare it against existing approaches. We also provide some insights into how the NMT system leverages multilingual information in such a scenario by visualizing attention. We then show that this multi-source approach can be used for transfer learning to improve the translation quality of single-source systems without using any additional corpora, thereby highlighting the importance of multilingual multiway corpora in low-resource scenarios. We also extract and evaluate a multilingual dictionary by a method that utilizes the multi-source attention, and show that it works fairly well despite its simplicity. ------------------------------ This is a preprint of an article intended for publication in the Journal of Information Processing (JIP). This preprint should not be cited. The article should be cited as: Journal of Information Processing, Vol.26 (2018) (online), DOI: http://dx.doi.org/10.2197/ipsjjip.26.406 ------------------------------}
}