{"updated":"2025-01-20T01:37:59.133168+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00189394","sets":["581:9322:9327"]},"path":["9327"],"owner":"11","recid":"189394","title":["Exploiting Multilingual Corpora Simply and Efficiently in Neural Machine Translation"],"pubdate":{"attribute_name":"公開日","attribute_value":"2018-05-15"},"_buckets":{"deposit":"591b36d7-3993-453e-83d1-74c9b1f7e06b"},"_deposit":{"id":"189394","pid":{"type":"depid","value":"189394","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"Exploiting Multilingual Corpora Simply and Efficiently in Neural Machine Translation","author_link":["430806","430805","430809","430808","430810","430807"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Exploiting Multilingual Corpora Simply and Efficiently in Neural Machine Translation"},{"subitem_title":"Exploiting Multilingual Corpora Simply and Efficiently in Neural Machine Translation","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[一般論文] Neural Machine Translation (NMT), multi-source NMT, empirical comparison, transfer learning, deep learning, dictionary extraction","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2018-05-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Informatics, Kyoto University"},{"subitem_text_value":"Japan Science and Technology Agency"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Informatics, Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"Japan Science and Technology Agency","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/189394/files/IPSJ-JNL5905011.pdf","label":"IPSJ-JNL5905011.pdf"},"date":[{"dateType":"Available","dateValue":"2020-05-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL5905011.pdf","filesize":[{"value":"503.9 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"3db98885-cf61-49a4-99ed-441febb7f311","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2018 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Raj, Dabre"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Fabien, Cromieres"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Sadao, Kurohashi"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Raj, Dabre","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Fabien, Cromieres","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Sadao, Kurohashi","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"In this paper, we explore a simple approach for “Multi-Source Neural Machine Translation” (MSNMT) which only relies on preprocessing a N-way multilingual corpus without modifying the Neural Machine Translation (NMT) architecture or training procedure. We simply concatenate the source sentences to form a single, long multi-source input sentence while keeping the target side sentence as it is and train an NMT system using this preprocessed corpus. We evaluate our method in resource poor as well as resource rich settings and show its effectiveness (up to 4 BLEU using 2 source languages and up to 6 BLEU using 5 source languages) and compare them against existing approaches. We also provide some insights on how the NMT system leverages multilingual information in such a scenario by visualizing attention. We then show that this multi-source approach can be used for transfer learning to improve the translation quality for single-source systems without using any additional corpora thereby highlighting the importance of multilingual-multiway corpora in low resource scenarios. We also extract and evaluate a multilingual dictionary by a method that utilizes the multi-source attention and show that it works fairly well despite its simplicity.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.26(2018) (online)\nDOI http://dx.doi.org/10.2197/ipsjjip.26.406\n------------------------------","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In this paper, we explore a simple approach for “Multi-Source Neural Machine Translation” (MSNMT) which only relies on preprocessing a N-way multilingual corpus without modifying the Neural Machine Translation (NMT) architecture or training procedure. We simply concatenate the source sentences to form a single, long multi-source input sentence while keeping the target side sentence as it is and train an NMT system using this preprocessed corpus. We evaluate our method in resource poor as well as resource rich settings and show its effectiveness (up to 4 BLEU using 2 source languages and up to 6 BLEU using 5 source languages) and compare them against existing approaches. We also provide some insights on how the NMT system leverages multilingual information in such a scenario by visualizing attention. We then show that this multi-source approach can be used for transfer learning to improve the translation quality for single-source systems without using any additional corpora thereby highlighting the importance of multilingual-multiway corpora in low resource scenarios. We also extract and evaluate a multilingual dictionary by a method that utilizes the multi-source attention and show that it works fairly well despite its simplicity.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.26(2018) (online)\nDOI http://dx.doi.org/10.2197/ipsjjip.26.406\n------------------------------","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicIssueDates":{"bibliographicIssueDate":"2018-05-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"5","bibliographicVolumeNumber":"59"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-19T00:55:27.041099+00:00","id":189394,"links":{}}