{"updated":"2025-01-20T00:01:33.704143+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00192733","sets":["1164:4179:9477:9618"]},"path":["9618"],"owner":"44499","recid":"192733","title":["単語分散表現を用いたEarth Mover's Distanceと文長の違いに基づく類似度による対訳文の自動抽出"],"pubdate":{"attribute_name":"公開日","attribute_value":"2018-12-04"},"_buckets":{"deposit":"1e08aee8-1e46-4f71-be23-1ca4b22ccabc"},"_deposit":{"id":"192733","pid":{"type":"depid","value":"192733","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"単語分散表現を用いたEarth Mover's Distanceと文長の違いに基づく類似度による対訳文の自動抽出","author_link":["450575","450577","450576","450578","450573","450574"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"単語分散表現を用いたEarth Mover's Distanceと文長の違いに基づく類似度による対訳文の自動抽出"},{"subitem_title":"Automatic Extraction of Bilingual Sentences by Similarity based on Earth Mover's Distance using Word Embeddings and Difference of Sentence Length","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"機械翻訳・文法誤り訂正","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2018-12-04","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"北海学園大学大学院工学研究科"},{"subitem_text_value":"北海学園大学大学院工学研究科"},{"subitem_text_value":"北海道大学大学院情報科学研究科"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Engineering, Hokkai-Gakuen University,","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Engineering, Hokkai-Gakuen University,","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Science and Technology, Hokkaido University,","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/192733/files/IPSJ-NL18238002.pdf","label":"IPSJ-NL18238002.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL18238002.pdf","filesize":[{"value":"661.6 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"41bfc632-837a-4a13-a5e6-cb3754d23d0d","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2018 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"田上, 諒"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"越前, 谷博"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"荒木, 健治"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Ryo, Tanoue","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Hiroshi, Echizen","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Kenji, Araki","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8779","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本報告では,対訳辞書などの高品質な対訳知識を用いることなく,コンパラブルコーパスから対訳文を自動抽出する手法を提案する.提案手法では,単語分散表現を用いて翻訳行列と類似度計算を行うことで対訳文を抽出する.その際,類似度計算には Earth Mover's Distance を用いる.更に,提案手法では文長の違いを重みとして類似度に用いることで抽出精度の向上を図っている.ニュース記事のコンパラブルコーパスを用いた性能評価実験の結果,全記事の平均の F 値はベースラインで 0.13, EMD のみのシステムと提案手法にける文長を考慮しない場合では共に 0.42,文長を考慮した場合は 0.49 となった.これらの結果から,文長を考慮した提案手法の有効性が確認された.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In this paper, we propose new method to automatically extract bilingual sentences from comparable corpus without high quality bilingual knowledge such as bilingual dictionary. In our proposed method, the bilingual sentences are extracted using the translation matrix and the similarity between two language sentences based on the word embeddings. In that case, the Earth Mover's Distance is used to calculate the similarity. Moreover, the weight based on the difference of lengths between two language sentences is applied to the similarity in EMD. The evaluational experiments using the news article's comparable corpus indicate that the average of F-measure of our proposed method was 0.49, those of our proposed method without the weight based on the sentence length and the method based only on EMD were respectively 0.42, and that of the method using sentence length which is the baseline was 0.13. Therefore, we confirmed the effectiveness of our proposed method using the weight based sentence length.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告自然言語処理(NL)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2018-12-04","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicVolumeNumber":"2018-NL-238"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"created":"2025-01-19T00:58:25.527154+00:00","id":192733,"links":{}}