% IPSJ technical report (Dec 2018): automatic extraction of bilingual sentences
% from a comparable corpus using word embeddings, Earth Mover's Distance and a
% sentence-length weight.
%
% Cleanup of the auto-exported record:
%   - author:      lists each of the three authors once, as "Family, Given".
%                  The export had split the surname 越前谷 as "越前, 谷博" and
%                  appended the romanized names as three extra, reversed authors.
%   - author_en:   non-standard field (ignored by styles) holding the romanized
%                  names. NOTE(review): the export had only "Echizen" for 越前谷;
%                  "Echizen'ya" is assumed -- confirm against the paper.
%   - institution: required for techreport but absent from the export.
%                  NOTE(review): inferred from the IPSJ repository key -- confirm.
%   - abstract:    moved verbatim out of "note", which styles print in the
%                  reference list.
%   - month:       uses the predefined macro so styles can localize it.
%   - title:       braces keep the capitals of the proper noun under
%                  sentence-casing styles.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00192733,
 author      = {田上, 諒 and 越前谷, 博 and 荒木, 健治},
 author_en   = {Tanoue, Ryo and Echizen'ya, Hiroshi and Araki, Kenji},
 title       = {単語分散表現を用いた{Earth Mover's Distance}と文長の違いに基づく類似度による対訳文の自動抽出},
 institution = {情報処理学会},
 issue       = {2},
 year        = {2018},
 month       = dec,
 abstract    = {本報告では，対訳辞書などの高品質な対訳知識を用いることなく，コンパラブルコーパスから対訳文を自動抽出する手法を提案する．提案手法では，単語分散表現を用いて翻訳行列と類似度計算を行うことで対訳文を抽出する．その際，類似度計算には Earth Mover's Distance を用いる．更に，提案手法では文長の違いを重みとして類似度に用いることで抽出精度の向上を図っている．ニュース記事のコンパラブルコーパスを用いた性能評価実験の結果，全記事の平均の F 値はベースラインで 0.13， EMD のみのシステムと提案手法にける文長を考慮しない場合では共に 0.42，文長を考慮した場合は 0.49 となった．これらの結果から，文長を考慮した提案手法の有効性が確認された．, In this paper, we propose new method to automatically extract bilingual sentences from comparable corpus without high quality bilingual knowledge such as bilingual dictionary. In our proposed method, the bilingual sentences are extracted using the translation matrix and the similarity between two language sentences based on the word embeddings. In that case, the Earth Mover's Distance is used to calculate the similarity. Moreover, the weight based on the difference of lengths between two language sentences is applied to the similarity in EMD. The evaluational experiments using the news article's comparable corpus indicate that the average of F-measure of our proposed method was 0.49, those of our proposed method without the weight based on the sentence length and the method based only on EMD were respectively 0.42, and that of the method using sentence length which is the baseline was 0.13. Therefore, we confirmed the effectiveness of our proposed method using the weight based sentence length.}
}