% Technical report on building and evaluating Japanese word vectors (2015).
% - `author` lists each person once; the romanized spellings are preserved in
%   the ignored field `author-en`, in unambiguous "Family, Given" form.
% - The abstracts live in `abstract` (Japanese) and `abstract-en` (English)
%   rather than `note`, so styles do not print them in the reference list.
% - NOTE(review): this entry type requires an `institution` field, which the
%   exported record does not carry. The citation key suggests the IPSJ
%   repository -- confirm the issuing body and add it.
% - NOTE(review): `number` was exported as `issue`; presumably it is the
%   report number within the series -- verify against the repository record.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00141870,
  author      = {吉井, 和輝 and Nichols, Eric and 中野, 幹生 and 青野, 雅樹},
  author-en   = {Yoshii, Kazuki and Nichols, Eric and Nakano, Mikio and Aono, Masaki},
  title       = {日本語単語ベクトルの構築とその評価},
  number      = {4},
  year        = {2015},
  month       = may,
  abstract    = {単語ベクトルは,統計的自然言語処理で利用しやすい分散意味表現として近年盛んに用いられるようになってきた.しかしながら,今まで主に英語で評価されてきたため,英語以外の言語での有効性は不明である.本研究では,単語の類推 (word analogy) と文完成 (sentence completion) の二つの評価タスクを用い,著名なオープンソースツールである word2vec (gensim の再実装) と GloVe を用いて構築した日本語単語ベクトルの評価を行った.単語の類推タスクでは,英語データで公表されている結果に近い結果を得たが,文完成のタスクでは,精度が大幅に減少した.本稿では,両タスクのエラー解析で明らかになった英語の単語ベクトルと日本語の単語ベクトルの性能差や,日本語特有の問題について調査した結果について述べる.},
  abstract-en = {Word vectors have been the subject of a great deal of research in recent years, due to their effectiveness at representing meaning in statistical approaches. However, evaluation of word vectors has thus far been limited to a small number of tasks focusing on the English language. This paper aims to fill that gap by providing comprehensive evaluation of Japanese word vectors. We construct datasets for word analogy and sentence completion tasks and compare vectors constructed with two popular tools, word2vec and GloVe. Evaluation on the word analogy task produced comparable results to those reported on English data, while on the sentence completion task, results were significantly lower than those reported on English data. We conduct error analysis for both tasks and discuss potential factors contributing to differences in performance for English and Japanese.},
}