@comment{NOTE(review): techreport entries require an institution field, which the exported record did not carry. Presumably this is an IPSJ report (going by the OAI citation key) -- confirm against the publisher page and add it.}
@techreport{oai:ipsj.ixsq.nii.ac.jp:00074066,
  author      = {新里, 圭司 and 鎌田, 浩司 and 黒橋, 禎夫},
  author_en   = {Shinzato, Keiji and Kamada, Hiroshi and Kurohashi, Sadao},
  title       = {同位語を利用した不在インデックス},
  number      = {15},
  month       = may,
  year        = {2011},
  abstract    = {自然文検索では，文書中に出現する単語，同義語・句，係り受け関係をインデックスに登録し，これらを検索時の文書収集やスコアリングに利用する．しかしながら，クエリ中の語・句の同位語が含まれる文書の適合度を誤りやすいという問題がある．本稿では，文書に「書かれていない」ということを表す不在タームを，国語辞典・ウィキペディアより獲得した同位語を利用して生成し，これを利用することで高速に不適合文書を検出する手法を提案する．NTCIR-3/4で構築されたテストセットを用いて提案手法を評価した結果，82.9\%の精度で不適合文書を検出できることがわかった．},
  abstract_en = {In natural language search, words, synonyms and dependencies in a document are indexed, and they are exploited for document retrieving and scoring. Natural language search, however, is likely to regard irrelevant documents including coordinate words of terms in a query as relevant ones. To solve the above problem, this paper proposes a non-existence term which means that a document does not describe information. For instance, the non-existence term “pigeon → damage” extracted from the document D means that the document D does not describe “damage of pigeon.” Non-existence terms are generated by using coordinate words extracted from an ordinary dictionary and Wikipedia, and allow search engines to rapidly detect irrelevant documents. We evaluated the effectiveness of non-existence terms using the test collection constructed by NTCIR-3/4 competition. Experimental results showed that the proposed method achieved 82.9\% in precision for irrelevant document detection.},
}