% Technical report (2009) on two-stage clustering for improving recall in
% person-name disambiguation of Web search results.
%
% Field notes:
%   author      - the five authors, in Japanese script, "Family, Given" form.
%   author_en   - romanized forms of the same five people (non-standard field,
%                 ignored by styles). The export had appended them to author
%                 with given and family names swapped.
%   number      - was exported under the non-standard name "issue".
%   institution - NOTE(review): inferred from the IPSJ OAI identifier in the
%                 citation key; confirm against the report itself.
%   abstract / abstract_en - the export had placed both abstracts in "note",
%                 which styles print verbatim in the bibliography.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00062572,
  author      = {池田, 雅紀 and 小野, 真吾 and 佐藤, 一誠 and 吉田, 稔 and 中川, 裕志},
  author_en   = {Ikeda, Masaki and Ono, Shingo and Sato, Issei and Yoshida, Minoru and Nakagawa, Hiroshi},
  title       = {Web上の人名検索結果の同姓同名問題における二段階クラスタリングを用いた再現率向上},
  institution = {情報処理学会},
  number      = {6},
  month       = jul,
  year        = {2009},
  abstract    = {教師なし学習によるクラスタリングに対して，半教師有り学習を適用する手法について提案する．クラスターの評価基準において，結果のクラスターにおける正解データの割合を表す適合率と正解データが結果のクラスターに含まれている割合を表す再現率が存在する．従来研究において，素性の種類を限定することによって特に高い適合率を持つクラスターを生成することが可能になった．これらの素性は疎であり，再現率を向上させることは困難である．一方，素性の中には，人物を識別する能力は弱いが，文書に含まれている数の多い素性が存在する．我々は半教師有り学習を適合率の高いクラスターに対して適用し，クラスターの再現率の向上させることを提案する．本研究では，ブートストラップ法として知られている Espresso を応用し，人名曖昧解消における半教師有り学習として用いる．},
  abstract_en = {This research proposes the application of semi-supervised learning to unsupervised clustering. There are two criteria of cluster evaluation, or precision and recall. Precision is the ratio of true datas in the result cluster and recall is the ratio of true datas the result cluster has to all true data. In previous work, the selection of feature types enables to make high precision clusters, but these features are too sparse to improve recall. On the other hand, there are features that has poor discrimination capacity but are thick in the documents. We suggest to applicate semi-supervised learning to these high precision clusters and advance clusters' recall. In this research, we use Espresso that is bootstrap method in the information extraction for person name disambiguation as semi-supervised learning method.},
}