% Journal article with Japanese and English metadata.
% - author lists each person once, in Japanese, as "Family, Given".
% - author-en holds the romanized forms of the same four names, also as
%   "Family, Given". It is a non-standard field that standard styles ignore;
%   verify the romanizations against the publication before citing them.
% - abstract (Japanese) and abstract-en (English) are not printed by standard
%   styles, so the abstract text stays out of the rendered bibliography.
% - month uses the predefined macro so the style controls its rendering.
@article{oai:ipsj.ixsq.nii.ac.jp:00142628,
  author      = {川上, 尚慶 and 太田, 学 and 高須, 淳宏 and 安達, 淳},
  author-en   = {Kawakami, Naomichi and Ohta, Manabu and Takasu, Atsuhiro and Adachi, Jun},
  title       = {少量学習データによる参考文献書誌情報抽出精度の向上},
  journal     = {情報処理学会論文誌データベース(TOD)},
  volume      = {8},
  number      = {2},
  pages       = {18--29},
  month       = jun,
  year        = {2015},
  abstract    = {電子図書館の運用には,書誌情報データベースの整備が必須である.特に学術論文の参考文献欄には有用な書誌情報が集約されている.そこで我々は,Conditional Random Field(CRF)を用いて参考文献文字列から書誌情報を自動抽出する手法を提案した.しかし,書誌情報を高精度に抽出するには雑誌ごとに一定量の学習データを用意する必要があり,その生成コストが問題だった.本稿では,学習データが少ない場合に,能動サンプリングと擬似学習データ,転移学習を利用して抽出精度を改善する方法を提案する.実験では,抽出精度と必要とする学習データ件数を評価し,提案手法の有効性について考察した.},
  abstract-en = {The effective use of digital libraries demands maintenance of bibliographic databases. Especially, the reference fields of academic papers are full of useful bibliographic information. We, therefore, proposed a method of automatically extracting bibliographic information from reference strings using a conditional random field (CRF). However, at least a few hundred reference strings are necessary for training the CRF to achieve high extraction accuracies and the preparation of such human-labeled data for training is usually expensive. As described herein, we propose the use of active sampling, pseudo-training data and transfer learning to improve extraction accuracies with a small amount of training data. Then we evaluate the extraction accuracies and the associated training costs by experimentation and discuss the effectiveness of the proposed approach.},
}