% Technical report (June 2001) by Matsumoto and Den on morphological
% analysis (word segmentation and part-of-speech tagging) of transcribed
% spoken-language data.
%
% Cleanup applied to the exported record:
%   - The export listed each author twice (Japanese script and romanized).
%     `author` now names each person once, in the script of the title; the
%     romanized forms are kept in the ignored field `author-romanized`.
%   - The abstract was stored in `note`, which styles print in the
%     bibliography; it now lives in `abstract`, text unchanged.
%   - `month` uses the predefined three-letter macro instead of a literal.
%
% NOTE(review): this entry type requires an `institution` field, which the
% export does not provide; presumably IPSJ, judging by the citation key --
% confirm and add it.
% NOTE(review): `issue` is not a classic BibTeX field and its value looks
% like "issue number (report id)" -- confirm before mapping to `number`.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00057423,
  author           = {松本, 裕治 and 伝, 康晴},
  author-romanized = {Matsumoto, Yuji and Den, Yasuharu},
  title            = {話し言葉の形態素解析},
  year             = {2001},
  month            = jun,
  issue            = {55(2001-SLP-036)},
  abstract         = {話し言葉研究のための基礎データとしてタグ付きコーパスの蓄積が進んでいる。言語データへの最も基本的なタグは単語わかち書きと品詞付与である。本稿では、書き起こされた話し言葉データへの形態素タグ付け自動化のための問題点について考察する。まず、書き言葉と対比して見られる話し言葉の特徴と問題点データが解析精度にどのように貢献するかを観察する。, Tagged corpora are indispensable resource for linguistic research. Several projects are now under way for constructing spoken language corpora. The foundamental annotation to corpora is segmentation and part of speech tagging. In this paper, we examine the issues peculiar to spoken language annotation compared with written language. First, we summarize the characteristics and issues spoken language. We then report some experiments of automatic part of speech tagging based on statistical learning algorithm, through which we see how a small size of tagged corpus is effective in improving the accuracy of the automatic taggers.},
}