% Journal article by a single author, cleaned up from a repository export.
% - The export listed the same person twice in `author`: once in Japanese
%   script and once romanized with family and given names swapped. Only the
%   Japanese form is kept as the author; the corrected romanization is stored
%   in `yomi`, which standard styles ignore (Japanese pBibTeX styles are
%   understood to use it as a reading/sort key -- confirm for your style).
% - The abstract (Japanese followed by English, text unchanged) lives in
%   `abstract` rather than `note`, so it is not printed in the reference list.
% - The issue number is in `number`, and `month` uses the predefined macro.
% - Braces around Ziv-Merhav protect the proper nouns from style recasing.
@article{oai:ipsj.ixsq.nii.ac.jp:00078162,
  author   = {相澤, 彰子},
  yomi     = {Aizawa, Akiko},
  title    = {多クラス文書分類問題における{Ziv-Merhav} Crossparsingの適用と評価},
  journal  = {情報処理学会論文誌},
  volume   = {52},
  number   = {10},
  pages    = {2953--2964},
  year     = {2011},
  month    = oct,
  abstract = {圧縮プログラムや符号化に基づくデータの類似度尺度について，テキスト文書への適用を中心に近年の研究を概観するとともに，Ziv-Merhav crossparsingと呼ばれる系列分解法と単純ベイズ法を組み合わせたテキスト分類法を新たに提案する．異なるタイプの分類問題を用いた実験により，従来のZiv-Merhav crossparsingや単純ベイズ法に対して，提案手法では分類性能の大幅な改善が得られることを示す．また，サポートベクタマシンやロジスティック回帰に基づく多クラス分類器をベースラインとして用いた比較により，Reuters-21578やTechTC-300のようにカテゴリが文書の話題に基づき設定される問題ではこれらの機械学習手法が優位であるが，論文著者の同定のようにカテゴリが文書の作成者に対応づけられる問題では提案手法が優位となる場合があることを示す．最後に，可変長Nグラムによる類似度尺度という観点から考察を加える．, In this paper, we first present an overview of recent studies on compression and encoding-based similarity measures for textual documents. Next, we propose a new method that combines Ziv-Merhav crossparsing and a naive Bayes classifier. Then, we investigate the performance using different types of text classification problems. The experimental results show that the proposed method considerably overperforms the conventional practice of Ziv-Merhav crossparsing and also naive Bayes classifiers. It is also shown that while multiclass versions of two well-known machine learning methods, a support vector machine and logistic regression, perform better than the proposed method with standard test sets such as Reuters-21578 or TechTC-300, the proposed method performs better with some types of author identification problems. Lastly, we provide a perspective of the proposed method as a similarity measure based on variable length n-grams.},
}