% Report on the effectiveness of HTML-structure similarity for splog detection (2009).
% - `author` lists each of the five authors exactly once (Japanese script, family name first).
% - `yomi` keeps the romanized readings in "Family, Given" order; pBibTeX-style .bst files
%   read it for sorting, and styles that do not know the field ignore it.
% - The Japanese/English abstract is stored in `abstract` rather than `note`, so it is not
%   printed as part of the formatted reference.
% - NOTE(review): techreport entries require an `institution` field and the record does not
%   carry one. The citation key suggests the IPSJ repository -- confirm and add it.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00066699,
  author   = {片山, 太一 and 芳中, 隆幸 and 宇津呂, 武仁 and 河田, 容英 and 福原, 知宏},
  yomi     = {Katayama, Taichi and Yoshinaka, Takayuki and Utsuro, Takehito and Kawada, Yasuhide and Fukuhara, Tomohiro},
  title    = {スプログ検出における{HTML}構造の類似性の有効性の評価},
  number   = {19},
  month    = nov,
  year     = {2009},
  abstract = {本研究では，ブログにおいてアフィリエイト収入を得ることを目的とするスパム (スパムブログ，スプログ) のうち，特に，同一のスパムブログ作成者が自動的に大量生成したと推測されるスプログの検出において，HTML 構造の類似性が効果的であることを示す．具体的には，ブログの HTML ファイルにおける DOM ツリーから，コンテンツの最小単位に相当するブロックを抽出し，複数のスプログの間でブロック構造の類似性を測定する．その結果，同一ブログホストにおけるスプログのうち，同一のスパムブログ作成者が自動的に大量生成したと推測されるスプログ同士では，ブロック構造が類似する傾向があることを示す．また，ブロック構造の類似性を素性として用いることにより，SVM によるスプログ検出の性能が向上する場合があることを示す．, Spam blogs or splogs are blogs hosting spam posts, created using machine generated or hijacked content for the sole purpose of hosting advertisements or raising the number of inward of target sites. Among those splogs, this paper focuses on detecting a group of splogs which are estimated to be created by an identical spammer. We especially show that similarities of html structures among those splogs created by an identical spammer contribute to improving the performance of splog detection. In measuring similarities of html structures, we extract a list of blocks (minimum unit of content) from the DOM tree of a html file. We show that the html files of splogs estimated to be created by an identical spammer tend to have similar DOM trees and this tendency is quite effective in splog detection.},
}