% Conference paper from the Computer Security Symposium 2024 proceedings
% (publisher field: Information Processing Society of Japan).
% The citation key is kept exactly as exported so existing citations resolve.
%
% Field notes:
%   author      - the six authors, once each, in "Family, Given" form.
%   author-en   - romanised author names; not a standard field, so styles
%                 ignore it. Kept for reference/search only.
%   booktitle   - proceedings title (the export used the unrecognised
%                 field name "book", which left the required field empty).
%   month       - standard three-letter macro, unquoted, so the style can
%                 localise/format it.
%   abstract    - Japanese abstract; abstract-en - English abstract. Both were
%                 previously stored in "note", which styles print verbatim in
%                 the reference list.
%   title       - the acronym URL is brace-protected against style recasing.
@inproceedings{oai:ipsj.ixsq.nii.ac.jp:00240894,
  author      = {川村, 慎太郎 and 藤田, 彬 and 金谷, 延幸 and 田中, 秀一 and 安田, 真悟 and 井上, 大介},
  author-en   = {Kawamura, Shintaro and Fujita, Akira and Kanaya, Nobuyuki and Tanaka, Hidekazu and Yasuda, Shingo and Inoue, Daisuke},
  title       = {悪性{URL}検知手法を評価するためのデータセット生成システムの設計},
  booktitle   = {コンピュータセキュリティシンポジウム2024論文集},
  pages       = {1102--1108},
  publisher   = {情報処理学会},
  year        = {2024},
  month       = oct,
  abstract    = {検知手法の評価には悪性WebサイトのURLやコンテンツを含んだデータセットが必要になるが，既存のデータセットは過去にユーザーから報告をうけて収集したコンテンツが多いため，時間経過と共にURLやコンテンツの変更や削除が発生する恐れがある．そのため，収集された時点における悪性Webサイトのコンテンツを含んだデータセットを準備することで，検知手法の評価対象として有効に利用できると思われる．このような課題の解決のために，悪性Webサイトにアクセスした時点の悪性Webサイトのスナップショットを取得しストレージへ保存することで，検知手法の評価に利用するデータセットを提供するシステムを提案する．本稿では，既存の悪性Webサイトへ解析基盤を利用してアクセスすることでスナップショットを取得し，ストレージへ保存するシステムを設計する．特にシステムの動作フローについて要件を定義し，システム全体の設計を行った結果を報告する．提案するシステムの有効性を確認するために，取得したスナップショットに悪性Webサイトの再現性があるかを検証する．システムがスナップショットとして取得する内容は，コンテンツに設定されているjavascript，多段リダイレクトの発生回数，リダイレクト元URLとリダイレクト先URL，リンクされている画像，コンテンツの全体を想定する．最後に，今後の実装計画について述べる．},
  abstract-en = {In order to evaluate detection methods, datasets containing URLs and content from malicious websites are required, but since existing datasets often contain content that has been collected from user reports in the past, there is a risk that URLs and content may be changed or deleted over time. For this reason, it is thought that datasets containing content from malicious websites at the time of collection can be effectively used as evaluation targets for detection methods. To solve this problem, we consider a system that provides a dataset for evaluating detection methods by taking a snapshot of a malicious website at the time of access and storing it in a storage device. In this paper, we propose a system design that takes a snapshot of an existing malicious website by accessing it using an analysis platform and storing it in a storage device. In particular, we define the requirements for the system's operation flow and report on the results of the overall system design. As an evaluation of the proposed system, we conduct a verification experiment of the operation flow. The contents that the system acquires as snapshots are assumed to be the javascript set in the content, the number of occurrences of multi-step redirects, the redirect source URL and redirect destination URL, linked images, and the entire content. Finally, we discuss future implementation plans.},
}