% IPSJ Journal article (vol. 65, no. 11, 2024) on IoT malware classification (SMCA+).
% The author field holds the native-script names only; the romanized forms of the
% same six authors are kept in the ignored auxiliary field author-en.
% The abstract (Japanese followed by English) lives in the abstract field so that
% bibliography styles do not print it as a note.
@article{oai:ipsj.ixsq.nii.ac.jp:00240716,
 author    = {大迫, 勇太郎 and 山内, 利宏 and 吉岡, 克成 and 藤橋, 卓也 and 渡辺, 尚 and 猿渡, 俊介},
 author-en = {Osako, Yutaro and Yamauchi, Toshihiro and Yoshioka, Katsunari and Fujihashi, Takuya and Watanabe, Takashi and Saruwatari, Shunsuke},
 title     = {{CPU}アーキテクチャに依存しない{IoT}マルウェア分類木生成手法},
 journal   = {情報処理学会論文誌},
 volume    = {65},
 number    = {11},
 pages     = {1604--1617},
 month     = nov,
 year      = {2024},
 abstract  = {モノがネットワークに接続されることが一般的となり，我々の生活が便利になった．しかしながら，ネットワーク接続されたモノは攻撃者の標的となり，大規模な攻撃への温床となっているため，マルウェアへの対応が必須である．Internet of Things（IoT）デバイス向けのマルウェアへの対策は，攻撃方法の高速な分析と多様なCPUアーキテクチャへの対応が求められる．このような観点から，本稿ではIoTデバイス向けのマルウェアの分類手法「String-based Malware Classification Algorithm +（SMCA+）」を提案する．SMCA+では，マルウェアから文字列の単語を抽出し，Bag of Wordsに変換したうえで次元削減し得たベクトルに対して，階層的クラスタリングによって分類木を作成する．文字列ベースの手法では，CPUアーキテクチャが異なる同種のマルウェアを近くにクラスタリングできる．また，説明変数が文字列であるため，ユーザが解析しやすいという特徴がある．SMCA+の有効性を横浜国立大学吉岡研究室のIoTマルウェアデータセットおよびVirusTotalのラベルを用いて評価した結果，3ファミリ間での平均F1スコアが0.978であり，従来のマルウェア分類と矛盾のない結果であることを確認した．また，可視化結果の検証によってCPUアーキテクチャによらず類似したバイナリファイルに分類できることを確認し，解析に有用な文字列の情報が抽出できた．, It has become common for things to be connected to the network, making our lives more convenient. However, network-connected things have become targets of attackers and a hotbed for large-scale attacks, so it is essential to address malware. Countermeasures against malware for IoT devices require fast analysis of attack methods and support for a variety of CPU architectures. From this perspective, this paper proposes a new malware classification method for IoT devices, String-based Malware Classification Algorithm+ (SMCA+). SMCA+ extracts words from malware, transforms them into a bag of words, and creates a classification tree by hierarchical clustering of vectors obtained through dimensionality reduction. The string-based method can cluster malware of the same type with different CPU architectures close. In addition, since the explanatory variables are strings, they are easy for users to analyze. We evaluated the effectiveness of SMCA+ using the IoT malware dataset from Yokohama National University Yoshioka Laboratory and VirusTotal labels and found that the average F1 score among the three families is 0.978, confirming results consistent with conventional malware classification. Furthermore, through the validation of visualization results, we confirmed the capability to classify similar binary files regardless of the CPU architecture and extracted useful string information for analysis.}
}