% IPSJ Journal record for the preprint of a JIP article (see the note and doi fields).
% Cleaned from a repository export: author list and abstract were duplicated,
% and the abstract plus DOI were stored inside the note field.
% NOTE(review): names were exported as "Given, Family"; they are stored here as
% "Family, Given" -- confirm the family/given split against the published byline.
@article{oai:ipsj.ixsq.nii.ac.jp:00206914,
  author   = {Mendsaikhan, Otgonpurev and Hasegawa, Hirokazu and Yamaguchi, Yukiko and Shimada, Hajime and Bataa, Enkhbold},
  title    = {Identification of Cybersecurity Specific Content Using Different Language Models},
  journal  = {情報処理学会論文誌},
  volume   = {61},
  number   = {9},
  month    = sep,
  year     = {2020},
  doi      = {10.2197/ipsjjip.28.623},
  abstract = {Given the sheer amount of digital texts publicly available on the Internet, it becomes more challenging for security analysts to identify cyber threat related content. In this research, we proposed to build an autonomous system to identify cyber threat information from publicly available information sources. We examined different language models to utilize as a cybersecurity-specific filter for the proposed system. Using the domain-specific training data, we trained Doc2Vec and BERT models and compared their performance. According to our evaluation, the BERT-based Natural Language Filter is able to identify and classify cybersecurity-specific natural language text with 90\% accuracy.},
  note     = {This is a preprint of an article intended for publication Journal of Information Processing(JIP). This preprint should not be cited. This article should be cited as: Journal of Information Processing Vol.28(2020) (online)},
}