@comment{
  Record exported from the IPSJ repository. Per its own notice, this is a
  preprint; the version to cite is the Journal of Information Processing
  article named in the note field. The abstract field is not printed by
  standard styles.
}
@article{oai:ipsj.ixsq.nii.ac.jp:00214348,
  author   = {Mimoto, Tomoaki and Hashimoto, Masayuki and Kiyomoto, Shinsaku and Kitamura, Koji and Miyaji, Atsuko},
  title    = {Privacy Risk of Document Data and a Countermeasure Framework},
  journal  = {情報処理学会論文誌},
  volume   = {62},
  number   = {12},
  month    = dec,
  year     = {2021},
  abstract = {A huge number of documents such as news articles, public reports, and personal essays have been released on websites and social media. Once documents containing privacy-sensitive information are published, the risk of privacy breaches increases, thus requiring very careful review of documents prior to publication. In many cases, human experts redact or sanitize documents before publishing them; however, this approach can be inefficient with regard to cost and accuracy. Furthermore, such measures do not guarantee that critical privacy risks are eliminated from the documents. In this paper, we present a generalized adversary model and apply it to document data. This work devises an attack algorithm for documents using a web search engine, and then proposes a privacy-preserving framework against the attacks. We evaluate the privacy risks for actual accident reports from schools and court documents. In experiments using these reports, we show that human-sanitized documents still contain privacy risks and that our proposed approach can contribute to risk reduction.},
  note     = {This is a preprint of an article intended for publication in Journal of Information Processing (JIP). This preprint should not be cited. This article should be cited as: Journal of Information Processing Vol.~29 (2021) (online), DOI 10.2197/ipsjjip.29.778},
}