{"updated":"2025-01-22T01:02:23.370562+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00066471","sets":["581:582:5904"]},"path":["5904"],"owner":"11","recid":"66471","title":["ベイジアンフィルタにおける言語知識を用いないトークン抽出方式の提案と評価"],"pubdate":{"attribute_name":"公開日","attribute_value":"2009-09-15"},"_buckets":{"deposit":"d5bd837d-f81e-4ff6-9ad6-79bf64621bfd"},"_deposit":{"id":"66471","pid":{"type":"depid","value":"66471","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"ベイジアンフィルタにおける言語知識を用いないトークン抽出方式の提案と評価","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"ベイジアンフィルタにおける言語知識を用いないトークン抽出方式の提案と評価"},{"subitem_title":"Proposal and Evaluation of Improvements for Language-independent Tokenization in Bayesian Spam E-mail Filters","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"特集:社会を活性化するコンピュータセキュリティ技術","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2009-09-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"青山学院大学大学院理工学研究科理工学専攻/現在,ソニー株式会社"},{"subitem_text_value":"青山学院大学理工学部情報テクノロジー学科"},{"subitem_text_value":"青山学院大学理工学部情報テクノロジー学科"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Science and Engineering, Aoyama Gakuin University / Presently with Sony Corporation","subitem_text_language":"en"},{"subitem_text_value":"College of Science and Engineering, Aoyama Gakuin University","subitem_text_language":"en"},{"subitem_text_value":"College of Science and Engineering, Aoyama Gakuin University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/66471/files/IPSJ-JNL5009022.pdf"},"date":[{"dateType":"Available","dateValue":"2011-09-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL5009022.pdf","filesize":[{"value":"596.5 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"67d1e733-0395-41d4-aa2c-c4e90943a2d0","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2009 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"藤田, 拓也"},{"creatorName":"松本, 章代"},{"creatorName":"テュールストマーティンヤコブ"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Takuya, Fujita","creatorNameLang":"en"},{"creatorName":"Akiyo, Matsumoto","creatorNameLang":"en"},{"creatorName":"Martin, J.Durst","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal 
article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"近年,社会問題ともなっているスパムメールに対抗するために,ベイズ理論を応用したスパムメールフィルタであるベイジアンフィルタが脚光を浴びている.しかし,社会環境のグローバル化により,多言語環境においても利用可能なスパムメールフィルタが求められている現在において,言語や文字コードの知識を用いないベイジアンフィルタは十分に検討されたとはいえない状況である.そこで本論文では,ベイジアンフィルタに最適な,言語知識を用いないトークン抽出方式の提案と評価を行う.具体的には,電子メールの構造に基づいたトークンへの属性付与や,適切なトークン長のバイト単位N-gramによって,実用的な判別精度を持ったスパムメールフィルタが実現できることを明らかにする.また,言語の異なる複数のメールコーパスを用いた実験によって,言語や文字コードの知識を用いる既存手法との比較を行い,提案手法の有効性を示す.","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Recently, Bayesian filters have attracted attention as a means to combat spam E-mail, which has become a social problem. However, not enough attention has been given to Bayesian filters that do not use knowledge about language or character encoding. This is an important requirement in today's multilingual society. This paper proposes and evaluates methods of language-independent token extraction optimized for Bayesian filters. We use byte-level N-gram tokens of appropriate length and assign attributes to these tokens based on E-mail structure. This leads to a spam filter with a discrimination accuracy high enough for use in practice. 
We also compare our proposed methods with existing methods that use knowledge about the language or character encoding using several E-mail corpora with different languages, and show the effectiveness of the newly proposed methods.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"2192","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"2182","bibliographicIssueDates":{"bibliographicIssueDate":"2009-09-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"9","bibliographicVolumeNumber":"50"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-18T23:27:13.461048+00:00","id":66471,"links":{}}