{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00233828","sets":["934:1022:11484:11595"]},"path":["11595"],"owner":"44499","recid":"233828","title":["Automatic Stopword Generation Based on Attention for Document Classification Using Neural Networks"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-04-23"},"_buckets":{"deposit":"25b87ffe-980d-490f-a395-b0bbd3d432b6"},"_deposit":{"id":"233828","pid":{"type":"depid","value":"233828","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"Automatic Stopword Generation Based on Attention for Document Classification Using Neural Networks","author_link":["636039","636040","636038","636037"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Automatic Stopword Generation Based on Attention for Document Classification Using Neural Networks"},{"subitem_title":"Automatic Stopword Generation Based on Attention for Document Classification Using Neural Networks","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[研究論文] stopwords, attention, BERT, neural network, text classification, machine learning, natural language processing","subitem_subject_scheme":"Other"}]},"item_type_id":"3","publish_date":"2024-04-23","item_3_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Gifu University"},{"subitem_text_value":"Gifu University"}]},"item_3_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Gifu University","subitem_text_language":"en"},{"subitem_text_value":"Gifu University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/233828/files/IPSJ-TOD1702007.pdf","label":"IPSJ-TOD1702007.pdf"},"date":[{"dateType":"Available","dateValue":"2026-04-23"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-TOD1702007.pdf","filesize":[{"value":"793.0 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"13"},{"tax":["include_tax"],"price":"0","billingrole":"39"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"be03215d-0f4a-40c7-b3f7-1c6c81cc9351","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Information Processing Society of Japan"}]},"item_3_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yuki, Kuwabara"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yu, Suzuki"}],"nameIdentifiers":[{}]}]},"item_3_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yuki, Kuwabara","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yu, Suzuki","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_3_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11464847","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_3_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7799","subitem_source_identifier_type":"ISSN"}]},"item_3_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Stopwords are generally used to improve the accuracy of document classification and retrieval. We believe that setting appropriate stopwords improves classification accuracy. However, in our preliminary experiments, in document classification tasks using BERT, existing stopword lists are not effective for improving classification accuracy. To solve this problem, we construct a method for generating stopwords using the attention mechanism of the classifiers. In this method, words with high attention in misclassified input documents and low attention in correctly classified documents are treated as stopwords. The system probabilistically removes stopwords. The system automatically sets the probability of each word in input documents being a stopword when it builds the classification model. We conduct experiments to confirm effectiveness of our stopword generation method. Our experimental results show that there are cases using stopwords generated by our method that improve the classification accuracy. Three of the six classification tasks tested in this study show significant differences in accuracy improvement.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.32(2024) (online)\n------------------------------","subitem_description_type":"Other"}]},"item_3_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Stopwords are generally used to improve the accuracy of document classification and retrieval. We believe that setting appropriate stopwords improves classification accuracy. However, in our preliminary experiments, in document classification tasks using BERT, existing stopword lists are not effective for improving classification accuracy. To solve this problem, we construct a method for generating stopwords using the attention mechanism of the classifiers. In this method, words with high attention in misclassified input documents and low attention in correctly classified documents are treated as stopwords. The system probabilistically removes stopwords. The system automatically sets the probability of each word in input documents being a stopword when it builds the classification model. We conduct experiments to confirm effectiveness of our stopword generation method. Our experimental results show that there are cases using stopwords generated by our method that improve the classification accuracy. Three of the six classification tasks tested in this study show significant differences in accuracy improvement.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.32(2024) (online)\n------------------------------","subitem_description_type":"Other"}]},"item_3_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌データベース(TOD)"}],"bibliographicIssueDates":{"bibliographicIssueDate":"2024-04-23","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicVolumeNumber":"17"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":233828,"updated":"2025-01-19T09:58:13.087408+00:00","links":{},"created":"2025-01-19T01:35:25.956068+00:00"}