{"created":"2025-01-18T23:13:26.289081+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00048117","sets":["1164:4179:4208:4210"]},"path":["4210"],"owner":"1","recid":"48117","title":["最大マージン原理にもとづく多重トピック文書の自動分類"],"pubdate":{"attribute_name":"公開日","attribute_value":"2004-09-16"},"_buckets":{"deposit":"6d6fbb36-25e7-4516-8ec3-8ed33d03e41d"},"_deposit":{"id":"48117","pid":{"type":"depid","value":"48117","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"最大マージン原理にもとづく多重トピック文書の自動分類","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"最大マージン原理にもとづく多重トピック文書の自動分類"},{"subitem_title":"Maximum Margin Labeling for Multi - Topic Text Categorization","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2004-09-16","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"NTTコミュニケーション科学基礎研究所"},{"subitem_text_value":"NTTコミュニケーション科学基礎研究所"},{"subitem_text_value":"NTTコミュニケーション科学基礎研究所"},{"subitem_text_value":"NTTコミュニケーション科学基礎研究所"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"NTT Communication Science Laboratories","subitem_text_language":"en"},{"subitem_text_value":"NTT Communication Science Laboratories","subitem_text_language":"en"},{"subitem_text_value":"NTT Communication Science Laboratories","subitem_text_language":"en"},{"subitem_text_value":"NTT Communication Science Laboratories","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/48117/files/IPSJ-NL04163008.pdf"},"date":[{"dateType":"Available","dateValue":"2006-09-16"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL04163008.pdf","filesize":[{"value":"241.3 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"426a1529-d86b-4b29-8d0d-c31c7fd447d3","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2004 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"賀沢, 秀人"},{"creatorName":"泉谷知範"},{"creatorName":"平, 博順"},{"creatorName":"前田, 英作"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Hideto, Kazawa","creatorNameLang":"en"},{"creatorName":"Tomonori, Izumitani","creatorNameLang":"en"},{"creatorName":"Hirotoshi, Taira","creatorNameLang":"en"},{"creatorName":"Eisaku, Maeda","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical 
report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本論文では，与えられたトピック集合の中から文書が該当するトピックを全て選びだす多重トピック文書の自動分類にたいして，最大マージンラベリング法と呼ぶ新しい学習手法を提案する．文書多重ラベリングにおいては，トピックの任意の組合せ（ラベル）を独立したクラスとみなした多クラス分類学習を行うことにより，より精度の高いラベリングが実現できると期待される．しかし，文書分類に代表される多重ラベリングの実問題においては，ラベルあたりのサンプル数の減少にともなう過学習が問題となり，こうした試みは実際にはなされてこなかった．提案手法では，各ラベルを高次元空間に埋め込んだ後にその空間でのマージンを最大化することにより，過学習を押え精度の良い多重ラベリングを実現する．実際に，Web文書の文書多重ラベリングを対象として，Parametric Mixture Model，BoosTexter，SVM，最近傍法といった様々な種類の従来手法との比較実験をおこない，提案手法がより高精度なラベリングをより少ない訓練データで実現できることを実証した．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In this paper, we address the problem of learning in multi-category document labeling. The goal of multi-category document labeling is to assign a document all the relevant categories from a given category set. The proposed learning method, Maximal Margin Labeling (MML), treats multi-category labels, as well as single-category labels, as independent classes and learns a kind of multi-class classifier on the multi-class problem. Since the number of multi-category labels is quite large in general, data sparseness becomes a serious challenge to MML. Thus we utilize a maximal margin principle in a high-dimensional space, into which all possible labels are embedded, to avoid over-fitting. MML is compared with other labeling methods, Parametric Mixture Model, BoosTexter, Support Vector Machines, and k nearest neighbors, using a collection of multi-category labeled Web pages. 
The results show that MML outperforms other methods and its high performance is apparent even with a small number of training samples.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"60","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告自然言語処理（NL）"}],"bibliographicPageStart":"53","bibliographicIssueDates":{"bibliographicIssueDate":"2004-09-16","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"93(2004-NL-163)","bibliographicVolumeNumber":"2004"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"links":{},"id":48117,"updated":"2025-01-22T08:39:36.287354+00:00"}