{"links":{},"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00040118","sets":["1164:3500:3510:3512"]},"path":["3512"],"owner":"1","recid":"40118","title":["圧縮性に注目した文書の関係分析手法"],"pubdate":{"attribute_name":"公開日","attribute_value":"2006-09-12"},"_buckets":{"deposit":"710a5406-44a4-4ffb-bdda-4b0e4ee3c3dd"},"_deposit":{"id":"40118","pid":{"type":"depid","value":"40118","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"圧縮性に注目した文書の関係分析手法","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"圧縮性に注目した文書の関係分析手法"},{"subitem_title":"Document Relation Analysis by Data Compression ","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2006-09-12","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"電気通信大学大学院情報システム学研究科"},{"subitem_text_value":"電気通信大学大学院情報システム学研究科"},{"subitem_text_value":"電気通信大学大学院情報システム学研究科"},{"subitem_text_value":"電気通信大学大学院情報システム学研究科"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Information Systems, University of Electro-Communications","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Systems, University of Electro-Communications","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Systems, University of Electro-Communications","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Systems, University of Electro-Communications","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/40118/files/IPSJ-FI06084008.pdf"},"date":[{"dateType":"Available","dateValue":"2008-09-12"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-FI06084008.pdf","filesize":[{"value":"706.3 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"39"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"d3e9112e-652f-4ab2-bc4b-895cdaa782d5","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2006 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"松崎, 大輔"},{"creatorName":"渡辺, 俊典"},{"creatorName":"古賀, 久志"},{"creatorName":"張諾"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Daisuke, MATSUZAKI","creatorNameLang":"en"},{"creatorName":"Toshinori, WATANABE","creatorNameLang":"en"},{"creatorName":"Hisashi, KOGA","creatorNameLang":"en"},{"creatorName":"Nuo, ZHANG","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10114171","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"文書間の関係を分析する手法として,辞書ベースの形態素解析を用いて,語の出現頻度による類似性や,キーワード抽出を用いる方法が幅広く利用されている.これらの伝統的手法は,日々新しい単語が生まれるインターネットなどの環境には万全とはいえない.その理由は,これらの伝統的解析手法の前提となる,辞書に登録されていない未知語が頻繁に出現するためである.本稿では文書の圧縮率に注目し,人手による解析辞書の事前整備が不要な,文書の関係分析手法を提案する.提案手法について実験を行いその有効性を検討する.キーワード 文書分析,クラスタリング,データ圧縮","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Dictionary-based morphological analysis is one of the main techniques for document analysis. It is usually used for keyword extraction and classification of similar words. Dictionary-based methods are weak for such environment as the Internet where new words appear that are not contained in the dictionary. In this study, we propose a new document relation analysis method based on the document’s compressibility, requiring no dictionary. The effectiveness of our method is examined through some experiments. Key words  Document analysis, Clustering, Data compression ","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"56","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告情報学基礎(FI)"}],"bibliographicPageStart":"51","bibliographicIssueDates":{"bibliographicIssueDate":"2006-09-12","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"94(2006-FI-084)","bibliographicVolumeNumber":"2006"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"created":"2025-01-18T23:07:18.515682+00:00","updated":"2025-01-22T12:35:12.575464+00:00","id":40118}