{"created":"2025-01-18T23:09:37.954019+00:00","updated":"2025-01-22T11:06:19.728731+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00043161","sets":["1164:3782:3822:3827"]},"path":["3827"],"owner":"1","recid":"43161","title":["文字列出現頻度比較による情報源間の類似性判定"],"pubdate":{"attribute_name":"公開日","attribute_value":"2002-03-15"},"_buckets":{"deposit":"7f655fa3-6070-48c8-b576-3cfbf5178441"},"_deposit":{"id":"43161","pid":{"type":"depid","value":"43161","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"文字列出現頻度比較による情報源間の類似性判定","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"文字列出現頻度比較による情報源間の類似性判定"},{"subitem_title":"Measuring Similarity among Information Sources by Comparing String Frequency Distributions","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2002-03-15","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"NTT未来ねっと研究所"},{"subitem_text_value":"NTT未来ねっと研究所"},{"subitem_text_value":"NTT未来ねっと研究所"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"NTT Network Innovation Laboratories","subitem_text_language":"en"},{"subitem_text_value":"NTT Network Innovation Laboratories","subitem_text_language":"en"},{"subitem_text_value":"NTT Network Innovation Laboratories","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/43161/files/IPSJ-DD01032016.pdf"},"date":[{"dateType":"Available","dateValue":"2004-03-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-DD01032016.pdf","filesize":[{"value":"278.3 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"32"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"186ba5e5-ba46-48fd-9098-11658a7aa387","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2002 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"佐藤進也"},{"creatorName":"原田, 昌紀"},{"creatorName":"風間, 一洋"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Shin, -YaSato","creatorNameLang":"en"},{"creatorName":"Masanori, Harada","creatorNameLang":"en"},{"creatorName":"Kazuhiro, Kazama","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10539261","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical 
report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Webサーバなどの情報源が持つリソース群中の文字列出現頻度を比較することで情報源間の類似性を判定する手法を提案する．これは，文書中の単語の使用頻度などを筆致を表す特徴量とし，その一致度から著者の同一性を判定する著者推定の手法を応用したものである．本論文では，本手法を著者推定の一方法から導く過程を示す．さらに，本手法から導かれる情報源間の関係と，Webディレクトリにおけるカテゴリの階層構造から導かれる情報源間の関係との整合性を調べ，本手法の妥当性を検証する．また，応用例として情報源の特徴語抽出について述べる．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"We propose a novel method for measuring similarity among information sources, such as web servers, by comparing distributions of string occurrence frequency in their resources. This approach is an analogue of the literary detective, which is to identify an author by comparing statistical characteristics of documents (e.g., word frequency distributions) that reflect authors' writing styles. In this paper, we show how we have developed and validated the method. Similarity measured with this method is compared with that derived from a Web directory service where information sources are classified and hierarchically arranged. We also describe a way to apply the similarity measuring method to selecting feature terms of information sources.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"126","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告デジタルドキュメント（DD）"}],"bibliographicPageStart":"119","bibliographicIssueDates":{"bibliographicIssueDate":"2002-03-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"28(2001-DD-032)","bibliographicVolumeNumber":"2002"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"id":43161,"links":{}}