{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00089012","sets":["6164:6165:7006:7012"]},"path":["7012"],"owner":"11","recid":"89012","title":["WWW文書における属性情報抽出の試み"],"pubdate":{"attribute_name":"公開日","attribute_value":"1996-10-23"},"_buckets":{"deposit":"ce067343-5fea-4108-9e05-4d44c0fef7a0"},"_deposit":{"id":"89012","pid":{"type":"depid","value":"89012","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"WWW文書における属性情報抽出の試み","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"WWW文書における属性情報抽出の試み"},{"subitem_title":"Experimental Report of automatic meta-info extraction from WWW documents","subitem_title_language":"en"}]},"item_type_id":"18","publish_date":"1996-10-23","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_18_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"NTTソフトウェア研究所"},{"subitem_text_value":"shimizu@ntt-20.ntt.jp"},{"subitem_text_value":"NTTソフトウェア研究所"},{"subitem_text_value":"NTTソフトウェア研究所"}]},"item_18_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"NTT Sortware Laboralories","subitem_text_language":"en"},{"subitem_text_value":"NTT Sortware Laboralories","subitem_text_language":"en"},{"subitem_text_value":"NTT Sortware Laboralories","subitem_text_language":"en"},{"subitem_text_value":"NTT Sortware Laboralories","subitem_text_language":"en"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/89012/files/IPSJ-DPSWS1996068.pdf"},"date":[{"dateType":"Available","dateValue":"1998-10-23"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-DPSWS1996068.pdf","filesize":[{"value":"453.0 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"b1bd3361-5933-4c9c-a7ce-622ef1644f5b","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 1996 by the Information Processing Society of Japan"}]},"item_18_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"清水, 奨"},{"creatorName":"神林, 隆"},{"creatorName":"佐藤, 進也"},{"creatorName":"ポール, フランシス"}],"nameIdentifiers":[{}]}]},"item_18_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Susumu, Shimizu","creatorNameLang":"en"},{"creatorName":"Takashi, Kambayashi","creatorNameLang":"en"},{"creatorName":"Shin-ya, Sato","creatorNameLang":"en"},{"creatorName":"Paul, Francis","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference paper"}]},"item_18_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"WWW上で提供される膨大な情報に対して、検索支援のニーズが高まっている。しかし、現在の検索システムの多くはそれぞれが収集した情報を個別にデータベース化しており、互換性は考えられていない。このため検索システムの数だけ収集ロボットが作られるといった非効率性が指摘されている。複数の検索システムの協調動作を実現するためには、収集した情報が持つさまざまな属性情報(文書タイプ、言語その他)を取り出し、共通に利用できるようにすることが重要である。本稿では、WWWで提供される文書を対象とし、属性情報を抽出するためのフレームワークについて述べる。属性の抽出を文書タイプの識別、言語の識別をはじめとする幾つかの工程にわけ、著者らが開発中の検索システムIngridにおける実装について述べる。また各々の工程における技術的な問題点と解決のためのアプローチを示す。","subitem_description_type":"Other"}]},"item_18_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"505","bibliographic_titles":[{"bibliographic_title":"マルチメディア通信と分散処理ワークショップ論文集"}],"bibliographicPageStart":"499","bibliographicIssueDates":{"bibliographicIssueDate":"1996-10-23","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicVolumeNumber":"1996"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":89012,"updated":"2025-01-21T16:46:23.916895+00:00","links":{},"created":"2025-01-18T23:38:56.723001+00:00"}