{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00047835","sets":["1164:4179:4187:4191"]},"path":["4191"],"owner":"1","recid":"47835","title":["HTMLの木構造を利用した条件付確率場による固有表現分類: Wikipedia からのシソーラス半自動構築"],"pubdate":{"attribute_name":"公開日","attribute_value":"2007-05-25"},"_buckets":{"deposit":"6087c1f3-5f58-49c7-932b-0cf186d243d3"},"_deposit":{"id":"47835","pid":{"type":"depid","value":"47835","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"HTMLの木構造を利用した条件付確率場による固有表現分類: Wikipedia からのシソーラス半自動構築","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"HTMLの木構造を利用した条件付確率場による固有表現分類: Wikipedia からのシソーラス半自動構築"},{"subitem_title":"Named Entity Categorization Using Conditional Random Fields on HTML Tree Structure: Semi-Automatic Thesaurus Construction from Wikipedia","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2007-05-25","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"奈良先端科学技術大学院大学情報科学研究科"},{"subitem_text_value":"奈良先端科学技術大学院大学情報科学研究科"},{"subitem_text_value":"奈良先端科学技術大学院大学情報科学研究科"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Information Science, Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Science, Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Science, Nara Institute of Science and Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/47835/files/IPSJ-NL07179013.pdf"},"date":[{"dateType":"Available","dateValue":"2009-05-25"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL07179013.pdf","filesize":[{"value":"602.8 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"300e22ea-80f8-45fb-8ddd-3be0ee0dea7d","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2007 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"渡邉, 陽太郎"},{"creatorName":"浅原, 正幸"},{"creatorName":"松本, 裕治"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yotaro, Watanabe","creatorNameLang":"en"},{"creatorName":"Masayuki, Asahara","creatorNameLang":"en"},{"creatorName":"Yuji, Matsumoto","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本稿では，Wikipedia 
内に出現する固有表現を獲得し，精度よく分類する手法を提案する．Wikipediaの記事に出現するアンカーテキストの単語および句は，リンク先の記事に語釈が記述されている．このWikipedia の特性を用いて，我々は，固有表現の分類問題を固有表現を表すアンカーテキストに対するラベル付与問題として定式化する．まず，アンカーテキストをノードとして定義されるグラフを構成する．次に，グラフにHTML の構造を取り入れるため，HTML のDOM 構造に基づく3 種類のエッジを導入する．このようにして構成したグラフのノードに対するラベル付与を教師あり学習器であるConditional Random Fields (CRFs) を用いて行う．しかし，構成したグラフは閉路を含むため，CRFs の正確な演算を行うことは計算量が大きく困難である．そこで，Tree-based Reparameterization (TRP) を用いて近似的に演算をおこなう手法を導入する．実施した評価実験において，提案手法が２つ組に対するSupport Vector Machines の順次適用による手法と比較して高い精度で固有表現の分類ができたことを報告する．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"This paper presents a method for categorizing named entities in Wikipedia. In Wikipedia, an anchor text is glossed in a linked HTML text. We formalize named entity categorization as a task of categorizing anchor texts with linked HTML texts which glosses a named entity. Using this representation, we introduce a graph structure in which anchor texts are regarded as nodes. In order to incorporate HTML structure on the graph, three types of cliques are defined based on the HTML DOM structure. We propose a method with Conditional Random Fields (CRFs) to categorize the nodes on the graph. Since the defined graph include cycles, the exact inference of CRFs is computationally expensive. We introduce an approximate inference method using Tree-based Reparameterization (TRP) to reduce computational cost. 
Experimental results show that the proposed method outperforms a baseline method that uses Support Vector Machines.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"78","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告自然言語処理（NL）"}],"bibliographicPageStart":"73","bibliographicIssueDates":{"bibliographicIssueDate":"2007-05-25","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"47(2007-NL-179)","bibliographicVolumeNumber":"2007"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"id":47835,"updated":"2025-01-22T08:48:35.618878+00:00","links":{},"created":"2025-01-18T23:13:13.196265+00:00"}