{"id":74062,"updated":"2025-01-21T21:42:51.875183+00:00","links":{},"created":"2025-01-18T23:31:45.287949+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00074062","sets":["1164:5159:6316:6416"]},"path":["6416"],"owner":"10","recid":"74062","title":["固有表現抽出のための大規模訓練データの自動獲得"],"pubdate":{"attribute_name":"公開日","attribute_value":"2011-05-09"},"_buckets":{"deposit":"567915e6-cc8c-443c-bdb4-969cf2a58faa"},"_deposit":{"id":"74062","pid":{"type":"depid","value":"74062","revision_id":0},"owners":[10],"status":"published","created_by":10},"item_title":"固有表現抽出のための大規模訓練データの自動獲得","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"固有表現抽出のための大規模訓練データの自動獲得"},{"subitem_title":"Automatic Acquisition of Huge Training Data for Named Entity Recognition","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"学生セッション(3)","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2011-05-09","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学大学院情報理工学系研究科"},{"subitem_text_value":"東京大学大学院情報理工学系研究科"},{"subitem_text_value":"東北大学大学院情報科学研究科"},{"subitem_text_value":"マイクロソフトリサーチアジア"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Information Science and Technology, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Science and Technology, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Sciences, Tohoku University","subitem_text_language":"en"},{"subitem_text_value":"Microsoft Research Asia","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/74062/files/IPSJ-SLP11086011.pdf"},"date":[{"dateType":"Available","dateValue":"2013-05-09"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP11086011.pdf","filesize":[{"value":"592.2 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"d3e6ae9f-3455-4c43-bb9e-f4b3c4860c8f","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2011 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"宇佐美, 佑"},{"creatorName":"Han-CheolCho"},{"creatorName":"岡崎, 直観"},{"creatorName":"辻井, 潤一"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yu, Usami","creatorNameLang":"en"},{"creatorName":"Cho, Han-Cheol","creatorNameLang":"en"},{"creatorName":"Naoaki, Okazaki","creatorNameLang":"en"},{"creatorName":"Jun'ichi, Tsujii","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"固有表現抽出は,質問応答や情報抽出などのアプリケーションにおいて基盤技術となっており,人名,地名,組織名,遺伝子名など,様々な意味クラスで試みられている.高い性能をもつ固有表現抽出器を構築するためには,あらかじめ意味クラスを付与した訓練データを用意し,機械学習アルゴリズムに基づいて構築するのが一般的である.しかしながら,訓練データの整備は,人手での作業に頼っているのが現状である.これでは,様々なドメイン・意味クラスで,広く固有表現抽出を利用しようにも,訓練データの入手性が固有表現抽出器構築のボトルネックになると考えられる.そこで,本研究では,より入手の容易な語彙データベースと生テキストを用いることで,固有表現抽出のための訓練データを人手に依らず自動的に獲得する手法を提案する.語彙データベースに含まれる豊富な情報を利用することで,高適合率な訓練データを自動獲得し,等位構造解析とself-trainingを適用することで,人手で作成した訓練データに迫る,高品質な訓練データを獲得した.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"8","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2011-05-09","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"11","bibliographicVolumeNumber":"2011-SLP-86"}]},"relation_version_is_last":true,"weko_creator_id":"10"}}