{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00067009","sets":["1164:5352:5656:5934"]},"path":["5934"],"owner":"10","recid":"67009","title":["テキストマイニングのためのドメイン別単語辞書の構築方法"],"pubdate":{"attribute_name":"公開日","attribute_value":"2009-12-10"},"_buckets":{"deposit":"69e35d89-314a-49d0-b33c-41e00b4e92fe"},"_deposit":{"id":"67009","pid":{"type":"depid","value":"67009","revision_id":0},"owners":[10],"status":"published","created_by":10},"item_title":"テキストマイニングのためのドメイン別単語辞書の構築方法","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"テキストマイニングのためのドメイン別単語辞書の構築方法"},{"subitem_title":"A Term Selection Method for Domain-oriented Thesaurus in Text Mining","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2009-12-10","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"株式会社NTTデータ技術開発本部"},{"subitem_text_value":"株式会社NTTデータ技術開発本部"},{"subitem_text_value":"株式会社NTTデータ技術開発本部"},{"subitem_text_value":"東京医科歯科大学難治疾患研究所/ヒュービットジェノミクス株式会社"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"R&D Headquarters, NTT DATA CORPORATION","subitem_text_language":"en"},{"subitem_text_value":"R&D Headquarters, NTT DATA CORPORATION","subitem_text_language":"en"},{"subitem_text_value":"R&D Headquarters, NTT DATA CORPORATION","subitem_text_language":"en"},{"subitem_text_value":"Medical Research Institute, Tokyo Medical and Dental University / Research Institute, HuBit Genomix Inc.","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/67009/files/IPSJ-BIO09019023.pdf"},"date":[{"dateType":"Available","dateValue":"2011-12-10"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-BIO09019023.pdf","filesize":[{"value":"197.0 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"41"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"e39307f6-8b5c-4f09-8408-4b660ef71341","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2009 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"末永, 高志"},{"creatorName":"松永, 務"},{"creatorName":"関根, 純"},{"creatorName":"村松, 正明"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Takashi, Suenaga","creatorNameLang":"en"},{"creatorName":"Tsutomu, Matsunaga","creatorNameLang":"en"},{"creatorName":"Jun, Sekine","creatorNameLang":"en"},{"creatorName":"Masaaki, Muramatsu","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA12055912","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"企業に蓄積される文書情報の増加と共に,その有効活用に向けて,内容理解に基づく知識の集約のニーズが高まっている.知識の集約を行うテキストマイニングでは,一般に文書に出現する単語を基に意見の集計や関連文書の収集が行われている.この集計や収集に用いる単語辞書の構築にあたっては意見や分野 (ドメイン) を代表する単語が選定されることが重要である.本稿では,共起の対となる単語の数と,共起する単語対と分野の関係の二つの着目点を持つ,分野を代表する単語を選定するための単語ランキング方式を提案する.具体的には,多くの単語により詳述される単語を代表的な単語とみなし,分野に起因する統計的な交互作用の効果による単語の組合せの評価を共起する単語について加算した基準を用いる方式である.新たな単語辞書を構築する作業を想定した実データによる評価実験の結果から,提案法によるランキング上位 10% に含まれる代表的な単語の数が,無作為に選定する場合に比べて 57% 増加することがわかった.さらに,単語辞書を構築する際の要件を考察し,提案法は要件を網羅的に満足するものであることを明らかにした.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"There is a need to integrate knowledge extracted from electronic document data, since volume of the data is increasing in each company. The knowledge integration, such as survey of customer opinions and collection of relevant documents, is practically handled based on terms. Therefore, the terms contained on a text mining dictionary should be domain-oriented in the data. In this paper, we propose a term ranking method for selecting representative term by considering a statistical interaction of co-occurrence term pair in a specific domain and co-occurrence term number from a point of view that a representative term is described using a variety of other terms. Experimental results using real medical documents show that our method of term ranking performs good term ranking and representative term number included in our method's rank of top 10% increases by 57% than one in a random sampling. Additionally, our method is shown that it is well-suited for requirement in developing a domain-oriented thesaurus.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告バイオ情報学(BIO)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2009-12-10","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"23","bibliographicVolumeNumber":"2009-BIO-19"}]},"relation_version_is_last":true,"weko_creator_id":"10"},"id":67009,"updated":"2025-01-22T00:46:09.995995+00:00","links":{},"created":"2025-01-18T23:27:37.007888+00:00"}