{"updated":"2025-01-21T23:26:02.968294+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00070540","sets":["934:1022:6082:6178"]},"path":["6178"],"owner":"10","recid":"70540","title":["くだけた表現を高精度に解析するための正規化ルール自動生成手法"],"pubdate":{"attribute_name":"公開日","attribute_value":"2010-09-28"},"_buckets":{"deposit":"4f301471-9272-4867-aa65-404297a4dcf2"},"_deposit":{"id":"70540","pid":{"type":"depid","value":"70540","revision_id":0},"owners":[10],"status":"published","created_by":10},"item_title":"くだけた表現を高精度に解析するための正規化ルール自動生成手法","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"くだけた表現を高精度に解析するための正規化ルール自動生成手法"},{"subitem_title":"Automatic Rule Generation Approach for Morphological Analysis of Peculiar Expressions on Blog Documents","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"研究論文","subitem_subject_scheme":"Other"}]},"item_type_id":"3","publish_date":"2010-09-28","item_3_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"KDDI研究所"},{"subitem_text_value":"KDDI研究所"},{"subitem_text_value":"KDDI研究所"},{"subitem_text_value":"KDDI研究所"}]},"item_3_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"KDDI R&D Laboratories, Inc.","subitem_text_language":"en"},{"subitem_text_value":"KDDI R&D Laboratories, Inc.","subitem_text_language":"en"},{"subitem_text_value":"KDDI R&D Laboratories, Inc.","subitem_text_language":"en"},{"subitem_text_value":"KDDI R&D Laboratories, Inc.","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/70540/files/IPSJ-TOD0303007.pdf"},"date":[{"dateType":"Available","dateValue":"2012-09-28"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-TOD0303007.pdf","filesize":[{"value":"447.6 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"13"},{"tax":["include_tax"],"price":"0","billingrole":"39"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"caef7bc6-5700-47db-bbe4-7fcd8af8cf4a","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2010 by the Information Processing Society of Japan"}]},"item_3_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"池田, 和史"},{"creatorName":"柳原, 正"},{"creatorName":"松本, 一則"},{"creatorName":"滝嶋, 康弘"}],"nameIdentifiers":[{}]}]},"item_3_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kazushi, Ikeda","creatorNameLang":"en"},{"creatorName":"Tadashi, Yanagihara","creatorNameLang":"en"},{"creatorName":"Kazunori, Matsumoto","creatorNameLang":"en"},{"creatorName":"Yasuhiro, Takishima","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_3_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11464847","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_3_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7799","subitem_source_identifier_type":"ISSN"}]},"item_3_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"ブログ上の文書には口語的な表現や特有の表記などのくだけた表現が多数含まれるため,一般の形態素解析器を用いても十分な解析精度を得ることはできない.くだけた表現は人手により辞書登録されることが一般的であるが,人的コストの大きさや専門的な知識を必要とすることが課題である.本稿ではくだけた表現を正規な表現に修正することで高精度な形態素解析を実現する手法を提案する.提案手法ではくだけた表現の修正候補文字列をくだけた表現の少ない文書から自動的に検索し,修正ルールを生成する.生成した多数の修正ルールから文脈に適した修正ルールを選択的に適用するために,検索結果における修正候補文字列の出現頻度,修正前後の文字列間における編集距離,修正前後の文の形態素解析結果の比較,を用いて修正ルールをスコアリングする手法を合わせて提案する.提案手法と従来手法の性能比較評価実験を行い,各手法における未知語の出現率や単語区切りの正確さ,修正前後の文の意味変化を定量的に評価した.提案手法では従来手法と同程度の単語区切りの正確さを維持しながら,対象文章の未知語出現数を 36.1% 減少させることに成功した.これは従来手法における未知語減少数の 2.5 倍以上である.","subitem_description_type":"Other"}]},"item_3_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In this paper, we propose an algorithm for reducing the number of unknown words on blog documents by replacing peculiar expressions with formal expressions. Japanese blog documents contain many peculiar expressions regarded as unknown sequences by morphological analyzers. Reducing these unknown sequences improves the accuracy of morphological analysis for blog documents. Manual registration of peculiar expressions to the morphological dictionaries is a conventional solution, which is costly and requires specialized knowledge. In our algorithm, substitution candidates of peculiar expressions are automatically retrieved from formally written documents such as newspapers and stored as substitution rules. For the correct replacement, a substitution rule is selected based on three criteria; its appearance frequency in retrieval process, the edit distance between substituted sequences and the original text, and the estimated accuracy improvements of word segmentation after the substitution. Experimental results show our algorithm reduces the number of unknown words by 36.1%, maintaining the same segmentation accuracy as the conventional methods, which is 2.5 times the reduction rate of the conventional methods.","subitem_description_type":"Other"}]},"item_3_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"77","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌データベース(TOD)"}],"bibliographicPageStart":"68","bibliographicIssueDates":{"bibliographicIssueDate":"2010-09-28","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"3","bibliographicVolumeNumber":"3"}]},"relation_version_is_last":true,"weko_creator_id":"10"},"created":"2025-01-18T23:29:42.125634+00:00","id":70540,"links":{}}