{"updated":"2025-01-22T08:07:08.637228+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00049257","sets":["1164:4179:4278:4280"]},"path":["4280"],"owner":"1","recid":"49257","title":["口語的表現を含む日本語文の形態素解析の実現と評価"],"pubdate":{"attribute_name":"公開日","attribute_value":"1994-09-15"},"_buckets":{"deposit":"e64f3181-9452-476b-9864-4aa1dcedcb83"},"_deposit":{"id":"49257","pid":{"type":"depid","value":"49257","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"口語的表現を含む日本語文の形態素解析の実現と評価","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"口語的表現を含む日本語文の形態素解析の実現と評価"},{"subitem_title":"Implementation and evaluation of a morphological analysis method for colloquial Japanese text","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"1994-09-15","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"日本電気(株)情報メディア研究所"},{"subitem_text_value":"日本電気(株)情報メディア研究所"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Information Technology Research Labs, NEC Corp.","subitem_text_language":"en"},{"subitem_text_value":"Information Technology Research Labs, NEC Corp.","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/49257/files/IPSJ-NL94103014.pdf"},"date":[{"dateType":"Available","dateValue":"1996-09-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL94103014.pdf","filesize":[{"value":"1.4 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"09f68920-7453-4b7d-9b40-05d756061f01","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 1994 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"竹元, 義美"},{"creatorName":"福島, 俊一"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yoshikazu, Takemoto","creatorNameLang":"en"},{"creatorName":"Toshikazu, Fukushima","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"日本語文章の口語的表現に対応した形態素解析手法を提案し、その評価結果を報告する。広い分野のテキスト処理を想定した場合に口語的表現の形態素解析は重要であるものの、その精度は十分とは言えなかった。本稿では、口語的表現を正しく形態素解析するために2つの手法を示す。1つは、口語特有の言い回しを辞書登録すること、もう1つは、通常は平仮名表記する語を意図的に片仮名表記するなど、表記を変えた強調表現を通常の表記に直して辞書検索することである(口語置換検索処理)。これらの手法を実現した結果、口語的表現を多く含むテキストの文節区切り精度が1.8%向上し、テキストのタイプによらず安定した高い精度を得ることができた。辞書登録では、話し言葉特有の語の登録によって、文節区切りに失敗していた話し言葉の88%を正しく解析できた。口語置換検索処理では、形態素解析に失敗していた意図的な片仮名表記の75%、強調表現で特殊文字を含む単語の79%を救済できた。","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"This paper presents a new morphological analysis method for colloquial Japanese text, and describes its evaluation results. To enlarge application for natural language processing, it is necessary to deal with not only written language as before, but also colloquial language. This paper shows two techniques as the new method. One is to enter words peculiar to spoken language in dictionaries. The other is to replace words written in Katakana or special characters with usual writing and search through dictionaries for them. The two techniques can improve Bunsetsu-segmentation accuracy by 1.8% over a conventional method for text including colloquial expressions, and also accomplish stable accuracy for various types of text. The first technique can remove 88% of Bunsetsu-segmentation failures caused by spoken words. The second technique can remove 75% of failures caused by words written in Katakana expressly for emphasis, and 79% of failures caused by words written in special characters expressly for emphasis.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"112","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告自然言語処理(NL)"}],"bibliographicPageStart":"105","bibliographicIssueDates":{"bibliographicIssueDate":"1994-09-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"77(1994-NL-103)","bibliographicVolumeNumber":"1994"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"created":"2025-01-18T23:14:18.770818+00:00","id":49257,"links":{}}