{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00087035","sets":["6164:6165:6630:6922"]},"path":["6922"],"owner":"11","recid":"87035","title":["未整備の歴史的文献への濁点の自動付与アプリケーション"],"pubdate":{"attribute_name":"公開日","attribute_value":"2012-11-10"},"_buckets":{"deposit":"773e114e-8a34-4b6b-9ccb-231c984c02fb"},"_deposit":{"id":"87035","pid":{"type":"depid","value":"87035","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"未整備の歴史的文献への濁点の自動付与アプリケーション","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"未整備の歴史的文献への濁点の自動付与アプリケーション"},{"subitem_title":"Application of Automatic Labeling of Dakuten for Raw Historical Text","subitem_title_language":"en"}]},"item_type_id":"18","publish_date":"2012-11-10","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_18_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学／人間文化研究機構国立国語研究所"},{"subitem_text_value":"奈良先端科学技術大学院大学"}]},"item_18_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology/National Institute for Japanese Language and Linguistics","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/87035/files/IPSJ-S-CH2012025.pdf"},"date":[{"dateType":"Available","dateValue":"2013-11-10"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-S-CH2012025.pdf","filesize":[{"value":"401.6 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"24"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"ba6a6e9c-6e07-4e69-95f6-e375c49eee57","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2012 by the Information Processing Society of Japan"}]},"item_18_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"岡, 照晃"},{"creatorName":"小町, 守"},{"creatorName":"小木曽, 智信"},{"creatorName":"松本, 裕治"}],"nameIdentifiers":[{}]}]},"item_18_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Teruaki, Oka","creatorNameLang":"en"},{"creatorName":"Mamoru, Komachi","creatorNameLang":"en"},{"creatorName":"Toshinobu, Ogiso","creatorNameLang":"en"},{"creatorName":"Yuji, Matsumoto","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference 
paper"}]},"item_18_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"生の歴史的文献の中には，濁点が期待されるのに濁点の付いていない，濁点無表記の文字が多く含まれている．濁点無表記文字は可読性・検索性を下げるため，歴史コーパス整備の際には濁点付与が行われる．しかし，濁点付与は専門家にしか行えないため，作業人員の確保が大きな課題となっている．また，作業対象が膨大であるため，作業を完了するまでにも時間がかかる．そこで，我々は統計的機械学習を使った濁点自動付与アプリケーションを開発した．このアプリケーションは太陽コーパスにおける濁点の統計データに基づき，濁点無表記の文字を含んだ近代文語論説文へ自動で濁点付与を行うことができる．本アプリケーションを用い，近代の雑誌「国民之友」に適合率約96%，再現率約98%の濁点付与を達成した．本論文では，アプリケーションに実装した手法と，アプリケーションの仕様について概説する．","subitem_description_type":"Other"}]},"item_18_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Raw historical texts often include mark-lacking characters, which lack compulsory dakuten. Since mark-lacking characters degrade readability and retrievability, dakutens are annotated when creating a historical corpus. However, since only experts can perform the labeling procedure for historical texts, getting annotators is a large challenge. Also, it is time-consuming to conduct annotation for large-scale historical materials. Therefore, we developed an application of automatic labeling of dakuten for mark-lacking characters by using a machine learning approach. Our application labels dakuten automatically for raw texts written in near-modern literary style of Japanese based on the statistics of dakuten in Taiyo corpus. We used this application, and achieved about 96% precision and 98% recall on a near-modern Japanese magazine, Kokumin-no-Tomo. 
In this paper, we abstract our implemented method and specification of the application.","subitem_description_type":"Other"}]},"item_18_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"198","bibliographic_titles":[{"bibliographic_title":"じんもんこん2012論文集"}],"bibliographicPageStart":"191","bibliographicIssueDates":{"bibliographicIssueDate":"2012-11-10","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"7","bibliographicVolumeNumber":"2012"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":87035,"updated":"2025-01-21T17:28:13.264994+00:00","links":{},"created":"2025-01-18T23:37:54.559272+00:00"}