{"updated":"2025-01-21T15:24:58.998352+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00091615","sets":["581:7002:7148"]},"path":["7148"],"owner":"11","recid":"91615","title":["統計的機械学習を用いた歴史的資料への濁点付与の自動化"],"pubdate":{"attribute_name":"公開日","attribute_value":"2013-04-15"},"_buckets":{"deposit":"61d2011e-e2e2-434e-bac1-68d6cddbfc6a"},"_deposit":{"id":"91615","pid":{"type":"depid","value":"91615","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"統計的機械学習を用いた歴史的資料への濁点付与の自動化","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"統計的機械学習を用いた歴史的資料への濁点付与の自動化"},{"subitem_title":"A Statistical Machine Learning Approach to Automatic Labeling of Voiced Consonants for Historical Texts","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[一般論文] 自然言語処理,機械学習,歴史的資料,濁点,近代文語論説文","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2013-04-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学/国立国語研究所"},{"subitem_text_value":"奈良先端科学技術大学院大学"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology / National Institute for Japanese Language and Linguistics","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/91615/files/IPSJ-JNL5404049.pdf"},"date":[{"dateType":"Available","dateValue":"2015-04-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL5404049.pdf","filesize":[{"value":"844.5 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"c966f394-235d-463a-88c6-6e505ecedf44","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2013 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"岡, 照晃"},{"creatorName":"小町, 守"},{"creatorName":"小木曽, 智信"},{"creatorName":"松本, 裕治"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Teruaki, Oka","creatorNameLang":"en"},{"creatorName":"Mamoru, Komachi","creatorNameLang":"en"},{"creatorName":"Toshinobu, Ogiso","creatorNameLang":"en"},{"creatorName":"Yuji, Matsumoto","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"生の歴史的資料の中には,濁点が期待されるのに濁点の付いていない,濁点無表記の文字が多く含まれている.濁点無表記文字は可読性・検索性を下げるため,歴史コーパス整備の際には濁点付与が行われる.しかし,濁点付与は専門家にしか行えないため,作業人員の確保が大きな課題となっている.また,作業対象が膨大であるため,作業を完了するまでにも時間がかかる.そこで本論文では,濁点付与の自動化について述べる.我々は濁点付与を文字単位のクラス分類問題として定式化した.提案手法は分類を周辺文字列の情報のみで行うため,分類器の学習には形態素解析済みコーパスを必要としない.大規模な近代語のコーパスを学習に使用し,近代の雑誌「国民之友」に適合率96%,再現率98%の濁点付与を達成した.","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Raw historical texts often include mark-lacking characters, which lack compulsory voiced consonant mark. Since mark-lacking characters degrade readability and retrievability, voiced consonant marks are annotated when creating historical corpus. However, since only experts can perform the labeling procedure for historical texts, getting annotators is a large challenge. Also, it is time-consuming to conduct annotation for large-scale historical texts. In this paper, we propose an approach to automatic labeling of voiced consonant marks for mark-lacking characters. We formulate the task into a character-based classification problem. Since our method uses as its feature set only surface information about the surrounding characters, we do not require corpus annotated with word boundaries and POS-tags for training. We exploited large data sets and achieved 96% precision and 98% recall on a near-modern Japanese magazine, Kokumin-no-Tomo.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"1654","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"1641","bibliographicIssueDates":{"bibliographicIssueDate":"2013-04-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"4","bibliographicVolumeNumber":"54"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-18T23:40:50.650976+00:00","id":91615,"links":{}}