{"updated":"2025-01-20T00:08:57.304156+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00192435","sets":["6164:6165:6630:9610"]},"path":["9610"],"owner":"44499","recid":"192435","title":["transformer モデルを用いた機械学習によるサンスクリットの連声解除"],"pubdate":{"attribute_name":"公開日","attribute_value":"2018-11-24"},"_buckets":{"deposit":"c6f9d830-0e5b-4d5a-8f3a-35c3f06216a8"},"_deposit":{"id":"192435","pid":{"type":"depid","value":"192435","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"transformer モデルを用いた機械学習によるサンスクリットの連声解除","author_link":["448956","448955"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"transformer モデルを用いた機械学習によるサンスクリットの連声解除"},{"subitem_title":"Sanskrit Sandhi splitter by machine learning using Transformer","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"Sanskrit","subitem_subject_scheme":"Other"},{"subitem_subject":"sandhi","subitem_subject_scheme":"Other"},{"subitem_subject":"machine learning","subitem_subject_scheme":"Other"},{"subitem_subject":"Transformer","subitem_subject_scheme":"Other"}]},"item_type_id":"18","publish_date":"2018-11-24","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_18_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学"}]},"item_18_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"University of Tokyo","subitem_text_language":"en"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/192435/files/IPSJ-CH2018002.pdf","label":"IPSJ-CH2018002.pdf"},"date":[{"dateType":"Available","dateValue":"2019-11-24"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-CH2018002.pdf","filesize":[{"value":"378.8 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"24"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"4db59b97-2870-4062-b8b0-65f4c520596f","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2018 by the Information Processing Society of Japan"}]},"item_18_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"塚越, 柚季"}],"nameIdentifiers":[{}]}]},"item_18_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Yuzuki, Tsukagoshi","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference paper"}]},"item_18_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"サンスクリットの文中の単語に自動で形態情報を付与するのは容易ではない.文中の語形 (主に語尾) は，連声規則により主に後続する語の語頭の音によって変化する.このため，手を加えていないテキストに対する形態情報の付与が難しい.そこで連声規則が適用されている原典テキストから，連声規則前の形の単語の連続に戻すことが必要である.時間を要するが，語彙や形態の情報を元に連声を解除することは可能である.一方でそのような情報なしに Attention メカニズム + sequence to sequence モデルを用いて短時間に高精度で連声の解除にも成功している. 
本研究では Attention のみを使って高い精度を出すことができる Transformer モデルを用いて，高精度な連声の解除を行った.","subitem_description_type":"Other"}]},"item_18_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"It is difficult to automatically give morphological information to words in a Sanskrit sentence. Sandhi rules change the word forms, especially final sounds, depending on the initial sound of the following word. This makes automatic glossing of untokenized Sanskrit sentences difficult. Therefore, it is necessary to restore individual words from original sentences combined by the sandhi rules. Although it takes a long time, it is possible to split sandhi by vocabulary and/or morphological information. Alternatively, there is another approach that does not need vocabulary or morphological information but splits sandhi with less time, but higher accuracy. This research was performed to split Sanskrit sandhi by the Transformer model which gives high accuracy using only Attention.","subitem_description_type":"Other"}]},"item_18_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"14","bibliographic_titles":[{"bibliographic_title":"じんもんこん2018論文集"}],"bibliographicPageStart":"9","bibliographicIssueDates":{"bibliographicIssueDate":"2018-11-24","bibliographicIssueDateType":"Issued"},"bibliographicVolumeNumber":"2018"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"created":"2025-01-19T00:58:08.831530+00:00","id":192435,"links":{}}