{"links":{},"id":2004369,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:02004369","sets":["1164:4179:1740452116224:1757047662412"]},"path":["1757047662412"],"owner":"80578","recid":"2004369","title":["ニューラル言語モデルの学習初期における単語の分節化"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2025-09-14"},"_buckets":{"deposit":"6fb515bf-3935-4550-8f94-7c463a89a27b"},"_deposit":{"id":"2004369","pid":{"type":"depid","value":"2004369","revision_id":0},"owners":[80578],"status":"published","created_by":80578},"item_title":"ニューラル言語モデルの学習初期における単語の分節化","author_link":[],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"ニューラル言語モデルの学習初期における単語の分節化","subitem_title_language":"ja"},{"subitem_title":"Early Word Segmentation in Neural Language Models","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"医療応用","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2025-09-14","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"},{"subitem_text_value":"奈良先端科学技術大学院大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"Nara Institute of Science and Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/2004369/files/IPSJ-NL25265016.pdf","label":"IPSJ-NL25265016.pdf"},"date":[{"dateType":"Available","dateValue":"2027-09-14"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL25265016.pdf","filesize":[{"value":"1.2 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"54d74379-374d-443b-993a-9cf482c1427e","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2025 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"帖佐,宗浩"}]},{"creatorNames":[{"creatorName":"西田,悠人"}]},{"creatorNames":[{"creatorName":"大羽,未悠"}]},{"creatorNames":[{"creatorName":"渡辺,太郎"}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical 
report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8779","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"人間の乳幼児はその言語獲得の初期段階において，“I-wanna-do-it”といった表現に対して“I”，“wanna”，“do”，“it”のような要素ごとの区切りを見出すことなく，全体としてひとつの意味を持つ「かたまり」(ホロフレーズ)として認識・使用することが知られている．他方で，近年のニューラル言語モデル(NLM)と人間の言語獲得を対比する一連の研究では，主として事前に設定された語彙を所与とした学習が行われてきている．しかし，語彙を与えた状態では，語の区切りをどのように認識していくかという言語獲得のより基盤的な問いに迫るには限界がある．そこで本研究では，NLMの学習初期においてどのような「かたまり」が認識されているかを検証する．子どもに向けた発話を収集した英語コーパスを用いて，事前の語彙を仮定しない文字レベルNLMを構築し，モデルの分岐エントロピーの局所的な増加部分を分節とみなすという仮定のもとに，NLMにおける単語の分節化の過程を観察した．実験の結果，少なくとも本研究の設定のもとでは，人間の乳幼児が認識・使用するようなホロフレーズは，NLMの学習初期においてひとつの「かたまり」として認識される傾向はほとんど認められなかった．","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"9","bibliographic_titles":[{"bibliographic_title":"研究報告自然言語処理（NL）"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2025-09-14","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"16","bibliographicVolumeNumber":"2025-NL-265"}]},"relation_version_is_last":true,"weko_creator_id":"80578"},"created":"2025-09-05T05:14:19.718618+00:00","updated":"2025-09-05T05:14:23.389228+00:00"}