{"id":2000993,"updated":"2025-02-25T05:35:28.760080+00:00","links":{},"created":"2025-02-25T05:35:24.320610+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:02000993","sets":["1164:4179:1740452116224:1740452168372"]},"path":["1740452168372"],"owner":"80578","recid":"2000993","title":["LLMの学習過程におけるタスク性能の不安定性とその緩和"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2025-03-01"},"_buckets":{"deposit":"c20b76e8-564f-45c5-8336-02e95efc0c83"},"_deposit":{"id":"2000993","pid":{"type":"depid","value":"2000993","revision_id":0},"owners":[80578],"status":"published","created_by":80578},"item_title":"LLMの学習過程におけるタスク性能の不安定性とその緩和","author_link":[],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"LLMの学習過程におけるタスク性能の不安定性とその緩和","subitem_title_language":"ja"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"大規模言語モデル","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2025-03-01","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"奈良先端科学技術大学院大学/国立情報学研究所大規模言語モデル研究開発センター"},{"subitem_text_value":"奈良先端科学技術大学院大学/国立情報学研究所大規模言語モデル研究開発センター"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/2000993/files/IPSJ-NL25263015.pdf","label":"IPSJ-NL25263015.pdf"},"date":[{"dateType":"Available","dateValue":"2027-03-01"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL25263015.pdf","filesize":[{"value":"1.2 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"93ccbd5f-65fa-43af-be55-7f28258cab9a","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2025 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"西田,悠人"}]},{"creatorNames":[{"creatorName":"小田,悠介"}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8779","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"言語モデルの学習の安定性は,モデルの信頼性を担保するために重要な要素である.従来,モデルの安定性は損失関数の形状や収束性が主として議論されてきた.そのため,大規模言語モデル(LLM)は自然言語生成タスクをはじめとする下流タスクによって性能を評価・比較するのが主流であるにもかかわらず,LLMの学習過程における下流タスクの性能の安定性についての知見は限定的である.そこで,本稿では,日本語を多く含むコーパスで訓練されたLLMの学習過程の下流タスク性能を調査し,タスク性能の不安定性が多くの事例で観察されることを示す.また,LLMのタスク性能の不安定性を事後処理によって緩和するための初期検討として,チェックポイント平均化および多数決アンサンブルの効果について報告する.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"8","bibliographic_titles":[{"bibliographic_title":"研究報告自然言語処理(NL)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2025-03-01","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"15","bibliographicVolumeNumber":"2025-NL-263"}]},"relation_version_is_last":true,"weko_creator_id":"80578"}}