{"links":{},"id":2007966,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:02007966","sets":["1164:2240:1771568311705:1771568419073"]},"path":["1771568419073"],"owner":"80578","recid":"2007966","title":["実機ベンチマーク報酬に基づくオンライン強化学習によるLLMのHPCコード生成能力向上"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2026-03-09"},"_buckets":{"deposit":"6b6fe49d-263f-4ecb-93d9-9eef1791dbb9"},"_deposit":{"id":"2007966","pid":{"type":"depid","value":"2007966","revision_id":0},"owners":[80578],"status":"published","created_by":80578},"item_title":"実機ベンチマーク報酬に基づくオンライン強化学習によるLLMのHPCコード生成能力向上","author_link":[],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"実機ベンチマーク報酬に基づくオンライン強化学習によるLLMのHPCコード生成能力向上","subitem_title_language":"ja"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"性能評価とLLM活用技術","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2026-03-09","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"名古屋大学情報学部コンピュータ科学科"},{"subitem_text_value":"名古屋大学情報学研究科"},{"subitem_text_value":"名古屋大学情報基盤センター"},{"subitem_text_value":"名古屋大学情報基盤センター"},{"subitem_text_value":"名古屋大学情報基盤センター/名古屋大学情報基盤センター"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/2007966/files/IPSJ-HPC26203049.pdf","label":"IPSJ-HPC26203049.pdf"},"date":[{"dateType":"Available","dateValue":"2028-03-09"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-HPC26203049.pdf","filesize":[{"value":"1.2 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"14"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"902900d3-d71a-4204-84ce-c105b56c98c3","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2026 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"三笠,諒"}]},{"creatorNames":[{"creatorName":"林,俊一郎"}]},{"creatorNames":[{"creatorName":"椋木,大地"}]},{"creatorNames":[{"creatorName":"星野,哲也"}]},{"creatorNames":[{"creatorName":"片桐,孝洋"}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10463942","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8841","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"大規模言語モデル(LLM)はコード生成において高い能力を示すが,生成コードの実行時性能は保証されず,HPC分野において実行時性能を報酬としてLLMを訓練する試みはほとんど行われていない.本研究では,LLMが生成したコードをスーパーコンピュータ上で実行し,測定した実行時性能(GFLOPS)を報酬として直接フィードバックするオンライン強化学習手法を提案する.さらに,問題ごとに許容する最適化技法を段階的に変化させ,多様な観点からコード最適化を学習させるStaged Quality-Diversity(SQD)アルゴリズムを導入する.GPU学習クラスタとCPUベンチマーククラスタを接続した分散システムを構築し,Group Relative Policy Optimization(GRPO)によりQwen2.5 Coder 14Bを倍精度行列積タスクで訓練する.2つの実験を通じて,実行時性能フィードバックと段階的最適化を組み合わせた強化学習がLLMのHPCコード生成能力を改善しうることを示す.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"10","bibliographic_titles":[{"bibliographic_title":"研究報告ハイパフォーマンスコンピューティング(HPC)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2026-03-09","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"49","bibliographicVolumeNumber":"2026-HPC-203"}]},"relation_version_is_last":true,"weko_creator_id":"80578"},"created":"2026-02-25T05:54:04.235140+00:00","updated":"2026-02-25T05:54:08.098510+00:00"}