{"id":2004355,"created":"2025-09-05T05:13:48.389472+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:02004355","sets":["1164:4179:1740452116224:1757047662412"]},"path":["1757047662412"],"owner":"80578","recid":"2004355","title":["日本語RAGにおけるGenerator評価ベンチマークの構築"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2025-09-14"},"_buckets":{"deposit":"ed9ead53-00fb-472d-bdd7-ae770aa8769a"},"_deposit":{"id":"2004355","pid":{"type":"depid","value":"2004355","revision_id":0},"owners":[80578],"status":"published","created_by":80578},"item_title":"日本語RAGにおけるGenerator評価ベンチマークの構築","author_link":[],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"日本語RAGにおけるGenerator評価ベンチマークの構築","subitem_title_language":"ja"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"データセット構築","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2025-09-14","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"株式会社neoAI"},{"subitem_text_value":"株式会社neoAI"},{"subitem_text_value":"株式会社neoAI"},{"subitem_text_value":"株式会社neoAI"},{"subitem_text_value":"株式会社neoAI"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"neoAI.inc","subitem_text_language":"en"},{"subitem_text_value":"neoAI.inc","subitem_text_language":"en"},{"subitem_text_value":"neoAI.inc","subitem_text_language":"en"},{"subitem_text_value":"neoAI.inc","subitem_text_language":"en"},{"subitem_text_value":"neoAI.inc","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/2004355/files/IPSJ-NL25265002.pdf","label":"IPSJ-NL25265002.pdf"},"date":[{"dateType":"Available","dateValue":"2027-09-14"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL25265002.pdf","filesize":[{"value":"607.8 KB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"4ec6682a-5d1b-42df-809d-35e81e9daaf0","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2025 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"板井,孝樹"}]},{"creatorNames":[{"creatorName":"長谷川,駿一"}]},{"creatorNames":[{"creatorName":"山本,勇太"}]},{"creatorNames":[{"creatorName":"峰岸,剛基"}]},{"creatorNames":[{"creatorName":"大槻,真輝"}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8779","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"検索拡張生成(RAG)は,入力クエリに対し外部文書集合から検索器(Retriever)により取得した関連文書集合に基づき,大規模言語モデル(LLM)などの生成器(Generator)が回答を作成する手法である.Generatorには,長文中からの情報抽出と統合,多段階推論,表形式情報の解釈,関連情報不在時の適切な回答拒否など,複数の能力が求められる.しかし既存のGenerator評価ベンチマークは,これらの能力の一部に限定される場合が多く,同一条件下で多面的かつ総合的に評価できる枠組みは十分に整備されていない.本研究では,RAGのGeneratorの能力評価における観点を体系化し,観点1種または2種の全組合せを網羅する評価ベンチマークのJ-RAGBench(Japanese RAG Generator Benchmark)を構築することで,より実用的かつ包括的な評価を可能にすることを目的とする.API提供モデルとオープンウェイトモデルの主要なLLMを評価した結果,総合正解率が9割を超えたモデルは存在せず,評価カテゴリごとの正解率に差が確認され,モデル間で能力の得意・不得意が定量的に明らかになった.これらの結果は,本ベンチマークがRAG実運用でのモデル選定やRAG特化モデル構築のための有用な指標となることを示す.本ベンチマークの評価データセットはオープンソースとして公開する*1.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"14","bibliographic_titles":[{"bibliographic_title":"研究報告自然言語処理(NL)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2025-09-14","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicVolumeNumber":"2025-NL-265"}]},"relation_version_is_last":true,"weko_creator_id":"80578"},"updated":"2025-09-05T05:13:52.134582+00:00","links":{}}