{"created":"2025-12-01T04:43:36.894431+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:02006071","sets":["1164:4179:1740452116224:1761810280162"]},"path":["1761810280162"],"owner":"80578","recid":"2006071","title":["Japanese SimpleQA: 日本語における事実に基づいた回答能力の評価ベンチマーク"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2025-12-08"},"_buckets":{"deposit":"2973d120-2ffd-4a7b-9d46-a48aeea83bc8"},"_deposit":{"id":"2006071","pid":{"type":"depid","value":"2006071","revision_id":0},"owners":[80578],"status":"published","created_by":80578},"item_title":"Japanese SimpleQA: 日本語における事実に基づいた回答能力の評価ベンチマーク","author_link":[],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Japanese SimpleQA: 日本語における事実に基づいた回答能力の評価ベンチマーク","subitem_title_language":"ja"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"データベース(言語)","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2025-12-08","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"株式会社Preferred Networks"},{"subitem_text_value":"株式会社Preferred Networks"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/2006071/files/IPSJ-NL25266016.pdf","label":"IPSJ-NL25266016.pdf"},"date":[{"dateType":"Available","dateValue":"2027-12-08"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL25266016.pdf","filesize":[{"value":"1.2 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"d7605f4f-efc0-4695-8e4e-9612b0bc63ce","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2025 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"三上,裕明"}]},{"creatorNames":[{"creatorName":"鈴木,脩司"}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8779","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本報告では,大規模言語モデル(LLM)の日本語における事実に基づく回答能力(事実性)を評価するベンチマーク「Japanese SimpleQA」の構築と既存モデルの振る舞いについて述べる.Japanese SimpleQAは,短文で特定の事実について問う3000問の日本語質問応答からなるベンチマークである.SimpleQAと同様に,(1)最新のLLMにとっても高難度であり,(2)時間経過による正答の変化がなく,(3)別解が存在しない,質問応答で構成されている.この設計により,「モデルが自己の知識をどの程度正確に認識しているか」を評価できる.Japanese SimpleQAを用い,既存LLMの事実性や,RAGによる事実性の向上効果を明らかにした.構築したベンチマークはgithubで公開している.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"7","bibliographic_titles":[{"bibliographic_title":"研究報告自然言語処理(NL)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2025-12-08","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"16","bibliographicVolumeNumber":"2025-NL-266"}]},"relation_version_is_last":true,"weko_creator_id":"80578"},"id":2006071,"updated":"2025-12-01T04:43:41.038007+00:00","links":{}}