{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00239002","sets":["1164:3500:11474:11710"]},"path":["11710"],"owner":"44499","recid":"239002","title":["大規模言語モデルを用いた文書補強とリランキングによる統計データ検索"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-09-04"},"_buckets":{"deposit":"78f6a5e1-58c6-4561-9838-2c989d82f3b0"},"_deposit":{"id":"239002","pid":{"type":"depid","value":"239002","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"大規模言語モデルを用いた文書補強とリランキングによる統計データ検索","author_link":["654695","654696"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"大規模言語モデルを用いた文書補強とリランキングによる統計データ検索"},{"subitem_title":"Statistical Data Retrieval using Document Augmentation and Re-Ranking with Large Language Model","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"2B オーガナイズドセッション 偽情報対策技術","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2024-09-04","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"京都産業大学情報理工学部情報理工学科"},{"subitem_text_value":"京都産業大学情報理工学部情報理工学科"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Faculty of Information Science and Engineering, Kyoto Sangyo University","subitem_text_language":"en"},{"subitem_text_value":"Faculty of Information Science and Engineering, Kyoto Sangyo University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/239002/files/IPSJ-IFAT24156013.pdf","label":"IPSJ-IFAT24156013.pdf"},"date":[{"dateType":"Available","dateValue":"2026-09-04"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-IFAT24156013.pdf","filesize":[{"value":"1.1 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"39"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"541329f8-93b9-4e84-9107-15ed541bbb8d","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"黒川, 博生"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"宮森, 恒"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10114171","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical 
report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8884","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"統計データは,政府等が保有するオープンデータの一種であり,近年,社会問題となっているフェイクニュースに対処するための事実確認(ファクトチェック)への活用をはじめ,有効活用するためのアドホック検索基盤の重要性が高まっている.しかし,従来の統計データ検索では,文書長の短さや表記ゆれなどによる検索漏れ等により,十分なランキング性能を達成できていない.そこで,本稿では大規模言語モデルを用いた文書補強とリランキングによる統計データのアドホック検索手法を提案する.提案手法では,まず統計データから抽出された見出し,行名,列名,値に基づき,その内容説明を大規模言語モデルで生成することでメタデータを補強した文書を作成する.次に,補強された文書を利用してランキングを行い,最後に大規模言語モデルを用いて意味内容の類似に基づくリランキングを行う.評価実験の結果,提案手法は,文書補強およびリランキングを行わないベースラインと比較して, ランキング性能が nDCG@10 で 0.133 改善することを確認した.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Statistical data is a kind of open data held by governments and others, and its use for fact-checking to address the social problem such as fake news. Therefore, the importance of ad-hoc search infrastructure for effective use of data is increasing. However, traditional statistical data search systems have not achieved sufficient ranking performance due to omission in a search caused by short document length and spelling variations. In this paper, we propose a statistical data ad-hoc search method using document augmentation and re-ranking with large language model. First, we create a document with augmented metadata by generating descriptions of the contents on the basis of extracted headers, row names, column names, and values from the statistical data using a large language model. Next, we rank the augmented documents and finally re-rank them based on semantic similarity using a large language model. 
As a result of the evaluation experiment, we confirmed that the proposed method improves the ranking performance by 0.133 in nDCG@10 compared to the baseline that does not perform document augmentation and re-ranking.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告情報基礎とアクセス技術(IFAT)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-09-04","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"13","bibliographicVolumeNumber":"2024-IFAT-156"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":239002,"updated":"2025-01-19T08:25:31.558523+00:00","links":{},"created":"2025-01-19T01:42:23.888796+00:00"}