% Technical report (repository export, cleaned up):
%   - the abstract lives in `abstract`, not `note`, so styles do not print it
%     in the reference list;
%   - `author` lists each person once (native script); the romanized forms of
%     the same two people are kept in the ignored field `author_en`;
%   - `month` uses the standard three-letter macro;
%   - the issue number is stored in `number`, the field techreport styles read.
% NOTE(review): techreport entries require an `institution` field, which the
% exported record does not provide. Presumably the Information Processing
% Society of Japan, judging by the key prefix -- confirm and add.
@techreport{oai:ipsj.ixsq.nii.ac.jp:02001890,
  author    = {橋本, 雄太 and 太田, 那優},
  author_en = {Hashimoto, Yuta and Oota, Nayu},
  title     = {大規模言語モデルを利用した古文書資料の現代語訳の品質評価},
  number    = {3},
  year      = {2025},
  month     = may,
  abstract  = {この数年の間に,AI文字認識やクラウドソーシング翻刻の進展により,大量の日本語の古文書資料がデジタルテキスト化された.一方で,江戸時代以前の文献資料に利用されている語句や語法は現代人にとって分かりにくく,多くの人々にとって理解の妨げになっている.そこで大規模言語モデルを利用した現代語訳の自動生成の可能性が模索されているが,その品質についてこれまで定量的な評価がなされてこなかった.本研究では,ChatGPT4o,Gemini 1.5,Claude 3.5 Sonnet,DeepSeek R1を利用して中世・近世の古文書史料数十点を現代語訳し,その品質を定量的に比較するとともに,もっとも高い性能を示したClaudeを対象に誤訳やハルシネーションの傾向を調べた., In recent years, advances in AI-based character recognition and crowdsourced transcription have enabled the digitization of large volumes of historical Japanese documents. However, the vocabulary and grammatical constructions found in premodern Japanese texts―particularly those from the Edo period and earlier―often pose challenges for contemporary readers. While the potential of large language models (LLMs) for generating modern Japanese translations of such texts is being explored, their output quality has not yet been quantitatively evaluated. In this study, we translated several dozen medieval and early modern Japanese manuscripts into contemporary Japanese using ChatGPT-4o, Gemini 1.5, Claude 3.5 Sonnet, and DeepSeek R1. We then conducted a quantitative comparison of translation quality and examined tendencies for mistranslation and hallucination in Claude, which exhibited the highest overall performance.},
}