% Conference paper from the Jinmoncom 2024 proceedings (exported from the IPSJ repository).
% Notes for maintainers:
%   - The citation key is the repository OAI identifier and is kept unchanged so existing citations resolve.
%   - "author-en" and "abstract-en" are not standard fields; styles ignore them. They preserve the
%     romanised author name and the English abstract that the export had mixed into author/note.
%   - The abstract lives in "abstract" rather than "note" so it is not printed in the bibliography.
%   - Acronyms in the title are braced so sentence-casing styles do not lowercase them.
@inproceedings{oai:ipsj.ixsq.nii.ac.jp:00241527,
  author      = {青池, 亨},
  author-en   = {Aoike, Toru},
  title       = {{CPU}環境で高速に動作する軽量{OCR}「{NDL}古典籍{OCR-Lite}」の開発},
  booktitle   = {じんもんこん2024論文集},
  volume      = {2024},
  pages       = {181--186},
  publisher   = {情報処理学会},
  month       = nov,
  year        = {2024},
  abstract    = {国立国会図書館(NDL)では,OCR処理プログラムとして明治期以降の活字の図書・雑誌資料を対象としたNDLOCRや古典籍資料を対象としたNDL古典籍OCRを開発し、オープンソースで公開してきた.しかし,これらのOCRは,動作環境にGPUを必須とすることから,必要な機材や技術知識の敷居が高く,事実上利用者層を狭めていた点が課題であった.近年,動画等をリアルタイムで処理するための技術として,計算負荷を大幅に抑えてGPUのない環境でも高速かつ高精度に動作する機械学習モデルの研究が顕著に進んでいる.こうした研究の知見を取り入れてこれらのOCR処理プログラムを改良し,CPU環境で高速に動作するOCRの開発をすることとした.その第一歩として「NDL古典籍OCR-Lite」の開発を行ったので報告する.あわせて,本研究の知見を活かした今後の見通しについても紹介する.なお、本研究の成果はオープンソースとして公開を予定している.},
  abstract-en = {The National Diet Library (NDL) has developed and released OCRs as open source: NDLOCR for books and periodicals in print from the Meiji period onward, and NDLkotenOCR for classical materials. However, these OCRs, which require a GPU as their operating environment, have a high barrier in terms of the necessary equipment and technical knowledge, despite their “anyone can use them freely” philosophy, and have effectively narrowed the user base. In recent years, there has been remarkable progress in research on machine learning models for real-time processing of video and other data, which can operate at high speed and with high accuracy even in environments without a GPU, greatly reducing the computational load. By reworking the OCR processing program to incorporate knowledge from these fields, the development of an OCR that runs at high speed in a CPU environment was considered. As a first step, we have developed NDLkotenOCR-Lite. The results of this study are scheduled to be released as open source at the end of November 2024. Future prospects for utilizing the findings of this research are also described.},
}