% Journal article in the IPSJ TOM transactions, vol. 16, no. 2, pp. 67--79, Oct 2023.
% The citation key is the repository OAI identifier and is kept unchanged.
% Field notes:
%   author      - the five authors, Japanese script, Family, Given form.
%   author-en   - the same five authors romanized, Family, Given form, in the
%                 same order. Not a standard field, so standard styles ignore
%                 it; kept so the romanized names stay with the record.
%   abstract    - Japanese abstract (verbatim from the export).
%   abstract-en - English abstract (verbatim from the export); also ignored by
%                 standard styles.
@article{oai:ipsj.ixsq.nii.ac.jp:00228860,
  author      = {飯田, 紗也香 and 竹本, 有紀 and 石川, 由羽 and 髙田, 雅美 and 城, 和貴},
  author-en   = {Iida, Sayaka and Takemoto, Yuki and Ishikawa, Yu and Takata, Masami and Joe, Kazuki},
  title       = {多段組多サイズ見出しで構成される近代書籍のレイアウト解析},
  journal     = {情報処理学会論文誌数理モデル化と応用(TOM)},
  volume      = {16},
  number      = {2},
  pages       = {67--79},
  month       = oct,
  year        = {2023},
  abstract    = {近代書籍にも対応するOCRとして最近NDLOCRが新規開発された.NDLOCRは多数の一般書籍にみられる1段組みの縦書きレイアウトに対しては十分な精度でレイアウト解析を行うことができる.しかし,新聞のような多段組多サイズ見出しを含むレイアウトの近代書籍には,十分な精度でレイアウト解析を行うことができない.そこで,CRAFTと解像度ピラミッドを用いて多段組多サイズ見出しを含むレイアウトの近代書籍に対する文字切り出しを試みる.多段組多サイズ見出し対応のOCR構築における最終的な目標として明治以降の日本人移民が現地で出版した邦字新聞のテキスト化を目指しているが,本論文では,邦字新聞ほどテキスト化の難易度が高くない多段組多サイズ見出しを含む近代書籍の例として帝国議会会議録に焦点を当てる.帝国議会会議録に対して提案手法と他のOCRにおけるレイアウト解析の精度を比較して検証を行う.},
  abstract-en = {NDLOCR has recently been newly developed as an OCR for Early-Modern Japanese Printed Books, and it can perform layout analysis with sufficient accuracy for the single-column vertical layouts found in many general books. However, it cannot perform layout analysis with sufficient accuracy for modern books with multi-column layouts and multi-size headings, such as newspapers. Therefore, we attempted to perform text extraction for Early-Modern Japanese Printed Books with layouts that include multi-columns and multi-size headlines using CRAFT and the resolution pyramid. The ultimate goal of OCR construction for multi-column and multi-size headlines is to convert Japanese newspapers published locally by Japanese immigrants after the Meiji period into text. In this paper, we focus on the Imperial Diet Proceedings as an example of an Early-Modern Japanese Printed Book containing multi-column and multi-size headlines, which is not as difficult to convert into text as Japanese newspapers. The proposed method is validated by comparing the accuracy of the proposed method with that of layout analysis by other OCR methods for the Imperial Diet Proceedings.},
}