{"created":"2025-01-18T23:29:52.032628+00:00","updated":"2025-01-21T23:20:21.361678+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00070805","sets":["1164:1165:6144:6200"]},"path":["6200"],"owner":"10","recid":"70805","title":["学術論文文書画像からのページレイアウトに依存しない自動書誌要素抽出"],"pubdate":{"attribute_name":"公開日","attribute_value":"2010-11-05"},"_buckets":{"deposit":"9ac44540-7806-475e-bf88-c5e1cd67a2be"},"_deposit":{"id":"70805","pid":{"type":"depid","value":"70805","revision_id":0},"owners":[10],"status":"published","created_by":10},"item_title":"学術論文文書画像からのページレイアウトに依存しない自動書誌要素抽出","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"学術論文文書画像からのページレイアウトに依存しない自動書誌要素抽出"},{"subitem_title":"Automatic Extraction of Bibliographic Elements from Scanned Academic Articles without Using Page Layout","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"マルチメディア","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2010-11-05","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"岡山大学大学院自然科学研究科"},{"subitem_text_value":"岡山大学大学院自然科学研究科"},{"subitem_text_value":"国立情報学研究所"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Natural Science and Technology, Okayama University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Natural Science and Technology, Okayama University","subitem_text_language":"en"},{"subitem_text_value":"National Institute of Informatics","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/70805/files/IPSJ-DBS10151018.pdf"},"date":[{"dateType":"Available","dateValue":"2012-11-05"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-DBS10151018.pdf","filesize":[{"value":"553.7 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"13"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"01cd2c1e-24ec-42a6-b110-b3c661742530","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2010 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"井上, 諒平"},{"creatorName":"太田, 学"},{"creatorName":"高須, 淳宏"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Ryohei, Inoue","creatorNameLang":"en"},{"creatorName":"Manabu, Ohta","creatorNameLang":"en"},{"creatorName":"Atsuhiro, Takasu","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10112482","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"国立情報学研究所の電子図書館 NII-ELS は,国内の主要な学術論文を網羅しており,その蔵書検索には著者名等の書誌情報が利用される.NII-ELS では論文文書画像を蓄積しているため,書誌情報は文書画像からなるべく自動で抽出することが望ましい.現在の文書画像処理技術により一定の抽出精度は達成しているが,学習のため人手で書誌要素を抽出した論文データを学術雑誌ごとに用意する必要がある.しかし 1000 雑誌以上を所蔵する NII-ELS では,各雑誌ごとにこの学習データを用意するコストは無視できない.そこで本研究では,書誌要素抽出対象の雑誌とは異なる雑誌を学習データに用いて効率的に書誌要素を抽出する手法を提案する.提案手法は,論文タイトルページの各行に対して,雑誌のレイアウトに依存しない文字列等の情報を利用して書誌ラベルを付与する.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"NII-ELS developed by the National Institute of Informatics is a digital library which stores scanned document images of a wide variety of academic journals in Japan. Bibliographic information is indispensable for searching such a digital library, hence, automatic extraction of bibliographic data from the images is very important. Therefore, Yakushi et al. proposed an automatic method of extracting bibliographies for academic articles scanned with OCR markup. Although they achieved excellent extraction accuracies for some journals, they needed a substantial amount of training data obtained through costly manual extraction of bibliographies from document images. We cannot ignore this cost because NII-ELS stores more than a thousand journals. This paper, therefore, proposes an automatic bibliography extraction method to use training data collected from journals different from a target journal. The proposed method labels each text line on an article's title page as appropriate bibliographic names by using linguistic information which is independent of page layout varying by journal.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"8","bibliographic_titles":[{"bibliographic_title":"研究報告データベースシステム(DBS)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2010-11-05","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"18","bibliographicVolumeNumber":"2010-DBS-151"}]},"relation_version_is_last":true,"weko_creator_id":"10"},"id":70805,"links":{}}