{"updated":"2025-01-23T03:19:10.568672+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00009638","sets":["581:586:596"]},"path":["596"],"owner":"1","recid":"9638","title":["言語情報と映像情報の統合による物体のモデル学習と認識"],"pubdate":{"attribute_name":"公開日","attribute_value":"2008-03-15"},"_buckets":{"deposit":"4b312d57-0d61-4446-a69d-f4981b0d0acd"},"_deposit":{"id":"9638","pid":{"type":"depid","value":"9638","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"言語情報と映像情報の統合による物体のモデル学習と認識","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"言語情報と映像情報の統合による物体のモデル学習と認識"},{"subitem_title":"Automatic Object Model Acquisition and Object Recognition by Integrating Linguistic and Visual Information","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"論文","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2008-03-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"京都大学大学院情報学研究科"},{"subitem_text_value":"東京大学大学院情報理工学系研究科 現在,農林中央金庫"},{"subitem_text_value":"京都大学大学院情報学研究科"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Informatics, Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information Science and Technology, University of Tokyo,Presently with The Norinchukin Bank","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/9638/files/IPSJ-JNL4903036.pdf"},"date":[{"dateType":"Available","dateValue":"2010-03-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL4903036.pdf","filesize":[{"value":"1.7 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"04defca6-c2a5-433a-a226-efb6af5ab642","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2008 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"柴田, 知秀"},{"creatorName":"加藤紀雄"},{"creatorName":"黒橋, 禎夫"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Tomohide, Shibata","creatorNameLang":"en"},{"creatorName":"Norio, Kato","creatorNameLang":"en"},{"creatorName":"Sadao, Kurohashi","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal 
article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"近年の計算機・ネットワーク環境の進歩により,膨大な映像アーカイブが蓄積されるようになった.本研究では作業教示映像である料理映像を具体的題材とし,料理映像に現れる食材の物体モデルを自動学習し,それを用いて物体認識を行う手法を提案する.まず,物体がアップになっている画像を抽出し,その画像における注目領域を決定する.次に,画像の周辺の発話から重要な単語をキーワードとして抽出し,注目領域と対応付ける.このような注目領域とキーワードのペアを大量に収集することにより,物体モデルを構築する.物体モデルが構築された後,物体モデルの色情報と談話構造に基づく単語の重要度を考慮することにより,物体認識を行う.2 つの料理番組,計約96 時間分の映像から物体モデルを構築したところ,約100 食材の物体モデルが構築でき,その精度は77.8%であった.また,そのモデルを利用して物体の認識を行ったところ,精度はF 値で0.727 であった.","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Recent years have seen the rapid increase of multimedia contents with the continuing advance of information technology. We focus on cooking TV videos, which are instruction videos, and propose a method for acquiring object models of foods and performing object recognition based on the acquired object model. Close-up images are first extracted from image sequences, and an attention region is determined on the close-up image. Then, an important word is extracted as a keyword from utterances around the close-up image, and is made correspond to the close-up image. By collecting a set of close-up image and keyword from a large amount of videos, we can acquire the object model. After that, object recognition is performed based on the acquired object model and discourse structure. We conducted an experiment on two kinds of cooking TV programs. We acquired the object model of around 100 foods and its accuracy was 77.8%. The F measure of object recognition was 0.727.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"1464","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"1451","bibliographicIssueDates":{"bibliographicIssueDate":"2008-03-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"3","bibliographicVolumeNumber":"49"}]},"relation_version_is_last":true,"item_2_alternative_title_2":{"attribute_name":"その他タイトル","attribute_value_mlt":[{"subitem_alternative_title":"自然言語"}]},"weko_creator_id":"1"},"created":"2025-01-18T22:44:48.043702+00:00","id":9638,"links":{}}