{"updated":"2025-01-20T00:48:52.827205+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00191255","sets":["1164:3500:9372:9555"]},"path":["9555"],"owner":"11","recid":"191255","title":["A Comparative Study of Deep Learning Approaches for Visual Question Classification in Community QA"],"pubdate":{"attribute_name":"公開日","attribute_value":"2018-09-05"},"_buckets":{"deposit":"5ff38fd1-aedc-43ea-8b7f-f5fafd67c085"},"_deposit":{"id":"191255","pid":{"type":"depid","value":"191255","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"A Comparative Study of Deep Learning Approaches for Visual Question Classification in Community QA","author_link":["440332","440339","440334","440335","440338","440340","440333","440341","440343","440336","440342","440337"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"A Comparative Study of Deep Learning Approaches for Visual Question Classification in Community QA"},{"subitem_title":"A Comparative Study of Deep Learning Approaches for Visual Question Classification in Community QA","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"質問応答・検索","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2018-09-05","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Waseda University"},{"subitem_text_value":"Carnegie Mellon University"},{"subitem_text_value":"Yahoo Japan Corporation"},{"subitem_text_value":"Yahoo Japan Corporation"},{"subitem_text_value":"Yahoo Japan Corporation"},{"subitem_text_value":"Waseda University"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Waseda University","subitem_text_language":"en"},{"subitem_text_value":"Carnegie Mellon University","subitem_text_language":"en"},{"subitem_text_value":"Yahoo Japan Corporation","subitem_text_language":"en"},{"subitem_text_value":"Yahoo Japan Corporation","subitem_text_language":"en"},{"subitem_text_value":"Yahoo Japan Corporation","subitem_text_language":"en"},{"subitem_text_value":"Waseda University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/191255/files/IPSJ-IFAT18132017.pdf","label":"IPSJ-IFAT18132017.pdf"},"date":[{"dateType":"Available","dateValue":"2020-09-05"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-IFAT18132017.pdf","filesize":[{"value":"1.1 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"39"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"92e2d55a-0903-4c1a-831a-82869e3484a3","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2018 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Hsin-Wen, Liu"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Avikalp, Srivastava"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Sumio, Fujita"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Toru, Shimizu"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Riku, Togashi"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Tetsuya, Sakai"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Hsin-Wen, Liu","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Avikalp, Srivastava","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Sumio, Fujita","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Toru, Shimizu","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Riku, Togashi","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Tetsuya, Sakai","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10114171","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8884","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Tasks that take not only text but also image as inputs, such as Visual Question Answering (VQA), have received growing attention and become an active research field in recent years. In this study, we consider the task of Visual Question Classification (VQC), where a given question containing both text and an image needs to be classified into one of predefined categories for a Community Question Answering (CQA) site. Our experiments use real data from a major Japanese CQA site called Yahoo Chiebukuro. To our knowledge, our work is the first to systematically compare different deep learning approaches on VQC tasks for CQA. Our study shows that the model that uses HieText for text representation, ResNet50 for image representation, and Multimodal Compact Bilinear pooling for combining the two representations achieved the highest performance in the VQC task.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Tasks that take not only text but also image as inputs, such as Visual Question Answering (VQA), have received growing attention and become an active research field in recent years. In this study, we consider the task of Visual Question Classification (VQC), where a given question containing both text and an image needs to be classified into one of predefined categories for a Community Question Answering (CQA) site. Our experiments use real data from a major Japanese CQA site called Yahoo Chiebukuro. To our knowledge, our work is the first to systematically compare different deep learning approaches on VQC tasks for CQA. Our study shows that the model that uses HieText for text representation, ResNet50 for image representation, and Multimodal Compact Bilinear pooling for combining the two representations achieved the highest performance in the VQC task.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告情報基礎とアクセス技術(IFAT)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2018-09-05","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"17","bibliographicVolumeNumber":"2018-IFAT-132"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-19T00:57:09.576090+00:00","id":191255,"links":{}}