% Technical report comparing deep-learning approaches for Visual Question
% Classification (VQC) on a Community QA site.
%
% Cleaned up from a repository auto-export:
%   - the author list was repeated twice; it is now listed once;
%   - names were exported as "Given, Family" and are rewritten as
%     "Family, Given" so BibTeX parses the surname correctly;
%   - the abstract was stored (twice) in `note`, which styles print verbatim;
%     it now lives once in `abstract`, which standard styles do not print;
%   - `issue` was renamed to `number`, the field standard styles read;
%   - `month` uses the predefined macro instead of a braced literal.
%
% NOTE(review): `institution` is required for @techreport but is absent from
% the export. The citation key suggests the IPSJ repository; confirm the
% issuing body and add the field.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00191255,
  author   = {Liu, Hsin-Wen and Srivastava, Avikalp and Fujita, Sumio and Shimizu, Toru and Togashi, Riku and Sakai, Tetsuya},
  title    = {A Comparative Study of Deep Learning Approaches for Visual Question Classification in Community {QA}},
  number   = {17},
  month    = sep,
  year     = {2018},
  abstract = {Tasks that take not only text but also image as inputs, such as Visual Question Answering (VQA), have received growing attention and become an active research field in recent years. In this study, we consider the task of Visual Question Classification (VQC), where a given question containing both text and an image needs to be classified into one of predefined categories for a Community Question Answering (CQA) site. Our experiments use real data from a major Japanese CQA site called Yahoo Chiebukuro. To our knowledge, our work is the first to systematically compare different deep learning approaches on VQC tasks for CQA. Our study shows that the model that uses HieText for text representation, ResNet50 for image representation, and Multimodal Compact Bilinear pooling for combining the two representations achieved the highest performance in the VQC task.},
}