{"id":225940,"updated":"2025-01-19T12:38:07.954496+00:00","links":{},"created":"2025-01-19T01:25:25.994152+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00225940","sets":["581:11107:11114"]},"path":["11114"],"owner":"44499","recid":"225940","title":["Spatial Hierarchical Attention Network Based Video-guided Machine Translation"],"pubdate":{"attribute_name":"公開日","attribute_value":"2023-05-15"},"_buckets":{"deposit":"ed1418a5-a182-4e38-a42a-c4f9035ab7c5"},"_deposit":{"id":"225940","pid":{"type":"depid","value":"225940","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"Spatial Hierarchical Attention Network Based Video-guided Machine Translation","author_link":["599037","599034","599032","599036","599039","599038","599035","599033"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Spatial Hierarchical Attention Network Based Video-guided Machine Translation"},{"subitem_title":"Spatial Hierarchical Attention Network Based Video-guided Machine Translation","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[一般論文] multimodal machine translation, video-guided machine translation, hierarchical attention network, spatial features","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2023-05-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Informatics, Kyoto University"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Informatics, Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Informatics, Kyoto University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/225940/files/IPSJ-JNL6405012.pdf","label":"IPSJ-JNL6405012.pdf"},"date":[{"dateType":"Available","dateValue":"2025-05-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL6405012.pdf","filesize":[{"value":"2.2 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"41ba1d57-29e4-4972-9887-26f1d0c8433b","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2023 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Weiqi, Gu"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Haiyue, Song"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Chenhui, Chu"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Sadao, Kurohashi"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Weiqi, Gu","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Haiyue, Song","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Chenhui, Chu","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Sadao, Kurohashi","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_publisher_15":{"attribute_name":"公開者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Video-guided machine translation, as one type of multimodal machine translation, aims to engage video contents as auxiliary information to address the word sense ambiguity problem in machine translation. Previous studies only use features from pre-trained action detection models as motion representations of the video to solve the verb sense ambiguity and neglect the noun sense ambiguity problem. To address this, we propose a video-guided machine translation system using both spatial and motion representations. For the spatial part, we propose a hierarchical attention network to model the spatial information from object-level to video-level. We investigate and discuss spatial features extracted from objects with pre-trained convolutional neural network models and spatial concept features extracted from object labels and attributes with pre-trained language models. We further investigate spatial feature filtering by referring to corresponding source sentences. Experiments on the VATEX dataset show that our system achieves a 35.86 BLEU-4 score, which is 0.51 score higher than the single model of the SOTA method. Experiments on the How2 dataset further verify the generalization ability of our proposed system.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.31(2023) (online)\nDOI http://dx.doi.org/10.2197/ipsjjip.31.299\n------------------------------","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Video-guided machine translation, as one type of multimodal machine translation, aims to engage video contents as auxiliary information to address the word sense ambiguity problem in machine translation. Previous studies only use features from pre-trained action detection models as motion representations of the video to solve the verb sense ambiguity and neglect the noun sense ambiguity problem. To address this, we propose a video-guided machine translation system using both spatial and motion representations. For the spatial part, we propose a hierarchical attention network to model the spatial information from object-level to video-level. We investigate and discuss spatial features extracted from objects with pre-trained convolutional neural network models and spatial concept features extracted from object labels and attributes with pre-trained language models. We further investigate spatial feature filtering by referring to corresponding source sentences. Experiments on the VATEX dataset show that our system achieves a 35.86 BLEU-4 score, which is 0.51 score higher than the single model of the SOTA method. Experiments on the How2 dataset further verify the generalization ability of our proposed system.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.31(2023) (online)\nDOI http://dx.doi.org/10.2197/ipsjjip.31.299\n------------------------------","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicIssueDates":{"bibliographicIssueDate":"2023-05-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"5","bibliographicVolumeNumber":"64"}]},"relation_version_is_last":true,"weko_creator_id":"44499"}}