{"links":{},"id":224590,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00224590","sets":["1164:4619:11188:11204"]},"path":["11204"],"owner":"44499","recid":"224590","title":["メタ学習を用いた単語読唇の検討"],"pubdate":{"attribute_name":"公開日","attribute_value":"2023-02-23"},"_buckets":{"deposit":"3b8a1610-e89b-4d8f-99dd-6aab622bce27"},"_deposit":{"id":"224590","pid":{"type":"depid","value":"224590","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"メタ学習を用いた単語読唇の検討","author_link":["592532","592535","592534","592533"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"メタ学習を用いた単語読唇の検討"},{"subitem_title":"A Study of Word Lip-Reading using Meta Learning","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2023-02-23","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"九州工業大学"},{"subitem_text_value":" 九州工業大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Kyushu Institute of Technology","subitem_text_language":"en"},{"subitem_text_value":"Kyushu Institute of Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/224590/files/IPSJ-CVIM23233024.pdf","label":"IPSJ-CVIM23233024.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-CVIM23233024.pdf","filesize":[{"value":"1.9 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"20"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"bf8f37fb-bb44-4dd2-85de-acde00846a79","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2023 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"児玉, 道成"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"齊藤, 剛史"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Michinari, Kodama","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Takeshi, Saitoh","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11131797","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8701","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"視覚情報のみを用いて発話内容を推定する読唇技術は，教師あり学習の一種であり，大規模なデータセットが望まれている．しかし，発話シーンの収集はコストがかかる問題がある．そこで本論文では，収集コストを抑えるために，少数データで学習するアプローチの中で，メタ学習を用いる手法を検討する．読唇用公開データセット LRW および SSSD，比較用として行動認識公開データセット UCF101 の三つのデータセットを用いて，ProtoNet や DeepBDC など幾つかのメタ学習手法を用いて認識実験を実施した．その結果，UCF101 に比べると LRW とSSSD では低い認識精度であった．本稿では実施した実験結果を報告する．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Lip-reading technology, which estimates utterance content using only visual information, is a kind of supervised learning, and a large-scale data set is desired. However, collecting utterance scenes is costly. Therefore, in this paper, in order to reduce the collection cost, we consider a method that uses meta learning in the approach of learning with a small number of data. Recognition experiments were conducted using several meta learning methods such as ProtoNet and DeepBDC using three datasets: public datasets LRW and SSSD for lip-reading, and public action recognition dataset UCF101 for comparison. As a result, compared to UCF101, LRW and SSSD had lower recognition accuracy. In this paper, we report the experimental results.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"5","bibliographic_titles":[{"bibliographic_title":"研究報告コンピュータビジョンとイメージメディア（CVIM）"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2023-02-23","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"24","bibliographicVolumeNumber":"2023-CVIM-233"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"created":"2025-01-19T01:24:09.859348+00:00","updated":"2025-01-19T13:05:27.686719+00:00"}