{"links":{},"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00224455","sets":["1164:5159:11151:11203"]},"path":["11203"],"owner":"44499","recid":"224455","title":["What Do Self-Supervised Speech Representation Models Know?-A Layer-Wise Analysis-"],"pubdate":{"attribute_name":"公開日","attribute_value":"2023-02-21"},"_buckets":{"deposit":"2d8856d4-be48-4479-9daa-b8edfc4784fb"},"_deposit":{"id":"224455","pid":{"type":"depid","value":"224455","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"What Do Self-Supervised Speech Representation Models Know?-A Layer-Wise Analysis-","author_link":["591964","591959","591961","591960","591963","591962","591965","591958"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"What Do Self-Supervised Speech Representation Models Know?-A Layer-Wise Analysis-"},{"subitem_title":"What Do Self-Supervised Speech Representation Models Know?-A Layer-Wise Analysis-","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"招待講演3 ","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2023-02-21","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Toyota Technological Institute at Chicago"},{"subitem_text_value":"Toyota Technological Institute at Chicago"},{"subitem_text_value":"Toyota Technological Institute at Chicago"},{"subitem_text_value":"Toyota Technological Institute at Chicago"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Toyota Technological Institute at Chicago","subitem_text_language":"en"},{"subitem_text_value":"Toyota Technological Institute at Chicago","subitem_text_language":"en"},{"subitem_text_value":"Toyota Technological Institute at Chicago","subitem_text_language":"en"},{"subitem_text_value":"Toyota Technological Institute at Chicago","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/224455/files/IPSJ-SLP23146058.pdf","label":"IPSJ-SLP23146058.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP23146058.pdf","filesize":[{"value":"896.7 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"ce2b6725-512d-49bf-ae54-153121403906","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2023 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Karen, Livescu"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Ankita, Pasad"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Ju-Chieh, Chou"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Bowen, Shi"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Karen, Livescu","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Ankita, Pasad","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Ju-Chieh, Chou","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Bowen, Shi","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Self-supervised speech representations have become ubiquitous in speech processing over the past few years. They have both improved the state of the art and made it feasible to learn speech models with very little labeled data. However, it is not well understood what linguistic information is encoded in pre-trained models and how best to apply them to downstream tasks. In this talk I will describe recent work that begins to build an understanding of the layer-wise information learned by pre-trained speech models. We consider a number of popular pre-trained models and investigate the extent to which their layers encode spectral, phonetic, and word-level information. The results of these analyses also suggest some ways to improve or simplify the application of pre-trained models for downstream tasks.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Self-supervised speech representations have become ubiquitous in speech processing over the past few years. They have both improved the state of the art and made it feasible to learn speech models with very little labeled data. However, it is not well understood what linguistic information is encoded in pre-trained models and how best to apply them to downstream tasks. In this talk I will describe recent work that begins to build an understanding of the layer-wise information learned by pre-trained speech models. We consider a number of popular pre-trained models and investigate the extent to which their layers encode spectral, phonetic, and word-level information. The results of these analyses also suggest some ways to improve or simplify the application of pre-trained models for downstream tasks.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"1","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2023-02-21","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"58","bibliographicVolumeNumber":"2023-SLP-146"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"created":"2025-01-19T01:24:02.672116+00:00","updated":"2025-01-19T13:08:48.936234+00:00","id":224455}