{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00220891","sets":["6504:11035:11043"]},"path":["11043"],"owner":"44499","recid":"220891","title":["wav2vec 2.0の事前学習モデルを用いた咽喉マイク音声認識"],"pubdate":{"attribute_name":"公開日","attribute_value":"2022-02-17"},"_buckets":{"deposit":"76399fae-c432-421d-ad0b-6df0e8f7f4ce"},"_deposit":{"id":"220891","pid":{"type":"depid","value":"220891","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"wav2vec 2.0の事前学習モデルを用いた咽喉マイク音声認識","author_link":["578053","578051","578052","578054","578050"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"wav2vec 2.0の事前学習モデルを用いた咽喉マイク音声認識"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"人工知能と認知科学","subitem_subject_scheme":"Other"}]},"item_type_id":"22","publish_date":"2022-02-17","item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_22_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"静岡大"},{"subitem_text_value":"産総研"},{"subitem_text_value":"静岡大"},{"subitem_text_value":"静岡大"},{"subitem_text_value":"静岡大"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/220891/files/IPSJ-Z84-1R-04.pdf","label":"IPSJ-Z84-1R-04.pdf"},"date":[{"dateType":"Available","dateValue":"2022-10-22"}],"format":"application/pdf","filename":"IPSJ-Z84-1R-04.pdf","filesize":[{"value":"948.9 kB"}],"mimetype":"application/pdf","accessrole":"open_date","version_id":"c33899df-5b19-4826-bb62-2dbd7b45414c","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2022 by the Information Processing Society of \nJapan"}]},"item_22_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"増田, 光汰"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"緒方, 淳"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"西田, 昌史"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"綱川, 隆司"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"西村, 雅史"}],"nameIdentifiers":[{}]}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_5794","resourcetype":"conference paper"}]},"item_22_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00349328","subitem_source_identifier_type":"NCID"}]},"item_22_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"咽喉マイク音声は外部雑音の影響を受けにくいため、多人数会話や高雑音環境下での音声認識に適している。しかし接話マイク音声と比較して音響特徴量が異なり、一般的な音声認識モデルで咽喉マイク音声を認識することは困難である。このことから咽喉マイク音声に適した音声認識モデルを作成する必要があるが、十分な精度の音声認識モデルを構築するために必要な大規模な文字起こし咽喉マイク音声データベースが存在しない。そこで本稿では、少量のラベル付きデータでも十分な精度の音声認識モデルを作成することができる手法として提案されたwav2vec 2.0に着目し、wav2vec2.0の事前学習モデルに対して咽喉マイク音声でファインチューニングを行うことによって、咽喉マイク音声のデータ量不足という問題点を解決することが可能か検討した。","subitem_description_type":"Other"}]},"item_22_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"290","bibliographic_titles":[{"bibliographic_title":"第84回全国大会講演論文集"}],"bibliographicPageStart":"289","bibliographicIssueDates":{"bibliographicIssueDate":"2022-02-17","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicVolumeNumber":"2022"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":220891,"updated":"2025-01-19T14:23:00.945487+00:00","links":{},"created":"2025-01-19T01:20:54.841465+00:00"}