% Technical report on LSTM-based linear-prediction-filter estimation for
% reverberant speech recognition (Kida and Taniguchi, December 2016).
%
% Field notes:
%   author       - the two authors, in the original Japanese script.
%   author_en    - romanized forms of the same two authors (family name first,
%                  then given name). Non-standard field, ignored by styles; it
%                  keeps the romanization without doubling the author list.
%   abstract     - Japanese abstract, kept out of the printed "note" field.
%   abstract_en  - English abstract (non-standard field, ignored by styles).
%   Percent signs in the abstracts are escaped so they cannot start a TeX
%   comment if a style ever prints them.
%
% NOTE(review): techreport entries require an "institution" field and none is
% present in the exported record. The citation key suggests the IPSJ
% repository -- confirm the issuing body and add it.
% NOTE(review): "number" carries the value exported as "issue"; confirm that
% 25 is the report number the publisher uses.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00176411,
  author      = {木田, 祐介 and 谷口, 徹},
  author_en   = {Kida, Yusuke and Taniguchi, Toru},
  title       = {{LSTM}を用いた線形予測フィルタの推定に基づく残響下音声認識},
  number      = {25},
  month       = dec,
  year        = {2016},
  abstract    = {DNN (Deep Neural Network) によって音声認識の精度がめざましく向上したが,マイクから離れた位置から発せられた遠隔音声の認識は依然として大きな課題である.音圧の減衰による SNR (Signal-to-Noise Ratio) の低下と,床や壁,天井などによる音の反射が引き起こす残響が認識精度を劣化させる主な要因として知られており,これまでに様々な対策が提案されている.本稿では,DNN を用いた新たな残響抑圧技術を提案した.提案法は,残響による歪みを加えた特徴量からクリーンな特徴量へのマッピング関数を直接 DNN に学習させる従来の方式とは異なり,線形予測フィルタの係数を推定する DNN を学習し,DNN から出力されたフィルタを用いて残響の抑圧を行う.残響を精度よくモデル化するため,提案法は長時間の時系列パターンのモデル化に適した LSTM (Long-Short Term Memory) を用いてネットワークを構築する. 2014 年に開催された国際コンペである REVERB challenge の単一マイクを用いたタスクにて提案法の評価を行った結果,処理にかかる遅延を 10 ミリ秒に抑えつつ実音声の単語認識誤りを 29.7 \% から 25.3 \% に削減できた.},
  abstract_en = {Performances of automatic speech recognition (ASR) systems have been drastically improved by DNN (Deep Neural Network). However, distant ASR is still a challenging problem. The difficulty of the distant ASR is caused mainly by two factors; decrease of SNR (Signal-to-Noise Ratio) due to sound attenuation, and reverberation which is created when a sound reflects off the wall, floor and ceiling. In this paper, we propose a novel dereverberation method based on DNN. Different from conventional DNN-based approaches which train mapping functions from corrupted features to clean features directly, the proposed method trains DNN which estimates coefficients of a linear prediction filter, and then dereverberates using the filter outputed from the trained DNN. To model reverberation accurately, the proposed method utilizes LSTM (Long-Short Term Memory) which is appropriate for modeling time-series patterns. Experiments were performed on the REVERB challenge task which was an international competition held in 2014. The proposed method reduced WER (Word Error Rate) from 29.7\% to 25.3\% with short latency of 10 ms.},
}