% Technical report (2009) by Fukuda, Ichikawa and Nishimura: evaluation of a voice
% activity detection method through speech recognition experiments.
% - `author` holds each person once, in the script of the title; the romanized
%   forms are kept in the non-standard `author_romanized` field, which
%   bibliography styles ignore.
% - `abstract` holds the Japanese abstract followed by the English one.
% NOTE(review): `institution` is inferred from the OAI identifier used as the
%   citation key (ipsj.ixsq.nii.ac.jp) -- confirm against the original report.
% NOTE(review): `number` carries the value exported as `issue` -- confirm that it
%   is the report number.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00066399,
  author           = {福田, 隆 and 市川, 治 and 西村, 雅史},
  title            = {長時間スペクトル変動と調波構造に基づく発話区間検出法の音声認識による評価},
  institution      = {Information Processing Society of Japan},
  number           = {1},
  year             = {2009},
  month            = oct,
  abstract         = {発話区間検出 （VAD） は音声認識を高精度化するための重要な要素の一つである．これまでに我々は車内環境を対象とした雑音に頑健な VAD 法を提案し，平均音素長以上の区間から抽出される長時間変動情報と調波構造情報に由来する特徴量が，VAD の性能改善に大きく寄与することを示した．しかし，過去の研究報告では発話単位の検出精度のみに注目していたため評価が限定的であった．本報告では，フレーム単位での音声／非音声識別能力，及び音声認識システムにおける提案法の効果を検証し，多方面からの考察を加える．CENSREC-2 を用いた音声認識実験において，提案法は ETSI-AFE で採用されている VAD と比較して認識誤りを 29.1\% 削減した．, Accurate voice activity detection (VAD) is important for robust automatic speech recognition (ASR) systems. We have proposed a statistical-model-based VAD using the long-term temporal and  harmonic structure-related information in speech, which shows good robustness against noise in an automobile environment. But in our previous works, we focused on only the utterance-based speech segment detection performance. This paper further investigates frame-based speech/non-speech discrimination performance of VAD and ASR performance. In an experiment using CENSREC-2, the word error rate was reduced by 29.1\% in a test that included an ASR system.},
  author_romanized = {Fukuda, Takashi and Ichikawa, Osamu and Nishimura, Masafumi},
}