% Technical report on evaluating a Google N-gram language model for speech
% recognition (2009, issue 33, December).
%
% Maintenance notes:
%  - author holds the five authors once, in the script of the title. The
%    romanized forms live in author_en (a field name styles ignore), written
%    as "Family, Given" to agree with the Japanese name order.
%  - abstract / abstract_en hold the two abstracts that the export had packed
%    into the note field; the wording is kept verbatim from the export.
%  - institution is required for this entry type. The value is inferred from
%    the repository host that appears in the citation key. TODO confirm
%    against the report itself.
%  - month uses the predefined macro so styles can localize it.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00067060,
  author      = {久保, 慶伍 and 三宅, 純平 and 川波, 弘道 and 猿渡, 洋 and 鹿野, 清宏},
  author_en   = {Kubo, Keigo and Miyake, Jumpei and Kawanami, Hiromichi and Saruwatari, Hiroshi and Shikano, Kiyohiro},
  title       = {{Google} {N-gram}を用いた音声認識のタスク汎用性評価の試み},
  institution = {情報処理学会},
  number      = {33},
  month       = dec,
  year        = {2009},
  abstract    = {近年，多様な発話に対応可能な音声対話システムの研究が行われている．その 1 つのアプローチにタスク外発話を検出し，Web 検索で処理する方法がある．しかし，一般に音声対話システムの言語モデルはタスク内の発話を認識できるようドメインを限定して構築されているため，多様性があるタスク外発話を精度良く認識できない．そこで，タスク外発話においてもある程度の認識性能を出せる汎用性の高い言語モデルが必要となる．本報告では，大規模テキストコーパスである Google N-gram （正式名称： Web 日本語Nグラム第 1 版）を用いて言語モデルを構築し，その汎用性を 3 種類の音声データで評価した．読みは形態素解析器 mecab を用いて自動的に付与した．3 種類の音声データにおける単語正解率と単語正解精度を求めた結果，GoogleN-gram から構築した言語モデルは，音声データのドメインに合っている言語モデルよりも性能が劣るものの，新聞コーパスモデルと同等の単語正解率を得た．ただし，今回評価した Google N-gram の言語モデルはあくまでもベースラインであり，誤った読み付与を含んでいるなどの問題点がある．これらを改善すれば，より性能を向上できると考えられる．また，構築した Google N-gram の言語モデルは 3-gram であり，Google N-gram の最大の特徴であるデータ量を有効に活用して 4-gram や 5-gram のモデルを構築すれば，さらなる性能の向上が期待できる．},
  abstract_en = {In recent years, spoken dialogue systems capable of responding to various utterances have been studied. For example, there is an approach that detects out-of-task utterances and process them by the Web retrieval. However, in general, a language model in a spoken dialogue system is built to recognize in-task utterances. Therefore, it is difficult for a spoken dialogue system to recognize various out-of-task utterances with high accuracy. In this report, we constructed a tri-gram language model using the Google N-gram, which is a large text Corpus, and evaluated the versatility of the model with three types of speech data. As the Google N-gram does not include readings, they are automatically given by the morphological analyzer mecab. Results on word correct rate and word accuracy show that the language model built from Google Ngram is inferior to the models that customized for the domain. However, the model has equal performance to the JNAS, the Newspaper language model, on word correct rate. It should be mentioned that the evaluations contained in this report are the first trial and baseline results of the model. Because there are still several problems, such as wrong reading included in the Corpus, we can expect improvements in the performance by correcting them. In addition, as the language model built here is a tri-gram model, If 4-gram or 5-gram models are introduced, further improvement is also expected.},
}