% Bilingual IPSJ repository record: authors are given once, in the original script;
% their romanized readings (family name first) are kept in the yomi field.
% NOTE(review): techreport entries require an institution field, which this
% record does not provide -- TODO confirm the issuing body and add it.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00080401,
  author   = {阿曽, 慎平 and 齋藤, 毅 and 後藤, 真孝 and 糸山, 克寿 and 高橋, 徹 and 尾形, 哲也 and 奥乃, 博},
  yomi     = {Aso, Shimpei and Saitou, Takeshi and Goto, Masataka and Itoyama, Katsutoshi and Takahashi, Toru and Ogata, Tetsuya and Okuno, Hiroshi G.},
  title    = {スペクトル変化量のピーク間隔・{F0}・{MFCC}を用いた歌声と朗読音声の自動識別システム},
  number   = {13},
  month    = jan,
  year     = {2012},
  abstract = {本稿では，歌声と朗読音声を識別するシステムについて述べる．入力は無雑音音声，出力は歌声と朗読音声それぞれの尤度 （連続値） である．従来，スペクトル包絡 （MFCC） と基本周波数 （F0） の時間変化に基づいた識別システムが報告されている．これらの特徴量に基づく識別器に，スペクトル変化量のピーク間隔という，音素継続時間に関連する特徴量に基づく識別器を加え，入力音声長に応じて各識別器への重みを変化させた．実験の結果，従来システムでは1秒の音声に対し 86.7\% の精度であったのに対し，本システムでは 90.2\% という結果を得た．本システムが実時間で動作するデモアプリケーションについても述べる．, In this paper we describe a system that discriminates between singing and speaking voices. Given a clean speech signal, it outputs the likelihood of each of the singing and speaking voices. Previous systems use temporal transition of spectral envelope (MFCC) and fundamental frequency (F0) as discrimination features. Our system adds peak interval of spectral change as a phoneme duration feature and weights these features according to the duration of the input speech signal. Experimental results with one-second speech signal show that our system achieves 90.2 \% accuracy compared to 86.7 \% with previous systems. We also describe a real-time application demonstrating our system.},
}