% Cleaned repository auto-export: de-duplicated authors, normalised names to
% "Family, Given" (export had them inverted), moved the twice-pasted abstract
% from note into a single abstract field, month macro, issue -> number.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00237166,
  author        = {Chaiwongyen, Anuwat and Duangpummet, Suradej and Karnjana, Jessada and Kongprawechnon, Waree and Unoki, Masashi},
  title         = {Study on Potential of Speech-pathological Features for Deepfake Speech Detection},
  institution   = {{Information Processing Society of Japan}},
  number        = {45},
  month         = jul,
  year          = {2024},
  abstract      = {This paper proposes a method to detect deepfakes using speech-pathological features commonly used to assess unnaturalness in disordered voices associated with voice-production mechanisms. We investigated the potential of eleven speech-pathological features for distinguishing between genuine and deepfake speech, including jitter (three types), shimmer (four types), harmonics-to-noise ratio, cepstral-harmonics-to-noise ratio, normalized noise energy, and glottal-to-noise excitation ratio. This paper introduces a new method that employs segmental frames of analysis technique to significantly improve the effectiveness of deepfake speech detection. We evaluated the proposed method using the datasets from the Automatic Speaker Verification Spoofing and Countermeasures Challenges (ASVspoof). The results demonstrate that the proposed method outperforms the baselines in terms of recall and F2-score, achieving 99.46% and 98.59%, respectively, on the ASVspoof 2019 dataset.},
  internal-note = {NOTE(review): institution inferred from the IPSJ OAI repository key (ipsj.ixsq.nii.ac.jp) -- likely an IPSJ SIG Technical Report; confirm report series and number=45 against the source record.},
}