% Technical report on automatic Soramimi generation using speech recognition (2024).
% The citation key is kept verbatim so existing citations keep resolving.
% - `author` holds the three authors once, in native script, as "Family, Given".
% - `author-en` is an ignored helper field with the romanized forms of the same
%   three names, also as "Family, Given".
% - `abstract` holds the Japanese abstract followed by the English one; it is
%   kept out of `note` so styles do not print it in the reference list.
% NOTE(review): techreport entries require an `institution` field; none is
% present in this record -- add it once confirmed against the publication.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00232825,
  author    = {半田, 尚暉 and 植村, あい子 and 吉田, 典正},
  author-en = {Handa, Naoki and Uemura, Aiko and Yoshida, Norimasa},
  title     = {音声認識を用いた空耳自動生成の検討},
  number    = {8},
  month     = mar,
  year      = {2024},
  abstract  = {本研究では,「空耳」と呼ばれる,音声を聞いた際に本来の言葉とは異なる意味の言葉に聞こえる現象に着目する.空耳は異言語を母国語で誤認することで生じると仮定し,入力の言語(例えば英語)とは異なる言語(日本語)で学習された音声認識ツールを用いて,音声に含まれる非言語情報を考慮した空耳の自動生成に取り組む.具体的には,TV 番組の「空耳アワー」で報告された楽曲に対して日本語モデルの音声認識ツールを複数用いて,出力結果を空耳として利用できるか検証する.発音記号に着目し報告された楽曲から評価基準を定め,発音での近さを評価した.そして,報告された事例と楽曲の聴こえ方が似ている曲を選別し,音声認識を行った.実験結果から,高性能な音声認識手法ほど空耳の自動生成には向かないことが分かった.また,音声認識を通じて新しい空耳を作成できるケースもあり,空耳を音声認識によって生成できる可能性が示唆された.,
               This study focuses on a phenomenon known as “Soramimi” in which we regard some sounds as having a different meaning from the original words.
               We assume that Soramimi is caused by the misrecognition of a foreign language with one's native language.
               Accordingly, we use speech recognition tools trained with a different language (e.g., Japanese) from the input language (e.g., English) to generate Soramimi by taking into account the nonverbal information contained in vocals.
               Specifically, we use several Japanese model speech recognition tools for songs reported in the TV program “Soramimi Hour” to verify whether the output results are usable as Soramimi.
               Our analysis focuses on phonetic symbols and defines the evaluation criteria from the reported songs in Soramimi Hour.
               We evaluated the similarity based on pronunciation.
               Then, we selected songs with particular similarities between the way the music was heard and the reported cases.
               We then performed speech recognition on these selections.
               Experimental results show that the more robust speech recognition methods are unsuitable for automatic Soramimi generation.
               In some cases, new Soramimi were generated through speech recognition, suggesting that Soramimi can be generated by speech recognition.},
}