@comment{
  Preprint record exported from the IPSJ repository. According to the notice
  kept in the note field, the version to cite is the one published in the
  Journal of Information Processing (JIP), Vol. 24 (2016), No. 3; the doi field
  holds the DOI that the notice gives for that JIP version.
  Names are stored as Family, Given so that BibTeX parses the surname correctly.
  The abstract lives in the abstract field (ignored by standard styles) rather
  than in note, so it is not printed in the bibliography.
}
@article{oai:ipsj.ixsq.nii.ac.jp:00160365,
  author   = {Tachibana, Hideyuki and Mizuno, Yu and Ono, Nobutaka and Sagayama, Shigeki},
  title    = {A Real-time Audio-to-audio Karaoke Generation System for Monaural Recordings Based on Singing Voice Suppression and Key Conversion Techniques},
  journal  = {情報処理学会論文誌},
  volume   = {57},
  number   = {5},
  month    = may,
  year     = {2016},
  doi      = {10.2197/ipsjjip.24.470},
  note     = {This is a preprint of an article intended for publication in the Journal of Information Processing (JIP). This preprint should not be cited. This article should be cited as: Journal of Information Processing Vol.24 (2016) No.3 (online)},
  abstract = {This paper describes an automatic karaoke generation system, which can suppress the singing voice in audio music signals, and can also change the pitch of the song. Furthermore, this system accepts the streaming input, and it works in real-time. To the best of our knowledge, there have been no real-time audio-to-audio karaoke system that has the two functions above. This paper particularly describes the two technical components, as well as some comments on the implementation. In this system, the authors employed two signal processing techniques: singing voice suppression that is based on two-stage HPSS, a vocal enhancement technique that the authors proposed previously, and a pitch shift technique that is based on the spectrogram stretch and phase vocoder. The attached video file shows that the system works in real-time, and the sound quality may be practically acceptable.},
}