% Technical report on attention-based end-to-end ASR from raw waveform (Wav2Text).
% Cleaned from a repository export: duplicated authors/abstract removed, names put in
% "Family, Given" order, abstract moved out of the printed note field.
% NOTE(review): techreport entries require an institution field, which the exported
% record does not carry; presumably the IPSJ (going by the OAI key) -- confirm and add.
% NOTE(review): number = 3 was exported as "issue"; confirm it is the report number.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00192700,
  author   = {Tjandra, Andros and Sakti, Sakriani and Nakamura, Satoshi},
  title    = {Feature Transfer Learning for {Wav2Text} Sequence-to-Sequence {ASR}},
  number   = {3},
  month    = dec,
  year     = {2018},
  abstract = {In this paper, we construct the first end-to-end attention-based encoder-decoder model to process directly from raw speech waveform to the text transcription. We called the model as ``Attention-based Wav2Text''. To assist the training process of the end-to-end model, we propose to utilize a feature transfer learning. Experimental results also reveal that the proposed Attention-based Wav2Text model directly with raw waveform could achieve a better result in comparison with the attentional encoder-decoder model trained on standard front-end filterbank features.},
}