% Cleaned-up repository export (the citation key is kept unchanged so existing \cite calls still resolve).
%  - The export listed every author twice and repeated the abstract twice; duplicates removed.
%  - Author names are written in "Family, Given" form. The export had "Jaeyoung, Lee" and
%    "Tatsuya, Kawahara", which BibTeX would parse with the given name as the surname.
%    NOTE(review): assumes the family names are Lee and Kawahara -- confirm against the paper.
%  - The abstract lived in the note field, which standard styles print in the bibliography;
%    it is now in the abstract field, which standard styles ignore.
%  - The export's issue field is now number, the field standard styles read for reports.
%  - TODO: techreport entries require an institution field; the export did not provide one.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00232534,
  author   = {Lee, Jaeyoung and Kawahara, Tatsuya},
  title    = {Low-resource Speech Recognition using Hierarchical {CTC} and Large Pre-trained Model},
  number   = {64},
  month    = feb,
  year     = {2024},
  abstract = {The performance of automatic speech recognition (ASR) for low-resource languages has seen significant improvement, owing to the recent advancements in large-scale pre-training and fine-tuning paradigms. This study investigates optimizing fine-tuning for low-resource languages, utilizing hierarchical intermediate connectionist temporal classification (CTC). This approach employs target units of varying granularity, from subwords to phonemes, across different CTC losses, taking advantage of the hierarchical linguistic structure of natural languages. We apply this technique to the fine-tuning of a large pre-trained model, investigating the conditions under which it is most effective.},
}