% NOTE(review): author names are stored as "Family, Given"; this assumes the
% repository export had the two parts swapped. Confirm against the paper.
% NOTE(review): techreport entries require an `institution` field, which the
% export did not provide. Presumably IPSJ, judging by the OAI key; confirm and add.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00222579,
  author   = {Mikamo, Naoki and Sakakibara, Yasubumi and Sato, Kengo},
  title    = {Extending a deep learning-based {RNA} secondary structure prediction algorithm for {RNA} modifications},
  number   = {10},
  month    = nov,
  year     = {2022},
  abstract = {Various experimental and computational methods have been proposed for RNA secondary structure prediction. However, computational prediction of RNA secondary structure considering RNA modifications has not been done yet. In this study, we attempted to develop a method for predicting secondary structure from RNA sequences containing RNA modifications. Our method is based on MXfold2, the most accurate computational RNA secondary structure method based on deep learning that does not take into account RNA modifications. We have developed two types of representations of modified bases: one-hot representation, which is the same as before, and chemical fingerprinting. In particular, the fingerprinting method allows bases to be input as chemical structures and is expected to predict the secondary structure of modified bases with higher accuracy than the one-hot representation. Then, we built our dataset including RNA modifications. Since RNA sequences containing modifications and their secondary structures are limited, we trained on a dataset that did not include modifications and then fine-tuned it with tRNA data to handle the modifications. The dataset with modifications used in this study was obtained from MODOMICS, a database of RNAs containing modifications, and other literature. For benchmarking, our method was trained on two types of sequences, one with and one without modifications during fine tuning. We compared the base representations of the existing methods MXfold2, the one-hot representation extended to the modified bases, and the fingerprinting representation. Comparison with MXfold2 shows that it is possible to predict the secondary structure of RNA more accurately by distinguishing modifications for input sequences that contain modifications.
It was also suggested that the use of fingerprint representation rather than one-hot representation can deal with RNA modifications that do not appear in the training data.},
}