% Journal article exported from the IPSJ repository (Japanese-language paper).
% The non-standard fields author-en / abstract-en keep the English metadata that
% the export had merged into author / note; standard styles ignore them.
@article{oai:ipsj.ixsq.nii.ac.jp:00217725,
 author = {柳田, 智也 and サクテイ, サクリアニ and 中村, 哲},
 author-en = {Yanagita, Tomoya and Sakti, Sakriani and Nakamura, Satoshi},
 title = {日本語逐次音声合成における合成単位},
 journal = {情報処理学会論文誌},
 volume = {63},
 number = {4},
 pages = {1149--1158},
 month = apr,
 year = {2022},
 abstract = {同時音声通訳システムは，話者の発話中に翻訳を行い音声を生成する．その実現のために，文より短いテキストから，音声を生成する逐次音声合成が必要である．本論文は，同時音声通訳システムの実現を目指して，日本語における逐次音声合成の提案を目的とする．先行研究は，逐次音声合成のために使用する言語特徴を制限し，合成範囲を単語としている．しかし，日本語音声合成は，アクセント句と呼ばれる単位が重要であり，単語の逐次音声合成が，音声品質と遅延のトレードオフとして適さない可能性がある．本論文では，日本語逐次音声合成のため，逐次音声合成の言語特徴を提案する．そして，言語特徴の組合せから，遅延と音声品質に最適な合成範囲を決定する．実験結果より，アクセント句から呼気段落の合成範囲が音声の品質を保持するために必要であることを示した．さらに，遅延評価を通して，アクセント句が日本語の逐次音声合成へ適することを示した．},
 abstract-en = {A simultaneous speech translation system translates while the speaker speaks and generates speech sequentially. To construct the system, an incremental Text-to-speech (iTTS) system which synthesizes a speech in a shorter synthesis unit is required. This work proposes a Japanese iTTS system for the simultaneous speech translation. Most of the researchers used the word unit as the synthesis unit. However, in Japanese speech synthesis, a unit called an accent phrase is important, and word-by-word synthesis may not be suitable. In this paper, we propose a linguistic feature and synthesis unit for Japanese iTTS. Experimental result shows that accent phrase or breath group are essential for a Japanese iTTS system as a trade-off between quality and synthesis units for the Japanese iTTS. Then, an accent phrase is a more appropriate incremental synthesis unit than a breath group through delay analysis.}
}