% Journal article (vol. 60, no. 3, 2019) proposing a simple syntax-based
% preordering method for statistical machine translation.
%
% Field notes:
%   author       the five authors, Japanese script, "Family, Given" form.
%   author-en    romanized forms of the same five authors ("Family, Given");
%                a supplementary field that standard styles do not print.
%   number       issue within the volume (the export used "issue", which
%                classic BibTeX styles ignore).
%   month        predefined macro, deliberately left unbraced.
%   abstract     Japanese abstract; abstract-en holds the English abstract.
%                The export stored both in "note", which styles would print
%                verbatim in the reference list.
@article{oai:ipsj.ixsq.nii.ac.jp:00195410,
  author      = {星野, 翔 and 宮尾, 祐介 and 須藤, 克仁 and 林, 克彦 and 永田, 昌明},
  author-en   = {Hoshino, Sho and Miyao, Yusuke and Sudoh, Katsuhito and Hayashi, Katsuhiko and Nagata, Masaaki},
  title       = {統計的機械翻訳のための統語に基づく単純な事前並べ替え手法},
  journal     = {情報処理学会論文誌},
  volume      = {60},
  number      = {3},
  pages       = {890--902},
  month       = mar,
  year        = {2019},
  abstract    = {本論文は,英語と日本語のように語順が大きく異なる言語対における統計的機械翻訳の精度向上のため,統語に基づく単純な事前並べ替え手法を提案する.まず,句構造構文解析器を用いて入力文を構文解析および2分木化して,2分木化句構造木を得る.次に,線形サポートベクタマシンを2値分類器として用いて,2分木の各ノードに反転または非反転の並べ替えラベルを付与する.その後,構文木に付与された並べ替えラベルに従い,入力文を並べ替え,統計的機械翻訳システムを用いて翻訳する.類似の手法は過去に幾度となく試行されているが,提案手法は,2値分類器の学習に必要なオラクル並べ替えラベルおよび分類器の素性テンプレートを同時に改良する.大規模特許データを用いる英日・日英翻訳実験において,我々の提案手法は先行研究の事前並べ替え手法の翻訳精度を大幅に改善できることを示す.},
  abstract-en = {We propose a simple syntax-based preordering method that improves translation accuracy of distant language pairs, such as English and Japanese, using statistical machine translation. Our method reorders a source-side binary constituent tree by assigning reordering labels, whether the order of child nodes under a binary node should be reversed, using linear support vector machine as a binary classifier. While this idea has been repeatedly implemented in the task of preordering, the way how to obtain oracle reordering labels used for training the classifier remains in a nontrivial open problem. We introduce a procedure to obtain the oracle reordering labels as well as a set of features that improves binary classification accuracy on the task of predicting reordering labels. The tree reordered according to the classified labels is used to yield reordered source sentence, which is fed to a standard statistical machine translation system to generate translation. Experimental results in English-to-Japanese and Japanese-to-English patent translation show that our proposal substantially improves a previously proposed method in terms of translation accuracy.},
}