% Journal article in 情報処理学会論文誌, vol. 58, no. 12 (Dec 2017), pp. 2080--2092,
% proposing the Bilingual Segmented Topic Model (BiSTM).
% `author` holds each of the two authors once, in native script; the romanized
% names (family name first) live in the auxiliary `author-romanized` field.
% The abstracts live in `abstract` / `abstract-en` rather than `note`, because
% standard styles print `note` verbatim in the bibliography.
@article{oai:ipsj.ixsq.nii.ac.jp:00185108,
 author           = {田村, 晃裕 and 隅田, 英一郎},
 author-romanized = {Tamura, Akihiro and Sumita, Eiichiro},
 title            = {セグメント構造を持つバイリンガルトピックモデル},
 journal          = {情報処理学会論文誌},
 volume           = {58},
 number           = {12},
 pages            = {2080--2092},
 month            = dec,
 year             = {2017},
 abstract         = {本稿では，各文書を「文書-セグメント（たとえば，段落やセクション）-単語」の階層構造でモデル化する新たな多言語トピックモデル「Bilingual Segmented Topic Model（BiSTM）」を提案する．Bilingual Latent Dirichlet Allocation（BiLDA）などの従来の多言語トピックモデルは，対応関係がある文書のトピック分布を共有させることで，異言語の文書間の対応関係を反映したモデル化を行う．一方で，BiSTMは，文書間の対応関係に加えて，対応関係のあるセグメントのトピック分布も共有させることにより，異言語のセグメント間の対応関係も反映したモデル化を行う．また，本稿では，セグメントが与えられていない場合にも提案モデルを適用できるようにするため，Duら(2013)の教師なしトピック分割手法をBiSTMに導入し，潜在トピックとセグメント境界を同時に推定するモデルも提案する．日英および仏英の多言語コーパスを使った評価実験を通じて，提案モデルはBiLDAよりパープレキシティの観点で優れたモデルであることを示し，対訳対抽出の性能も改善できることを示す．},
 abstract-en      = {This paper proposes the bilingual segmented topic model (BiSTM), which hierarchically models documents by treating each document as a set of segments, e.g., sections. While previous bilingual topic models, such as bilingual latent Dirichlet allocation (BiLDA), consider only cross-lingual alignments between entire documents, the proposed model considers cross-lingual alignments between segments in addition to document-level alignments and assigns the same topic distribution to aligned segments. This paper also presents a method for simultaneously inferring latent topics and segmentation boundaries, incorporating unsupervised topic segmentation into BiSTM. Experiments using a Japanese-English and French-English Wikipedia corpus show that the proposed model significantly outperforms BiLDA in terms of perplexity and demonstrates improved performance in translation pair extraction.}
}