% Technical report: "Lecture Translation Benchmark from Online Educational Videos".
%
% Cleaned from an auto-exported record:
%   - The author list appeared twice (once as "X,Y", once as "X Y"); it is now
%     listed once, in "Last, First" form, following the "First Last" variant
%     of the original record.
%     NOTE(review): confirm the given/family split of "Nanakorn Nina".
%   - The abstract was stored twice inside `note`, which styles print in the
%     bibliography; a single copy now lives in `abstract`.
%   - `month` uses the predefined macro; the numeric issue is in `number`.
%   - TODO: `institution` is required for this entry type but is absent from
%     the original record; add it once confirmed against the publisher page.
@techreport{oai:ipsj.ixsq.nii.ac.jp:02007728,
  author   = {Hudi, Frederikus and
              Nina, Nanakorn and
              Vasselli, Justin and
              Sakai, Yusuke and
              Kamigaito, Hidetaka and
              Tamura, Akihiro and
              Watanabe, Taro},
  title    = {Lecture Translation Benchmark from Online Educational Videos},
  number   = {2},
  year     = {2026},
  month    = feb,
  abstract = {We introduce LecTrans, a large-scale benchmark for evaluating
              multimodal translation in academic lectures. While existing MT
              benchmarks primarily focus on text-only or tightly aligned
              inputs, academic lectures require translation across spoken and
              visual modalities in long-form, knowledge-intensive settings,
              making them a challenging and informative testbed for
              multimodal language understanding. LecTrans is composed of
              approximately 350 hours of expert-taught online lectures and
              includes transcripts, presentation slides, and professionally
              verified translations in seven languages. A 162-minute
              evaluation subset further includes human-corrected transcripts
              and domain-term annotations across 17 subjects. We formulate
              lecture translation as a dual-task evaluation problem that
              distinguishes between transcription translation and slide
              translation. This formulation enables modality-aware analysis
              of translation behavior and error propagation. We provide
              strong text-only and multimodal baselines and present analysis
              that reveals challenges in long-context and domain-specific
              translation. LecTrans serves as a benchmark for assessing the
              capabilities of multimodal translation systems on academic
              content.},
}