% Technical report on the OSCAR API Analyzer cross-platform evaluation (2012).
% Cleaned from a repository export: the author list and the abstract were each
% duplicated, names were written "Given, Family" (BibTeX's comma form is
% "Family, Given"), and the abstract sat in the printed note field.
% NOTE(review): the required institution field is absent from the export; the
% citation key suggests an IPSJ repository record -- confirm and add it.
% NOTE(review): the exported issue value is stored as number; confirm it is
% the right report/issue designation for this series.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00087484,
  author   = {Gonzalez-Alvarez, Cecilia and Kanehagi, Youhei and Takemoto, Kosei and Kishimoto, Yohei and Muto, Kohei and Mikami, Hiroki and Hayashi, Akihiro and Kimura, Keiji and Kasahara, Hironori},
  title    = {Automatic parallelization with {OSCAR} {API} {Analyzer}: a cross-platform performance evaluation},
  number   = {10},
  month    = dec,
  year     = {2012},
  abstract = {To satisfy the demands of auto parallelizing compilers in the diverse industry of multicores, we have developed the OSCAR API Analyzer. It allows programs automatically parallelized by the OSCAR compiler with OSCAR API directives to target many different platforms using just sequential compilers. We have evaluated the execution performance of the parallelization of Fortran SPEC benchmarks (tomcatv, swim2000, mgrid2000) and media C benchmarks (AAC encoder, Optical flow, MPEG2 encoder, MPEG2 decoder, Face detect) on five HPC servers and four embedded multicores. Speedups on servers were up to 18x for 32 cores (swim2000 on Hitachi SR16000), whereas on embedded systems, AAC encoder speedup was up to 47x on TilePro64, for 64 homogeneous cores, and up to 32.65x for the optical flow on the heterogeneous multicore RP-X, using 8 cores and 4 accelerators.},
}