@techreport{oai:ipsj.ixsq.nii.ac.jp:00208888,
  author        = {Yin, Jie and Gerofi, Balazs and Hori, Atsushi and Ishikawa, Yutaka},
  title         = {Scalability Evaluation of {Data Transfer Framework} for Multi-Component Applications},
  institution   = {{Information Processing Society of Japan}},
  number        = {24},
  month         = dec,
  year          = {2020},
  abstract      = {Multi-component workflows play a significant role in High-Performance Computing and Big Data applications. They usually contain multiple, independently developed components that execute side-by-side to perform sophisticated computation and exchange data through file I/O over the parallel file system. However, file I/O can become an impediment in such systems and cause undesirable performance degradation due to its relatively low speed (compared to the interconnect fabric), which is unacceptable especially for applications with strict time constraints. The Data Transfer Framework (DTF) is an I/O arbitration layer working with the PnetCDF I/O library to eliminate the bottleneck by transparently redirecting file I/O operations through the parallel file system, to message passing via the high-speed interconnect fabrics between coupled components. Scalable and high-speed data transfer between components can be thus easily achieved with minimal development effort by using DTF. However, previous work provides insufficient scalability evaluation of DTF. In order to comprehensively evaluate the scalability of an I/O middleware like DTF and highlight its major advantages, we have designed an ensemble-based I/O benchmark that adopts the I/O model of the real-time weather forecasting application called SCALE-LETKF and present the scalability evaluation results of DTF against file I/O on two supercomputers, Fugaku and Oakforest-PACS, respectively. We provide insights into DTF's scalability and performance enhancements with the intention to impact future I/O middleware design.},
  internal-note = {Cleaned auto-export: de-duplicated author list, fixed Given/Family name order, moved duplicated abstract out of note, issue->number, month macro. Institution inferred from OAI identifier (ipsj.ixsq.nii.ac.jp) -- verify against the published report.},
}