@techreport{oai:ipsj.ixsq.nii.ac.jp:00211886,
  author = {Takizawa, Shinichiro and Tanimura, Yusuke and Nakada, Hidemoto and Takano, Ryousei and Ogawa, Hirotaka},
  issue  = {18},
  month  = {Jul},
  note   = {ABCI is the world's first large-scale Open AI Computing Infrastructure for both developing AI technologies and bridging them into industry, operated by AIST, Japan, since August 2018. It delivers 19.88 petaflops of HPL performance and achieves 70 seconds for training the ResNet-50 model in MLPerf Training v0.6. Last November we achieved the world's fastest records for CosmoFlow and DeepCAM in the MLPerf HPC benchmarks. ABCI was the fastest supercomputer in Japan until Fugaku made its spectacular debut; however, it soon became short of computing capacity and I/O performance due to the rapid expansion of its usage. This forced us to make a major upgrade to ABCI. With this upgrade, we have added 120 compute nodes and a storage system with a capacity of 11 PBytes. We named the whole system, which includes both the existing ABCI and the newly added equipment, ABCI 2.0. ABCI 2.0 provides the same software environment that ABCI provided, enabling existing ABCI users to use the new equipment in the same way they used ABCI. We compared the performance of the existing and new compute nodes and found that the new nodes were 4.1 times faster than the existing nodes when training the ResNet-50 model with PyTorch. We expect that the new nodes will largely contribute to increasing the system throughput.},
  title  = {ABCI 2.0: Advances in Open AI Computing Infrastructure at AIST},
  year   = {2021}
}