@techreport{oai:ipsj.ixsq.nii.ac.jp:00182827,
  author = {Zhang, Duo and Li, Mingxi and Tanimura, Yusuke and Nakada, Hidemoto},
  issue  = {26},
  month  = {Jul},
  note   = {For modern machine learning systems, including deep learning systems, parallelization is inevitable since they must process massive amounts of training data. One active area is data-parallel learning, in which multiple nodes cooperate by periodically exchanging parameters/gradients. In this paper, we focus on the network resource requirements of this kind of application. We investigate a 3-layered Clos network and an omega network, in addition to the 2-layered fat-tree network on which we have already reported. As parameter exchange methods, we tested the direct parameter exchange method and the centralized server method. We evaluated these three types of network with SimGrid, a simulator for distributed environments, and confirmed that, with suitable parameter exchange methods, performance can be maintained at higher oversubscription factors.},
  title  = {A study on Network Structure and Parameter Exchange Method in large-scale Cluster for Machine Learning},
  year   = {2017}
}