{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00172916","sets":["1164:1579:8444:8870"]},"path":["8870"],"owner":"11","recid":"172916","title":["パラメータサーバを用いた並列機械学習システムにおける耐故障性のシミュレーション"],"pubdate":{"attribute_name":"公開日","attribute_value":"2016-08-01"},"_buckets":{"deposit":"df53e263-f84e-4e4e-8a87-37e60b0b5ede"},"_deposit":{"id":"172916","pid":{"type":"depid","value":"172916","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"パラメータサーバを用いた並列機械学習システムにおける耐故障性のシミュレーション","author_link":["352162","352163","352161","352164","352166","352165"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"パラメータサーバを用いた並列機械学習システムにおける耐故障性のシミュレーション"},{"subitem_title":"A simulation study on fault tolerancy of parallel machine learning systems with parameter servers","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"耐故障・信頼性","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2016-08-01","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"筑波大学/産業技術総合研究所"},{"subitem_text_value":"産業技術総合研究所/筑波大学"},{"subitem_text_value":"産業技術総合研究所/筑波大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"University of Tsukuba / National Institute of Advanced Industrial Science and Technology","subitem_text_language":"en"},{"subitem_text_value":"National Institute of Advanced Industrial Science and Technology / University of Tsukuba","subitem_text_language":"en"},{"subitem_text_value":"National Institute of Advanced Industrial Science and Technology / University of Tsukuba","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/172916/files/IPSJ-ARC16221020.pdf","label":"IPSJ-ARC16221020.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-ARC16221020.pdf","filesize":[{"value":"479.6 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"16"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"dd01d6aa-c21f-48d1-9522-34cee4e4d9af","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2016 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"黎, 明曦"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"谷村, 勇輔"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"中田, 秀基"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Mingxi, Li","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Yusuke, Tanimura","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Hidemoto, Nakada","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10096105","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8574","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"大規模なデータを対象とする機械学習システムの高速化には並列化が必須である.パラメータサーバと多数のワーカ計算機を用いるデータ並列機械学習システムにおいては,一般の大規模システムと同様に耐故障性が問題になるが,並列機械学習システムにおける耐故障性の議論は進んでいない.本稿ではパラメータサーバを用いた並列機械学習システムにおける耐故障性に関して議論し,シミュレーションを用いて大規模なシステムにおける定量的な評価を行う.その結果,パラメータサーバ上の情報を用いることでチェックポイントのコストを大幅に低減することができること,さらには,収束への悪影響を許容すれば,チェックポイントからのリカバリコストも低減できることを明らかにした.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Parallel computation is essential for machine learning systems to be more faster. There are two techniques to build parallel machine learning systems; namely data parallel method and model parallel method. In this paper, we only disuss data parallel where large number of parameter servers and computation servers communicate each other to perform computation. Fault tolerancy is a big problem on large scale computation system in general, however, there are not much discussions about the fault folerancy of parallel machine learning system, in this paper, we discuss the fault tolerancy of parallel machine learning systems which use parameter servers. Parameter servers gives extra redundancy to the system and could double as the checkpoint server. We also quantitatively evaluate several fault tolerance method using parallel environment simulator SimGrid.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告システム・アーキテクチャ(ARC)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2016-08-01","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"20","bibliographicVolumeNumber":"2016-ARC-221"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":172916,"updated":"2025-01-20T07:32:46.419255+00:00","links":{},"created":"2025-01-19T00:43:15.863792+00:00"}