{"links":{},"id":158050,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00158050","sets":["934:1119:8503:8504"]},"path":["8504"],"owner":"11","recid":"158050","title":["Scalable Work Stealing of Native Threads on an x86-64 Infiniband Cluster"],"pubdate":{"attribute_name":"公開日","attribute_value":"2016-03-08"},"_buckets":{"deposit":"40215097-583d-4bf2-9bc4-383f0cf8df41"},"_deposit":{"id":"158050","pid":{"type":"depid","value":"158050","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"Scalable Work Stealing of Native Threads on an x86-64 Infiniband Cluster","author_link":["300864","300866","300863","300865"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Scalable Work Stealing of Native Threads on an x86-64 Infiniband Cluster"},{"subitem_title":"Scalable Work Stealing of Native Threads on an x86-64 Infiniband Cluster","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[高性能計算] task parallelism, lightweight multithreading, thread migration, inter-node work stealing, remote direct memory access, Infiniband","subitem_subject_scheme":"Other"}]},"item_type_id":"3","publish_date":"2016-03-08","item_3_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"The University of Tokyo"},{"subitem_text_value":"The University of Tokyo"}]},"item_3_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"The University of Tokyo","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":11,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/158050/files/IPSJ-TACS0901003.pdf","label":"IPSJ-TACS0901003.pdf"},"date":[{"dateType":"Available","dateValue":"2018-03-08"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-TACS0901003.pdf","filesize":[{"value":"1.6 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"16"},{"tax":["include_tax"],"price":"0","billingrole":"11"},{"tax":["include_tax"],"price":"0","billingrole":"14"},{"tax":["include_tax"],"price":"0","billingrole":"15"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"fdb3f784-d32d-456d-9095-8daeca7824b9","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2016 by the Information Processing Society of Japan"}]},"item_3_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Shigeki, Akiyama"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Kenjiro, Taura"}],"nameIdentifiers":[{}]}]},"item_3_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Shigeki, Akiyama","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Kenjiro, Taura","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_3_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11833852","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal 
article"}]},"item_3_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7829","subitem_source_identifier_type":"ISSN"}]},"item_3_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Task parallelism on large-scale distributed memory environments is still a challenging problem. The focuses of our work are flexibility of task model and scalability of inter-node load balancing. General task models provide functionalities for suspending and resuming tasks at any program point, and such a model enables us flexible task scheduling to achieve higher processor utilization, locality-aware task placement, etc. To realize such a task model, we have to employ a thread―an execution context containing register values and stack frames―as a representation of a task, and implement thread migration for inter-node load balancing. However, an existing thread migration scheme, iso-address, has a scalability limitation: it requires virtual memory proportional to the number of processors in each node. In large-scale distributed memory environments, this results in a huge virtual memory usage beyond the virtual address space limit of current 64bit CPUs. Furthermore, this huge virtual memory consumption makes it impossible to implement one-sided work stealing with Remote Direct Memory Access (RDMA) operations. One-sided work stealing is a popular approach to achieving high efficiency of load balancing; therefore this also limits scalability of distributed memory task parallelism. In prior work, we propose uni-address, a new thread migration scheme which significantly reduces virtual memory usage for thread stacks and enables RDMA-based work stealing, and implements a lightweight multithread library supporting RDMA-based work stealing on top of Fujitsu FX10 system. In this paper, we port the library to an x86-64 Infiniband cluster with GASNet communication library. 
We develop one-sided and non one-sided implementations of inter-node work stealing, and evaluate the performance and efficiency of the work stealing implementations.\n\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.24(2016) No.3(online)\n------------------------------","subitem_description_type":"Other"}]},"item_3_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Task parallelism on large-scale distributed memory environments is still a challenging problem. The focuses of our work are flexibility of task model and scalability of inter-node load balancing. General task models provide functionalities for suspending and resuming tasks at any program point, and such a model enables us flexible task scheduling to achieve higher processor utilization, locality-aware task placement, etc. To realize such a task model, we have to employ a thread―an execution context containing register values and stack frames―as a representation of a task, and implement thread migration for inter-node load balancing. However, an existing thread migration scheme, iso-address, has a scalability limitation: it requires virtual memory proportional to the number of processors in each node. In large-scale distributed memory environments, this results in a huge virtual memory usage beyond the virtual address space limit of current 64bit CPUs. Furthermore, this huge virtual memory consumption makes it impossible to implement one-sided work stealing with Remote Direct Memory Access (RDMA) operations. One-sided work stealing is a popular approach to achieving high efficiency of load balancing; therefore this also limits scalability of distributed memory task parallelism. 
In prior work, we propose uni-address, a new thread migration scheme which significantly reduces virtual memory usage for thread stacks and enables RDMA-based work stealing, and implements a lightweight multithread library supporting RDMA-based work stealing on top of Fujitsu FX10 system. In this paper, we port the library to an x86-64 Infiniband cluster with GASNet communication library. We develop one-sided and non one-sided implementations of inter-node work stealing, and evaluate the performance and efficiency of the work stealing implementations.\n\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.24(2016) No.3(online)\n------------------------------","subitem_description_type":"Other"}]},"item_3_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌コンピューティングシステム（ACS）"}],"bibliographicIssueDates":{"bibliographicIssueDate":"2016-03-08","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicVolumeNumber":"9"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"created":"2025-01-19T00:31:50.271748+00:00","updated":"2025-01-20T06:55:05.381222+00:00"}