[{"abstract":[{"text":"Determining the degree of inherent parallelism in classical sequential algorithms and leveraging it for fast parallel execution is a key topic in parallel computing, and detailed analyses are known for a wide range of classical algorithms. In this paper, we perform the first such analysis for the fundamental Union-Find problem, in which we are given a graph as a sequence of edges, and must maintain its connectivity structure under edge additions. We prove that classic sequential algorithms for this problem are well-parallelizable under reasonable assumptions, addressing a conjecture by [Blelloch, 2017]. More precisely, we show via a new potential argument that, under uniform random edge ordering, parallel union-find operations are unlikely to interfere: T concurrent threads processing the graph in parallel will encounter memory contention O(T2 · log |V| · log |E|) times in expectation, where |E| and |V| are the number of edges and nodes in the graph, respectively. We leverage this result to design a new parallel Union-Find algorithm that is both internally deterministic, i.e., its results are guaranteed to match those of a sequential execution, but also work-efficient and scalable, as long as the number of threads T is O(|E|1 over 3 - ε), for an arbitrarily small constant ε > 0, which holds for most large real-world graphs. We present lower bounds which show that our analysis is close to optimal, and experimental results suggesting that the performance cost of internal determinism is limited.","lang":"eng"}],"file":[{"checksum":"72e312aabf0c5248c99b5cd3a88e4c88","content_type":"application/pdf","file_size":2087937,"creator":"dernst","access_level":"open_access","file_name":"2023_SPAA_Fedorov.pdf","relation":"main_file","date_created":"2023-07-31T10:53:08Z","success":1,"file_id":"13334","date_updated":"2023-07-31T10:53:08Z"}],"publication_status":"published","article_processing_charge":"Yes (in subscription journal)","oa_version":"Published Version","quality_controlled":"1","_id":"13262","year":"2023","date_created":"2023-07-23T22:01:12Z","title":"Provably-efficient and internally-deterministic parallel Union-Find","external_id":{"arxiv":["2304.09331"]},"has_accepted_license":"1","citation":{"short":"A. Fedorov, D. Hashemi, G. Nadiradze, D.-A. Alistarh, in:, Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures, Association for Computing Machinery, 2023, pp. 261–271.","chicago":"Fedorov, Alexander, Diba Hashemi, Giorgi Nadiradze, and Dan-Adrian Alistarh. “Provably-Efficient and Internally-Deterministic Parallel Union-Find.” In <i>Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures</i>, 261–71. Association for Computing Machinery, 2023. <a href=\"https://doi.org/10.1145/3558481.3591082\">https://doi.org/10.1145/3558481.3591082</a>.","ista":"Fedorov A, Hashemi D, Nadiradze G, Alistarh D-A. 2023. Provably-efficient and internally-deterministic parallel Union-Find. Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures. SPAA: Symposium on Parallelism in Algorithms and Architectures, 261–271.","mla":"Fedorov, Alexander, et al. “Provably-Efficient and Internally-Deterministic Parallel Union-Find.” <i>Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures</i>, Association for Computing Machinery, 2023, pp. 261–71, doi:<a href=\"https://doi.org/10.1145/3558481.3591082\">10.1145/3558481.3591082</a>.","ieee":"A. Fedorov, D. Hashemi, G. Nadiradze, and D.-A. Alistarh, “Provably-efficient and internally-deterministic parallel Union-Find,” in <i>Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures</i>, Orlando, FL, United States, 2023, pp. 261–271.","ama":"Fedorov A, Hashemi D, Nadiradze G, Alistarh D-A. Provably-efficient and internally-deterministic parallel Union-Find. In: <i>Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures</i>. Association for Computing Machinery; 2023:261-271. doi:<a href=\"https://doi.org/10.1145/3558481.3591082\">10.1145/3558481.3591082</a>","apa":"Fedorov, A., Hashemi, D., Nadiradze, G., &#38; Alistarh, D.-A. (2023). Provably-efficient and internally-deterministic parallel Union-Find. In <i>Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures</i> (pp. 261–271). Orlando, FL, United States: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3558481.3591082\">https://doi.org/10.1145/3558481.3591082</a>"},"date_updated":"2023-07-31T10:54:32Z","author":[{"id":"2e711909-896a-11ed-bdf8-eb0f5a2984c6","full_name":"Fedorov, Alexander","first_name":"Alexander","last_name":"Fedorov"},{"last_name":"Hashemi","first_name":"Diba","full_name":"Hashemi, Diba","id":"ed9595ea-2f8f-11ee-ba95-d2b546540783"},{"last_name":"Nadiradze","first_name":"Giorgi","full_name":"Nadiradze, Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","last_name":"Alistarh"}],"oa":1,"publication_identifier":{"isbn":["9781450395458"]},"publication":"Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures","day":"17","month":"06","department":[{"_id":"DaAl"},{"_id":"GradSch"}],"publisher":"Association for Computing Machinery","page":"261-271","conference":{"start_date":"2023-06-17","name":"SPAA: Symposium on Parallelism in Algorithms and Architectures","location":"Orlando, FL, United States","end_date":"2023-06-19"},"arxiv":1,"scopus_import":"1","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"status":"public","language":[{"iso":"eng"}],"date_published":"2023-06-17T00:00:00Z","doi":"10.1145/3558481.3591082","ddc":["000"],"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file_date_updated":"2023-07-31T10:53:08Z"},{"doi":"10.1145/3503221.3508432","date_published":"2022-04-02T00:00:00Z","language":[{"iso":"eng"}],"status":"public","type":"conference","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","publisher":"Association for Computing Machinery","department":[{"_id":"DaAl"}],"month":"04","isi":1,"main_file_link":[{"open_access":"1","url":" https://doi.org/10.48550/arXiv.2109.00657"}],"arxiv":1,"page":"353-367","conference":{"name":"PPoPP: Sympopsium on Principles and Practice of Parallel Programming","location":"Seoul, Republic of Korea","start_date":"2022-04-02","end_date":"2022-04-06"},"acknowledgement":"We would like to thank the anonymous reviewers for their useful comments. This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","scopus_import":"1","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning"}],"external_id":{"arxiv":["2109.00657"],"isi":["000883318200025"]},"title":"Multi-queues can be state-of-the-art priority schedulers","date_updated":"2023-08-03T06:48:35Z","author":[{"full_name":"Postnikova, Anastasiia","last_name":"Postnikova","first_name":"Anastasiia"},{"first_name":"Nikita","last_name":"Koval","id":"2F4DB10C-F248-11E8-B48F-1D18A9856A87","full_name":"Koval, Nikita"},{"full_name":"Nadiradze, Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","first_name":"Giorgi","last_name":"Nadiradze"},{"orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","first_name":"Dan-Adrian"}],"citation":{"ama":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. Multi-queues can be state-of-the-art priority schedulers. In: <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>. Association for Computing Machinery; 2022:353-367. doi:<a href=\"https://doi.org/10.1145/3503221.3508432\">10.1145/3503221.3508432</a>","apa":"Postnikova, A., Koval, N., Nadiradze, G., &#38; Alistarh, D.-A. (2022). Multi-queues can be state-of-the-art priority schedulers. In <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i> (pp. 353–367). Seoul, Republic of Korea: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3503221.3508432\">https://doi.org/10.1145/3503221.3508432</a>","ieee":"A. Postnikova, N. Koval, G. Nadiradze, and D.-A. Alistarh, “Multi-queues can be state-of-the-art priority schedulers,” in <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, Seoul, Republic of Korea, 2022, pp. 353–367.","mla":"Postnikova, Anastasiia, et al. “Multi-Queues Can Be State-of-the-Art Priority Schedulers.” <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, Association for Computing Machinery, 2022, pp. 353–67, doi:<a href=\"https://doi.org/10.1145/3503221.3508432\">10.1145/3503221.3508432</a>.","ista":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. 2022. Multi-queues can be state-of-the-art priority schedulers. Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming. PPoPP: Sympopsium on Principles and Practice of Parallel Programming, 353–367.","chicago":"Postnikova, Anastasiia, Nikita Koval, Giorgi Nadiradze, and Dan-Adrian Alistarh. “Multi-Queues Can Be State-of-the-Art Priority Schedulers.” In <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, 353–67. Association for Computing Machinery, 2022. <a href=\"https://doi.org/10.1145/3503221.3508432\">https://doi.org/10.1145/3503221.3508432</a>.","short":"A. Postnikova, N. Koval, G. Nadiradze, D.-A. Alistarh, in:, Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, Association for Computing Machinery, 2022, pp. 353–367."},"day":"02","publication_identifier":{"isbn":["9781450392044"]},"publication":"Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","oa":1,"related_material":{"record":[{"relation":"research_data","id":"13076","status":"public"}]},"publication_status":"published","abstract":[{"lang":"eng","text":"Designing and implementing efficient parallel priority schedulers is an active research area. An intriguing proposed design is the Multi-Queue: given n threads and m ≥ n distinct priority queues, task insertions are performed uniformly at random, while, to delete, a thread picks two queues uniformly at random, and removes the observed task of higher priority. This approach scales well, and has probabilistic rank guarantees: roughly, the rank of each task removed, relative to remaining tasks in all other queues, is O (m) in expectation. Yet, the performance of this pattern is below that of well-engineered schedulers, which eschew theoretical guarantees for practical efficiency.\r\n\r\nWe investigate whether it is possible to design and implement a Multi-Queue-based task scheduler that is both highly-efficient and has analytical guarantees. We propose a new variant called the Stealing Multi-Queue (SMQ), a cache-efficient variant of the Multi-Queue, which leverages both queue affinity---each thread has a local queue, from which tasks are usually removed; but, with some probability, threads also attempt to steal higher-priority tasks from the other queues---and task batching, that is, the processing of several tasks in a single insert / remove step. These ideas are well-known for task scheduling without priorities; our theoretical contribution is showing that, despite relaxations, this design can still provide rank guarantees, which in turn implies bounds on total work performed. We provide a general SMQ implementation which can surpass state-of-the-art schedulers such as OBIM and PMOD in terms of performance on popular graph-processing benchmarks. Notably, the performance improvement comes mainly from the superior rank guarantees provided by our scheduler, confirming that analytically-reasoned approaches can still provide performance improvements for priority task scheduling."}],"quality_controlled":"1","oa_version":"Preprint","article_processing_charge":"No","ec_funded":1,"date_created":"2022-04-17T22:01:46Z","year":"2022","_id":"11180"},{"_id":"13076","year":"2022","date_created":"2023-05-23T17:05:40Z","article_processing_charge":"No","oa_version":"Published Version","main_file_link":[{"url":"https://doi.org/10.5281/zenodo.5813846","open_access":"1"}],"abstract":[{"lang":"eng","text":"The source code for replicating experiments presented in the paper.\r\n\r\nThe implementation of the designed priority schedulers can be found in Galois-2.2.1/include/Galois/WorkList/:\r\nStealingMultiQueue.h is the StealingMultiQueue.\r\nMQOptimized/ contains MQ Optimized variants.\r\n\r\nWe provide images that contain all the dependencies and datasets. Images can be pulled from npostnikova/mq-based-schedulers repository, or downloaded from Zenodo. See readme for more detail."}],"month":"01","department":[{"_id":"DaAl"}],"related_material":{"link":[{"relation":"software","url":"https://github.com/npostnikova/mq-based-schedulers/tree/v1.1"}],"record":[{"relation":"used_in_publication","status":"public","id":"11180"}]},"publisher":"Zenodo","oa":1,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"03","citation":{"mla":"Postnikova, Anastasiia, et al. <i>Multi-Queues Can Be State-of-the-Art Priority Schedulers</i>. Zenodo, 2022, doi:<a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>.","apa":"Postnikova, A., Koval, N., Nadiradze, G., &#38; Alistarh, D.-A. (2022). Multi-queues can be state-of-the-art priority schedulers. Zenodo. <a href=\"https://doi.org/10.5281/ZENODO.5733408\">https://doi.org/10.5281/ZENODO.5733408</a>","ieee":"A. Postnikova, N. Koval, G. Nadiradze, and D.-A. Alistarh, “Multi-queues can be state-of-the-art priority schedulers.” Zenodo, 2022.","ama":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. Multi-queues can be state-of-the-art priority schedulers. 2022. doi:<a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>","chicago":"Postnikova, Anastasiia, Nikita Koval, Giorgi Nadiradze, and Dan-Adrian Alistarh. “Multi-Queues Can Be State-of-the-Art Priority Schedulers.” Zenodo, 2022. <a href=\"https://doi.org/10.5281/ZENODO.5733408\">https://doi.org/10.5281/ZENODO.5733408</a>.","ista":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. 2022. Multi-queues can be state-of-the-art priority schedulers, Zenodo, <a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>.","short":"A. Postnikova, N. Koval, G. Nadiradze, D.-A. Alistarh, (2022)."},"type":"research_data_reference","author":[{"last_name":"Postnikova","first_name":"Anastasiia","full_name":"Postnikova, Anastasiia"},{"full_name":"Koval, Nikita","id":"2F4DB10C-F248-11E8-B48F-1D18A9856A87","last_name":"Koval","first_name":"Nikita"},{"full_name":"Nadiradze, Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","last_name":"Nadiradze","first_name":"Giorgi"},{"last_name":"Alistarh","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian"}],"date_updated":"2023-08-03T06:48:34Z","ddc":["510"],"status":"public","title":"Multi-queues can be state-of-the-art priority schedulers","date_published":"2022-01-03T00:00:00Z","doi":"10.5281/ZENODO.5733408"},{"ddc":["000"],"intvolume":"       209","alternative_title":["LIPIcs"],"status":"public","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"language":[{"iso":"eng"}],"date_published":"2021-10-04T00:00:00Z","doi":"10.4230/LIPIcs.DISC.2021.4","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file_date_updated":"2021-11-12T09:33:26Z","volume":209,"type":"conference","month":"10","department":[{"_id":"DaAl"}],"publisher":"Schloss Dagstuhl - Leibniz Zentrum für Informatik","scopus_import":"1","acknowledgement":"Dan Alistarh: Supported in part by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). Giorgi Nadiradze: Supported in part by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). The authors would like to thank the DISC anonymous reviewers for their useful\r\nfeedback and comments.","conference":{"end_date":"2021-10-08","start_date":"2021-10-04","location":"Freiburg, Germany","name":"DISC: Distributed Computing"},"article_number":"4","has_accepted_license":"1","title":"Lower bounds for shared-memory leader election under bounded write contention","project":[{"grant_number":"805223","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning"}],"oa":1,"publication":"35th International Symposium on Distributed Computing","publication_identifier":{"isbn":["9-783-9597-7210-5"],"issn":["1868-8969"]},"day":"04","author":[{"full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","last_name":"Alistarh"},{"full_name":"Gelashvili, Rati","last_name":"Gelashvili","first_name":"Rati"},{"id":"3279A00C-F248-11E8-B48F-1D18A9856A87","full_name":"Nadiradze, Giorgi","first_name":"Giorgi","last_name":"Nadiradze"}],"date_updated":"2022-08-19T07:23:28Z","citation":{"short":"D.-A. Alistarh, R. Gelashvili, G. Nadiradze, in:, 35th International Symposium on Distributed Computing, Schloss Dagstuhl - Leibniz Zentrum für Informatik, 2021.","ama":"Alistarh D-A, Gelashvili R, Nadiradze G. Lower bounds for shared-memory leader election under bounded write contention. In: <i>35th International Symposium on Distributed Computing</i>. Vol 209. Schloss Dagstuhl - Leibniz Zentrum für Informatik; 2021. doi:<a href=\"https://doi.org/10.4230/LIPIcs.DISC.2021.4\">10.4230/LIPIcs.DISC.2021.4</a>","apa":"Alistarh, D.-A., Gelashvili, R., &#38; Nadiradze, G. (2021). Lower bounds for shared-memory leader election under bounded write contention. In <i>35th International Symposium on Distributed Computing</i> (Vol. 209). Freiburg, Germany: Schloss Dagstuhl - Leibniz Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.DISC.2021.4\">https://doi.org/10.4230/LIPIcs.DISC.2021.4</a>","ieee":"D.-A. Alistarh, R. Gelashvili, and G. Nadiradze, “Lower bounds for shared-memory leader election under bounded write contention,” in <i>35th International Symposium on Distributed Computing</i>, Freiburg, Germany, 2021, vol. 209.","mla":"Alistarh, Dan-Adrian, et al. “Lower Bounds for Shared-Memory Leader Election under Bounded Write Contention.” <i>35th International Symposium on Distributed Computing</i>, vol. 209, 4, Schloss Dagstuhl - Leibniz Zentrum für Informatik, 2021, doi:<a href=\"https://doi.org/10.4230/LIPIcs.DISC.2021.4\">10.4230/LIPIcs.DISC.2021.4</a>.","ista":"Alistarh D-A, Gelashvili R, Nadiradze G. 2021. Lower bounds for shared-memory leader election under bounded write contention. 35th International Symposium on Distributed Computing. DISC: Distributed Computing, LIPIcs, vol. 209, 4.","chicago":"Alistarh, Dan-Adrian, Rati Gelashvili, and Giorgi Nadiradze. “Lower Bounds for Shared-Memory Leader Election under Bounded Write Contention.” In <i>35th International Symposium on Distributed Computing</i>, Vol. 209. Schloss Dagstuhl - Leibniz Zentrum für Informatik, 2021. <a href=\"https://doi.org/10.4230/LIPIcs.DISC.2021.4\">https://doi.org/10.4230/LIPIcs.DISC.2021.4</a>."},"article_processing_charge":"No","oa_version":"Published Version","quality_controlled":"1","abstract":[{"lang":"eng","text":"This paper gives tight logarithmic lower bounds on the solo step complexity of leader election in an asynchronous shared-memory model with single-writer multi-reader (SWMR) registers, for both deterministic and randomized obstruction-free algorithms. The approach extends to lower bounds for deterministic and randomized obstruction-free algorithms using multi-writer registers under bounded write concurrency, showing a trade-off between the solo step complexity of a leader election algorithm, and the worst-case number of stalls incurred by a processor in an execution."}],"file":[{"access_level":"open_access","creator":"cchlebak","checksum":"b4cdc6668c899a601c5e6a96b8ca54d9","file_size":706791,"content_type":"application/pdf","date_updated":"2021-11-12T09:33:26Z","file_name":"2021_LIPIcsDISC_Alistarh.pdf","file_id":"10277","relation":"main_file","date_created":"2021-11-12T09:33:26Z","success":1}],"publication_status":"published","_id":"10217","year":"2021","date_created":"2021-11-07T23:01:23Z","ec_funded":1},{"degree_awarded":"PhD","publisher":"Institute of Science and Technology Austria","department":[{"_id":"GradSch"},{"_id":"DaAl"}],"month":"12","page":"132","ddc":["000"],"alternative_title":["ISTA Thesis"],"language":[{"iso":"eng"}],"status":"public","doi":"10.15479/at:ista:10429","date_published":"2021-12-09T00:00:00Z","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","file_date_updated":"2022-03-28T12:55:12Z","type":"dissertation","oa_version":"Published Version","article_processing_charge":"No","file":[{"date_updated":"2021-12-09T17:47:49Z","relation":"main_file","date_created":"2021-12-09T17:47:49Z","success":1,"file_id":"10436","file_name":"Thesis_Final_09_12_2021.pdf","creator":"gnadirad","access_level":"open_access","content_type":"application/pdf","file_size":2370859,"checksum":"6bf14e9a523387328f016c0689f5e10e"},{"date_updated":"2022-03-28T12:55:12Z","file_name":"Thesis_Final_09_12_2021.zip","relation":"source_file","date_created":"2021-12-09T17:47:49Z","file_id":"10437","creator":"gnadirad","access_level":"closed","checksum":"914d6c5ca86bd0add471971a8f4c4341","content_type":"application/zip","file_size":2596924}],"abstract":[{"text":"The scalability of concurrent data structures and distributed algorithms strongly depends on\r\nreducing the contention for shared resources and the costs of synchronization and communication. We show how such cost reductions can be attained by relaxing the strict consistency conditions required by sequential implementations. In the first part of the thesis, we consider relaxation in the context of concurrent data structures. Specifically, in data structures \r\nsuch as priority queues, imposing strong semantics renders scalability impossible, since a correct implementation of the remove operation should return only the element with highest priority. Intuitively, attempting to invoke remove operations concurrently  creates a race condition. This bottleneck  can be circumvented by relaxing semantics of the affected data structure, thus allowing removal of the elements which are no longer required to have the highest priority. We prove that the randomized implementations of relaxed data structures provide provable guarantees on the priority of the removed elements even under concurrency. Additionally, we show that in some cases the relaxed data structures can be used to scale the classical algorithms which are usually implemented with the exact ones. In the second part, we study parallel variants of the  stochastic gradient descent (SGD) algorithm, which distribute computation  among the multiple processors, thus reducing the running time. Unfortunately, in order for standard parallel SGD to succeed, each processor has to maintain a local copy of the necessary model parameter, which is identical to the local copies of other processors; the overheads from this perfect consistency in terms of communication and synchronization can negate the speedup gained by distributing the computation. We show that the consistency conditions required by SGD can be  relaxed, allowing the algorithm to be more flexible in terms of tolerating quantized communication, asynchrony, or even crash faults, while its convergence remains asymptotically the same.","lang":"eng"}],"related_material":{"record":[{"relation":"part_of_dissertation","id":"10432","status":"public"},{"status":"public","id":"6673","relation":"part_of_dissertation"},{"relation":"part_of_dissertation","status":"public","id":"5965"},{"status":"public","id":"10435","relation":"part_of_dissertation"}]},"publication_status":"published","year":"2021","_id":"10429","ec_funded":1,"date_created":"2021-12-08T21:52:28Z","has_accepted_license":"1","project":[{"name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020","grant_number":"805223","_id":"268A44D6-B435-11E9-9278-68D0E5697425"}],"title":"On achieving scalability through relaxation","publication_identifier":{"issn":["2663-337X"]},"oa":1,"day":"09","supervisor":[{"last_name":"Alistarh","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian"}],"citation":{"short":"G. Nadiradze, On Achieving Scalability through Relaxation, Institute of Science and Technology Austria, 2021.","ista":"Nadiradze G. 2021. On achieving scalability through relaxation. Institute of Science and Technology Austria.","chicago":"Nadiradze, Giorgi. “On Achieving Scalability through Relaxation.” Institute of Science and Technology Austria, 2021. <a href=\"https://doi.org/10.15479/at:ista:10429\">https://doi.org/10.15479/at:ista:10429</a>.","apa":"Nadiradze, G. (2021). <i>On achieving scalability through relaxation</i>. Institute of Science and Technology Austria. <a href=\"https://doi.org/10.15479/at:ista:10429\">https://doi.org/10.15479/at:ista:10429</a>","ieee":"G. Nadiradze, “On achieving scalability through relaxation,” Institute of Science and Technology Austria, 2021.","ama":"Nadiradze G. On achieving scalability through relaxation. 2021. doi:<a href=\"https://doi.org/10.15479/at:ista:10429\">10.15479/at:ista:10429</a>","mla":"Nadiradze, Giorgi. <i>On Achieving Scalability through Relaxation</i>. Institute of Science and Technology Austria, 2021, doi:<a href=\"https://doi.org/10.15479/at:ista:10429\">10.15479/at:ista:10429</a>."},"author":[{"last_name":"Nadiradze","first_name":"Giorgi","orcid":"0000-0001-5634-0731","full_name":"Nadiradze, Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87"}],"date_updated":"2023-10-17T11:48:55Z"},{"conference":{"end_date":"2021-02-09","start_date":"2021-02-02","name":"AAAI: Association for the Advancement of Artificial Intelligence","location":"Virtual"},"page":"9037-9045","arxiv":1,"acknowledgement":"We would like to thank Christopher De Sa for his feedback on an earlier draft of this paper, as well as the anonymous AAAI reviewers for their useful comments. This project has received\r\nfunding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). Bapi\r\nChatterjee was supported by the European Union’s Horizon 2020 research and innovation programme under the Marie Sklodowska-Curie grant agreement No. 754411 (ISTPlus).","month":"05","department":[{"_id":"DaAl"}],"main_file_link":[{"url":"https://ojs.aaai.org/index.php/AAAI/article/view/17092","open_access":"1"}],"volume":35,"type":"conference","user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","status":"public","language":[{"iso":"eng"}],"date_published":"2021-05-18T00:00:00Z","intvolume":"        35","_id":"10432","year":"2021","ec_funded":1,"date_created":"2021-12-09T09:21:35Z","abstract":[{"lang":"eng","text":"One key element behind the recent progress of machine learning has been the ability to train machine learning models in large-scale distributed shared-memory and message-passing environments. Most of these models are trained employing variants of stochastic gradient descent (SGD) based optimization, but most methods involve some type of consistency relaxation relative to sequential SGD, to mitigate its large communication or synchronization costs at scale. In this paper, we introduce a general consistency condition covering communication-reduced and asynchronous distributed SGD implementations. Our framework, called elastic consistency, decouples the system-specific aspects of the implementation from the SGD convergence requirements, giving a general way to obtain convergence bounds for a wide variety of distributed SGD methods used in practice. Elastic consistency can be used to re-derive or improve several previous convergence bounds in message-passing and shared-memory settings, but also to analyze new models and distribution schemes. As a direct application, we propose and analyze a new synchronization-avoiding scheduling scheme for distributed SGD, and show that it can be used to efficiently train deep convolutional models for image classification."}],"publication_status":"published","related_material":{"record":[{"id":"10429","status":"public","relation":"dissertation_contains"}]},"article_processing_charge":"No","oa_version":"Published Version","quality_controlled":"1","author":[{"first_name":"Giorgi","last_name":"Nadiradze","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","full_name":"Nadiradze, Giorgi","orcid":"0000-0001-5634-0731"},{"first_name":"Ilia","last_name":"Markov","id":"D0CF4148-C985-11E9-8066-0BDEE5697425","full_name":"Markov, Ilia"},{"orcid":"0000-0002-2742-4028","id":"3C41A08A-F248-11E8-B48F-1D18A9856A87","full_name":"Chatterjee, Bapi","last_name":"Chatterjee","first_name":"Bapi"},{"full_name":"Kungurtsev, Vyacheslav ","last_name":"Kungurtsev","first_name":"Vyacheslav "},{"last_name":"Alistarh","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X"}],"citation":{"short":"G. Nadiradze, I. Markov, B. Chatterjee, V. Kungurtsev, D.-A. Alistarh, in:, Proceedings of the AAAI Conference on Artificial Intelligence, 2021, pp. 9037–9045.","ama":"Nadiradze G, Markov I, Chatterjee B, Kungurtsev V, Alistarh D-A. Elastic consistency: A practical consistency model for distributed stochastic gradient descent. In: <i>Proceedings of the AAAI Conference on Artificial Intelligence</i>. Vol 35. ; 2021:9037-9045.","ieee":"G. Nadiradze, I. Markov, B. Chatterjee, V. Kungurtsev, and D.-A. Alistarh, “Elastic consistency: A practical consistency model for distributed stochastic gradient descent,” in <i>Proceedings of the AAAI Conference on Artificial Intelligence</i>, Virtual, 2021, vol. 35, no. 10, pp. 9037–9045.","apa":"Nadiradze, G., Markov, I., Chatterjee, B., Kungurtsev, V., &#38; Alistarh, D.-A. (2021). Elastic consistency: A practical consistency model for distributed stochastic gradient descent. In <i>Proceedings of the AAAI Conference on Artificial Intelligence</i> (Vol. 35, pp. 9037–9045). Virtual.","mla":"Nadiradze, Giorgi, et al. “Elastic Consistency: A Practical Consistency Model for Distributed Stochastic Gradient Descent.” <i>Proceedings of the AAAI Conference on Artificial Intelligence</i>, vol. 35, no. 10, 2021, pp. 9037–45.","ista":"Nadiradze G, Markov I, Chatterjee B, Kungurtsev V, Alistarh D-A. 2021. Elastic consistency: A practical consistency model for distributed stochastic gradient descent. Proceedings of the AAAI Conference on Artificial Intelligence. AAAI: Association for the Advancement of Artificial Intelligence vol. 35, 9037–9045.","chicago":"Nadiradze, Giorgi, Ilia Markov, Bapi Chatterjee, Vyacheslav  Kungurtsev, and Dan-Adrian Alistarh. “Elastic Consistency: A Practical Consistency Model for Distributed Stochastic Gradient Descent.” In <i>Proceedings of the AAAI Conference on Artificial Intelligence</i>, 35:9037–45, 2021."},"date_updated":"2023-09-07T13:31:39Z","oa":1,"publication":"Proceedings of the AAAI Conference on Artificial Intelligence","day":"18","title":"Elastic consistency: A practical consistency model for distributed stochastic gradient descent","external_id":{"arxiv":["2001.05918"]},"project":[{"call_identifier":"H2020","_id":"260C2330-B435-11E9-9278-68D0E5697425","grant_number":"754411","name":"ISTplus - Postdoctoral Fellowships"},{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning"}],"issue":"10"},{"quality_controlled":"1","oa_version":"Published Version","article_processing_charge":"No","publication_status":"published","related_material":{"record":[{"relation":"dissertation_contains","status":"public","id":"10429"}]},"abstract":[{"lang":"eng","text":"Decentralized optimization is emerging as a viable alternative for scalable distributed machine learning, but also introduces new challenges in terms of synchronization costs. To this end, several communication-reduction techniques, such as non-blocking communication, quantization, and local steps, have been explored in the decentralized setting. Due to the complexity of analyzing optimization in such a relaxed setting, this line of work often assumes \\emph{global} communication rounds, which require additional synchronization. In this paper, we consider decentralized optimization in the simpler, but harder to analyze, \\emph{asynchronous gossip} model, in which communication occurs in discrete, randomly chosen pairings among nodes. Perhaps surprisingly, we show that a variant of SGD called \\emph{SwarmSGD} still converges in this setting, even if \\emph{non-blocking communication}, \\emph{quantization}, and \\emph{local steps} are all applied \\emph{in conjunction}, and even if the node data distributions and underlying graph topology are both \\emph{heterogenous}. Our analysis is based on a new connection with multi-dimensional load-balancing processes. We implement this algorithm and deploy it in a super-computing environment, showing that it can outperform previous decentralized methods in terms of end-to-end training time, and that it can even rival carefully-tuned large-batch SGD for certain tasks."}],"ec_funded":1,"date_created":"2021-12-09T10:59:12Z","year":"2021","_id":"10435","project":[{"grant_number":"754411","call_identifier":"H2020","_id":"260C2330-B435-11E9-9278-68D0E5697425","name":"ISTplus - Postdoctoral Fellowships"},{"name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","call_identifier":"H2020"}],"external_id":{"arxiv":["1910.12308"]},"title":"Asynchronous decentralized SGD with quantized and local updates","day":"01","publication":"35th Conference on Neural Information Processing Systems","oa":1,"date_updated":"2023-10-17T11:48:56Z","author":[{"id":"3279A00C-F248-11E8-B48F-1D18A9856A87","full_name":"Nadiradze, Giorgi","orcid":"0000-0001-5634-0731","last_name":"Nadiradze","first_name":"Giorgi"},{"id":"bcc145fd-e77f-11ea-ae8b-80d661dbff67","full_name":"Sabour, Amirmojtaba","last_name":"Sabour","first_name":"Amirmojtaba"},{"last_name":"Davies","first_name":"Peter","orcid":"0000-0002-5646-9524","full_name":"Davies, Peter","id":"11396234-BB50-11E9-B24C-90FCE5697425"},{"first_name":"Shigang","last_name":"Li","full_name":"Li, Shigang"},{"orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","last_name":"Alistarh"}],"citation":{"short":"G. Nadiradze, A. Sabour, P. Davies, S. Li, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021.","mla":"Nadiradze, Giorgi, et al. “Asynchronous Decentralized SGD with Quantized and Local Updates.” <i>35th Conference on Neural Information Processing Systems</i>, Neural Information Processing Systems Foundation, 2021.","ieee":"G. Nadiradze, A. Sabour, P. Davies, S. Li, and D.-A. Alistarh, “Asynchronous decentralized SGD with quantized and local updates,” in <i>35th Conference on Neural Information Processing Systems</i>, Sydney, Australia, 2021.","ama":"Nadiradze G, Sabour A, Davies P, Li S, Alistarh D-A. Asynchronous decentralized SGD with quantized and local updates. In: <i>35th Conference on Neural Information Processing Systems</i>. Neural Information Processing Systems Foundation; 2021.","apa":"Nadiradze, G., Sabour, A., Davies, P., Li, S., &#38; Alistarh, D.-A. (2021). Asynchronous decentralized SGD with quantized and local updates. In <i>35th Conference on Neural Information Processing Systems</i>. Sydney, Australia: Neural Information Processing Systems Foundation.","chicago":"Nadiradze, Giorgi, Amirmojtaba Sabour, Peter Davies, Shigang Li, and Dan-Adrian Alistarh. “Asynchronous Decentralized SGD with Quantized and Local Updates.” In <i>35th Conference on Neural Information Processing Systems</i>. Neural Information Processing Systems Foundation, 2021.","ista":"Nadiradze G, Sabour A, Davies P, Li S, Alistarh D-A. 2021. Asynchronous decentralized SGD with quantized and local updates. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems."},"main_file_link":[{"open_access":"1","url":"https://papers.nips.cc/paper/2021/hash/362c99307cdc3f2d8b410652386a9dd1-Abstract.html"}],"department":[{"_id":"DaAl"}],"publisher":"Neural Information Processing Systems Foundation","month":"12","acknowledgement":"We gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). PD partly conducted this work while at IST Austria and was supported by the European Union’s Horizon 2020 programme under the Marie Skłodowska-Curie grant agreement No. 754411. SL was funded in part by European Research Council (ERC) under the European Union’s Horizon 2020 programme (grant agreement DAPP, No. 678880, and EPiGRAM-HS, No. 801039).\r\n","arxiv":1,"conference":{"end_date":"2021-12-14","start_date":"2021-12-06","location":"Sydney, Australia","name":"NeurIPS: Neural Information Processing Systems"},"date_published":"2021-12-01T00:00:00Z","language":[{"iso":"eng"}],"status":"public","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference"},{"has_accepted_license":"1","title":"Dynamic averaging load balancing on cycles","external_id":{"isi":["000734004600001"],"arxiv":["2003.09297"]},"project":[{"name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"805223"},{"name":"IST Austria Open Access Fund","_id":"B67AFEDC-15C9-11EA-A837-991A96BB2854"}],"day":"24","oa":1,"publication_identifier":{"eissn":["1432-0541"],"issn":["0178-4617"]},"publication":"Algorithmica","date_updated":"2024-03-05T07:35:53Z","citation":{"chicago":"Alistarh, Dan-Adrian, Giorgi Nadiradze, and Amirmojtaba Sabour. “Dynamic Averaging Load Balancing on Cycles.” <i>Algorithmica</i>. Springer Nature, 2021. <a href=\"https://doi.org/10.1007/s00453-021-00905-9\">https://doi.org/10.1007/s00453-021-00905-9</a>.","ista":"Alistarh D-A, Nadiradze G, Sabour A. 2021. Dynamic averaging load balancing on cycles. Algorithmica.","mla":"Alistarh, Dan-Adrian, et al. “Dynamic Averaging Load Balancing on Cycles.” <i>Algorithmica</i>, Springer Nature, 2021, doi:<a href=\"https://doi.org/10.1007/s00453-021-00905-9\">10.1007/s00453-021-00905-9</a>.","apa":"Alistarh, D.-A., Nadiradze, G., &#38; Sabour, A. (2021). Dynamic averaging load balancing on cycles. <i>Algorithmica</i>. Virtual, Online; Germany: Springer Nature. <a href=\"https://doi.org/10.1007/s00453-021-00905-9\">https://doi.org/10.1007/s00453-021-00905-9</a>","ama":"Alistarh D-A, Nadiradze G, Sabour A. Dynamic averaging load balancing on cycles. <i>Algorithmica</i>. 2021. doi:<a href=\"https://doi.org/10.1007/s00453-021-00905-9\">10.1007/s00453-021-00905-9</a>","ieee":"D.-A. Alistarh, G. Nadiradze, and A. Sabour, “Dynamic averaging load balancing on cycles,” <i>Algorithmica</i>. Springer Nature, 2021.","short":"D.-A. Alistarh, G. Nadiradze, A. Sabour, Algorithmica (2021)."},"author":[{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh","first_name":"Dan-Adrian"},{"orcid":"0000-0001-5634-0731","full_name":"Nadiradze, Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","first_name":"Giorgi","last_name":"Nadiradze"},{"id":"bcc145fd-e77f-11ea-ae8b-80d661dbff67","full_name":"Sabour, Amirmojtaba","first_name":"Amirmojtaba","last_name":"Sabour"}],"quality_controlled":"1","article_processing_charge":"Yes (via OA deal)","oa_version":"Published Version","related_material":{"record":[{"relation":"earlier_version","status":"public","id":"15077"}],"link":[{"url":"https://doi.org/10.4230/LIPIcs.ICALP.2020.7","relation":"earlier_version"}]},"publication_status":"published","article_type":"original","abstract":[{"lang":"eng","text":"We consider the following dynamic load-balancing process: given an underlying graph G with n nodes, in each step t≥ 0, one unit of load is created, and placed at a randomly chosen graph node. In the same step, the chosen node picks a random neighbor, and the two nodes balance their loads by averaging them. We are interested in the expected gap between the minimum and maximum loads at nodes as the process progresses, and its dependence on n and on the graph structure. Variants of the above graphical balanced allocation process have been studied previously by Peres, Talwar, and Wieder [Peres et al., 2015], and by Sauerwald and Sun [Sauerwald and Sun, 2015]. These authors left as open the question of characterizing the gap in the case of cycle graphs in the dynamic case, where weights are created during the algorithm’s execution. For this case, the only known upper bound is of 𝒪(n log n), following from a majorization argument due to [Peres et al., 2015], which analyzes a related graphical allocation process. In this paper, we provide an upper bound of 𝒪 (√n log n) on the expected gap of the above process for cycles of length n. We introduce a new potential analysis technique, which enables us to bound the difference in load between k-hop neighbors on the cycle, for any k ≤ n/2. We complement this with a \"gap covering\" argument, which bounds the maximum value of the gap by bounding its value across all possible subsets of a certain structure, and recursively bounding the gaps within each subset. We provide analytical and experimental evidence that our upper bound on the gap is tight up to a logarithmic factor. "}],"file":[{"checksum":"21169b25b0c8e17b21e12af22bff9870","content_type":"application/pdf","file_size":525950,"creator":"cchlebak","access_level":"open_access","file_name":"2021_Algorithmica_Alistarh.pdf","date_created":"2021-12-27T10:36:40Z","relation":"main_file","success":1,"file_id":"10577","date_updated":"2021-12-27T10:36:40Z"}],"ec_funded":1,"date_created":"2020-08-24T06:24:04Z","_id":"8286","year":"2021","ddc":["000"],"date_published":"2021-12-24T00:00:00Z","doi":"10.1007/s00453-021-00905-9","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"status":"public","language":[{"iso":"eng"}],"file_date_updated":"2021-12-27T10:36:40Z","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","type":"journal_article","isi":1,"month":"12","department":[{"_id":"DaAl"}],"publisher":"Springer Nature","acknowledgement":"The authors sincerely thank Thomas Sauerwald and George Giakkoupis for insightful discussions, and Mohsen Ghaffari, Yuval Peres, and Udi Wieder for feedback on earlier versions of this draft. We also thank the ICALP anonymous reviewers for their very useful comments. Open access funding provided by Institute of Science and Technology (IST Austria). Funding was provided by European Research Council (Grant No. PR1042ERC01).","scopus_import":"1","arxiv":1,"conference":{"end_date":"2020-07-11","start_date":"2020-07-08","location":"Virtual, Online; Germany","name":"ICALP: International Colloquium on Automata, Languages, and Programming "}},{"_id":"8723","year":"2021","date_created":"2020-11-05T15:25:43Z","ec_funded":1,"abstract":[{"text":"Deep learning at scale is dominated by communication time. Distributing samples across nodes usually yields the best performance, but poses scaling challenges due to global information dissemination and load imbalance across uneven sample lengths. State-of-the-art decentralized optimizers mitigate the problem, but require more iterations to achieve the same accuracy as their globally-communicating counterparts. We present Wait-Avoiding Group Model Averaging (WAGMA) SGD, a wait-avoiding stochastic optimizer that reduces global communication via subgroup weight exchange. The key insight is a combination of algorithmic changes to the averaging scheme and the use of a group allreduce operation. We prove the convergence of WAGMA-SGD, and empirically show that it retains convergence rates similar to Allreduce-SGD. For evaluation, we train ResNet-50 on ImageNet; Transformer for machine translation; and deep reinforcement learning for navigation at scale. Compared with state-of-the-art decentralized SGD variants, WAGMA-SGD significantly improves training throughput (e.g., 2.1× on 1,024 GPUs for reinforcement learning), and achieves the fastest time-to-solution (e.g., the highest score using the shortest training time for Transformer).","lang":"eng"}],"article_type":"original","publication_status":"published","article_processing_charge":"No","oa_version":"Preprint","quality_controlled":"1","author":[{"first_name":"Shigang","last_name":"Li","full_name":"Li, Shigang"},{"full_name":"Tal Ben-Nun, Tal Ben-Nun","first_name":"Tal Ben-Nun","last_name":"Tal Ben-Nun"},{"id":"3279A00C-F248-11E8-B48F-1D18A9856A87","full_name":"Nadiradze, Giorgi","first_name":"Giorgi","last_name":"Nadiradze"},{"full_name":"Girolamo, Salvatore Di","last_name":"Girolamo","first_name":"Salvatore Di"},{"first_name":"Nikoli","last_name":"Dryden","full_name":"Dryden, Nikoli"},{"first_name":"Dan-Adrian","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X"},{"full_name":"Hoefler, Torsten","last_name":"Hoefler","first_name":"Torsten"}],"citation":{"chicago":"Li, Shigang, Tal Ben-Nun Tal Ben-Nun, Giorgi Nadiradze, Salvatore Di Girolamo, Nikoli Dryden, Dan-Adrian Alistarh, and Torsten Hoefler. “Breaking (Global) Barriers in Parallel Stochastic Optimization with Wait-Avoiding Group Averaging.” <i>IEEE Transactions on Parallel and Distributed Systems</i>. IEEE, 2021. <a href=\"https://doi.org/10.1109/TPDS.2020.3040606\">https://doi.org/10.1109/TPDS.2020.3040606</a>.","ista":"Li S, Tal Ben-Nun TB-N, Nadiradze G, Girolamo SD, Dryden N, Alistarh D-A, Hoefler T. 2021. Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging. IEEE Transactions on Parallel and Distributed Systems. 32(7), 9271898.","mla":"Li, Shigang, et al. “Breaking (Global) Barriers in Parallel Stochastic Optimization with Wait-Avoiding Group Averaging.” <i>IEEE Transactions on Parallel and Distributed Systems</i>, vol. 32, no. 7, 9271898, IEEE, 2021, doi:<a href=\"https://doi.org/10.1109/TPDS.2020.3040606\">10.1109/TPDS.2020.3040606</a>.","apa":"Li, S., Tal Ben-Nun, T. B.-N., Nadiradze, G., Girolamo, S. D., Dryden, N., Alistarh, D.-A., &#38; Hoefler, T. (2021). Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging. <i>IEEE Transactions on Parallel and Distributed Systems</i>. IEEE. <a href=\"https://doi.org/10.1109/TPDS.2020.3040606\">https://doi.org/10.1109/TPDS.2020.3040606</a>","ieee":"S. Li <i>et al.</i>, “Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging,” <i>IEEE Transactions on Parallel and Distributed Systems</i>, vol. 32, no. 7. IEEE, 2021.","ama":"Li S, Tal Ben-Nun TB-N, Nadiradze G, et al. Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging. <i>IEEE Transactions on Parallel and Distributed Systems</i>. 2021;32(7). doi:<a href=\"https://doi.org/10.1109/TPDS.2020.3040606\">10.1109/TPDS.2020.3040606</a>","short":"S. Li, T.B.-N. Tal Ben-Nun, G. Nadiradze, S.D. Girolamo, N. Dryden, D.-A. Alistarh, T. Hoefler, IEEE Transactions on Parallel and Distributed Systems 32 (2021)."},"date_updated":"2023-08-04T11:08:52Z","oa":1,"publication":"IEEE Transactions on Parallel and Distributed Systems","publication_identifier":{"issn":["10459219"]},"day":"01","external_id":{"arxiv":["2005.00124"],"isi":["000621405200019"]},"title":"Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning"}],"issue":"7","article_number":"9271898","arxiv":1,"scopus_import":"1","acknowledgement":"This project has received funding from the European Research Council (ERC) under the European Union’s Hori-\r\nzon 2020 programme under Grant DAPP, Grant 678880; EPi-GRAM-HS, Grant 801039; and ERC Starting Grant ScaleML, Grant 805223. The work of Tal Ben-Nun is supported by the Swiss National Science Foundation (Ambizione Project No. 185778). The work of Nikoli Dryden is supported by the ETH Postdoctoral Fellowship. The authors would like to thank the Swiss National Supercomputing Center for providing the computing resources and technical support.","month":"07","publisher":"IEEE","department":[{"_id":"DaAl"}],"isi":1,"main_file_link":[{"url":"https://arxiv.org/abs/2005.00124","open_access":"1"}],"volume":32,"type":"journal_article","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","status":"public","language":[{"iso":"eng"}],"date_published":"2021-07-01T00:00:00Z","doi":"10.1109/TPDS.2020.3040606","intvolume":"        32"},{"oa":1,"publication":"47th International Colloquium on Automata, Languages, and Programming","day":"29","author":[{"last_name":"Alistarh","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X"},{"id":"3279A00C-F248-11E8-B48F-1D18A9856A87","full_name":"Nadiradze, Giorgi","orcid":"0000-0001-5634-0731","first_name":"Giorgi","last_name":"Nadiradze"},{"id":"bcc145fd-e77f-11ea-ae8b-80d661dbff67","full_name":"Sabour, Amirmojtaba","first_name":"Amirmojtaba","last_name":"Sabour"}],"date_updated":"2024-03-05T07:35:53Z","citation":{"ama":"Alistarh D-A, Nadiradze G, Sabour A. Dynamic averaging load balancing on cycles. In: <i>47th International Colloquium on Automata, Languages, and Programming</i>. Vol 168. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2020. doi:<a href=\"https://doi.org/10.4230/LIPIcs.ICALP.2020.7\">10.4230/LIPIcs.ICALP.2020.7</a>","ieee":"D.-A. Alistarh, G. Nadiradze, and A. Sabour, “Dynamic averaging load balancing on cycles,” in <i>47th International Colloquium on Automata, Languages, and Programming</i>, Saarbrücken, Germany, Virtual, 2020, vol. 168.","apa":"Alistarh, D.-A., Nadiradze, G., &#38; Sabour, A. (2020). Dynamic averaging load balancing on cycles. In <i>47th International Colloquium on Automata, Languages, and Programming</i> (Vol. 168). Saarbrücken, Germany, Virtual: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.ICALP.2020.7\">https://doi.org/10.4230/LIPIcs.ICALP.2020.7</a>","mla":"Alistarh, Dan-Adrian, et al. “Dynamic Averaging Load Balancing on Cycles.” <i>47th International Colloquium on Automata, Languages, and Programming</i>, vol. 168, 7, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2020, doi:<a href=\"https://doi.org/10.4230/LIPIcs.ICALP.2020.7\">10.4230/LIPIcs.ICALP.2020.7</a>.","ista":"Alistarh D-A, Nadiradze G, Sabour A. 2020. Dynamic averaging load balancing on cycles. 47th International Colloquium on Automata, Languages, and Programming. ICALP: International Colloquium on Automata, Languages, and Programming, LIPIcs, vol. 168, 7.","chicago":"Alistarh, Dan-Adrian, Giorgi Nadiradze, and Amirmojtaba Sabour. “Dynamic Averaging Load Balancing on Cycles.” In <i>47th International Colloquium on Automata, Languages, and Programming</i>, Vol. 168. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2020. <a href=\"https://doi.org/10.4230/LIPIcs.ICALP.2020.7\">https://doi.org/10.4230/LIPIcs.ICALP.2020.7</a>.","short":"D.-A. Alistarh, G. Nadiradze, A. Sabour, in:, 47th International Colloquium on Automata, Languages, and Programming, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2020."},"article_number":"7","has_accepted_license":"1","title":"Dynamic averaging load balancing on cycles","external_id":{"arxiv":["2003.09297"]},"project":[{"name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223"}],"_id":"15077","year":"2020","ec_funded":1,"date_created":"2024-03-05T07:25:37Z","article_processing_charge":"No","oa_version":"Published Version","quality_controlled":"1","abstract":[{"lang":"eng","text":"We consider the following dynamic load-balancing process: given an underlying graph G with n nodes, in each step t≥ 0, one unit of load is created, and placed at a randomly chosen graph node. In the same step, the chosen node picks a random neighbor, and the two nodes balance their loads by averaging them. We are interested in the expected gap between the minimum and maximum loads at nodes as the process progresses, and its dependence on n and on the graph structure. Variants of the above graphical balanced allocation process have been studied previously by Peres, Talwar, and Wieder [Peres et al., 2015], and by Sauerwald and Sun [Sauerwald and Sun, 2015]. These authors left as open the question of characterizing the gap in the case of cycle graphs in the dynamic case, where weights are created during the algorithm’s execution. For this case, the only known upper bound is of 𝒪(n log n), following from a majorization argument due to [Peres et al., 2015], which analyzes a related graphical allocation process. In this paper, we provide an upper bound of 𝒪 (√n log n) on the expected gap of the above process for cycles of length n. We introduce a new potential analysis technique, which enables us to bound the difference in load between k-hop neighbors on the cycle, for any k ≤ n/2. We complement this with a \"gap covering\" argument, which bounds the maximum value of the gap by bounding its value across all possible subsets of a certain structure, and recursively bounding the gaps within each subset. We provide analytical and experimental evidence that our upper bound on the gap is tight up to a logarithmic factor."}],"file":[{"file_id":"15078","relation":"main_file","success":1,"date_created":"2024-03-05T07:25:15Z","file_name":"2020_LIPIcs_Alistarh.pdf","date_updated":"2024-03-05T07:25:15Z","file_size":782987,"content_type":"application/pdf","checksum":"e5eb16199f4ccfd77a321977eb3f026f","access_level":"open_access","creator":"dernst"}],"related_material":{"record":[{"status":"public","id":"8286","relation":"later_version"}]},"publication_status":"published","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file_date_updated":"2024-03-05T07:25:15Z","volume":168,"type":"conference","alternative_title":["LIPIcs"],"ddc":["000"],"intvolume":"       168","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/3.0/legalcode","name":"Creative Commons Attribution 3.0 Unported (CC BY 3.0)","short":"CC BY (3.0)","image":"/images/cc_by.png"},"status":"public","language":[{"iso":"eng"}],"date_published":"2020-06-29T00:00:00Z","doi":"10.4230/LIPIcs.ICALP.2020.7","scopus_import":"1","acknowledgement":"The authors sincerely thank Thomas Sauerwald and George Giakkoupis for insightful discussions, and Mohsen Ghaffari, Yuval Peres, and Udi Wieder for feedback on earlier\r\nversions of this draft. We also thank the ICALP anonymous reviewers for their very useful comments.\r\nFunding: European Research Council funding award PR1042ERC01","conference":{"end_date":"2020-07-11","name":"ICALP: International Colloquium on Automata, Languages, and Programming","start_date":"2020-07-08","location":"Saarbrücken, Germany, Virtual"},"arxiv":1,"month":"06","department":[{"_id":"DaAl"}],"publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik"},{"user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","type":"conference","language":[{"iso":"eng"}],"status":"public","doi":"10.1145/3323165.3323201","date_published":"2019-06-01T00:00:00Z","scopus_import":"1","conference":{"end_date":"2019-06-24","location":"Phoenix, AZ, United States","name":"SPAA: Symposium on Parallelism in Algorithms and Architectures","start_date":"2019-06-22"},"page":"145-154","arxiv":1,"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2003.09363"}],"isi":1,"department":[{"_id":"DaAl"}],"publisher":"ACM Press","month":"06","publication":"31st ACM Symposium on Parallelism in Algorithms and Architectures","publication_identifier":{"isbn":["9781450361842"]},"oa":1,"day":"01","citation":{"short":"D.-A. Alistarh, G. Nadiradze, N. Koval, in:, 31st ACM Symposium on Parallelism in Algorithms and Architectures, ACM Press, 2019, pp. 145–154.","chicago":"Alistarh, Dan-Adrian, Giorgi Nadiradze, and Nikita Koval. “Efficiency Guarantees for Parallel Incremental Algorithms under Relaxed Schedulers.” In <i>31st ACM Symposium on Parallelism in Algorithms and Architectures</i>, 145–54. ACM Press, 2019. <a href=\"https://doi.org/10.1145/3323165.3323201\">https://doi.org/10.1145/3323165.3323201</a>.","ista":"Alistarh D-A, Nadiradze G, Koval N. 2019. Efficiency guarantees for parallel incremental algorithms under relaxed schedulers. 31st ACM Symposium on Parallelism in Algorithms and Architectures. SPAA: Symposium on Parallelism in Algorithms and Architectures, 145–154.","mla":"Alistarh, Dan-Adrian, et al. “Efficiency Guarantees for Parallel Incremental Algorithms under Relaxed Schedulers.” <i>31st ACM Symposium on Parallelism in Algorithms and Architectures</i>, ACM Press, 2019, pp. 145–54, doi:<a href=\"https://doi.org/10.1145/3323165.3323201\">10.1145/3323165.3323201</a>.","ieee":"D.-A. Alistarh, G. Nadiradze, and N. Koval, “Efficiency guarantees for parallel incremental algorithms under relaxed schedulers,” in <i>31st ACM Symposium on Parallelism in Algorithms and Architectures</i>, Phoenix, AZ, United States, 2019, pp. 145–154.","ama":"Alistarh D-A, Nadiradze G, Koval N. Efficiency guarantees for parallel incremental algorithms under relaxed schedulers. In: <i>31st ACM Symposium on Parallelism in Algorithms and Architectures</i>. ACM Press; 2019:145-154. doi:<a href=\"https://doi.org/10.1145/3323165.3323201\">10.1145/3323165.3323201</a>","apa":"Alistarh, D.-A., Nadiradze, G., &#38; Koval, N. (2019). Efficiency guarantees for parallel incremental algorithms under relaxed schedulers. In <i>31st ACM Symposium on Parallelism in Algorithms and Architectures</i> (pp. 145–154). Phoenix, AZ, United States: ACM Press. <a href=\"https://doi.org/10.1145/3323165.3323201\">https://doi.org/10.1145/3323165.3323201</a>"},"author":[{"last_name":"Alistarh","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Nadiradze, Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-5634-0731","last_name":"Nadiradze","first_name":"Giorgi"},{"id":"2F4DB10C-F248-11E8-B48F-1D18A9856A87","full_name":"Koval, Nikita","first_name":"Nikita","last_name":"Koval"}],"date_updated":"2023-09-07T13:31:39Z","project":[{"name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020","grant_number":"805223","_id":"268A44D6-B435-11E9-9278-68D0E5697425"}],"title":"Efficiency guarantees for parallel incremental algorithms under relaxed schedulers","external_id":{"isi":["000507618500018"],"arxiv":["2003.09363"]},"year":"2019","_id":"6673","ec_funded":1,"date_created":"2019-07-24T08:59:36Z","oa_version":"Preprint","article_processing_charge":"No","quality_controlled":"1","abstract":[{"lang":"eng","text":"Several classic problems in graph processing and computational geometry are solved via incremental algorithms, which split computation into a series of small tasks acting on shared state, which gets updated progressively. While the sequential variant of such algorithms usually specifies a fixed (but sometimes random) order in which the tasks should be performed, a standard approach to parallelizing such algorithms is to relax this constraint to allow for out-of-order parallel execution. This is the case for parallel implementations of Dijkstra's single-source shortest-paths (SSSP) algorithm, and for parallel Delaunay mesh triangulation. While many software frameworks parallelize incremental computation in this way, it is still not well understood whether this relaxed ordering approach can still provide any complexity guarantees. In this paper, we address this problem, and analyze the efficiency guarantees provided by a range of incremental algorithms when parallelized via relaxed schedulers. We show that, for algorithms such as Delaunay mesh triangulation and sorting by insertion, schedulers with a maximum relaxation factor of k in terms of the maximum priority inversion allowed will introduce a maximum amount of wasted work of O(łog n poly(k)), where n is the number of tasks to be executed. For SSSP, we show that the additional work is O(poly(k), dmax / wmin), where dmax is the maximum distance between two nodes, and wmin is the minimum such distance. In practical settings where n >> k, this suggests that the overheads of relaxation will be outweighed by the improved scalability of the relaxed scheduler. On the negative side, we provide lower bounds showing that certain algorithms will inherently incur a non-trivial amount of wasted work due to scheduler relaxation, even for relatively benign relaxed schedulers."}],"publication_status":"published","related_material":{"record":[{"relation":"dissertation_contains","id":"10429","status":"public"}]}},{"page":"283 - 292","conference":{"end_date":"2017-07-27","start_date":"2017-07-25","name":"PODC: Principles of Distributed Computing","location":"Washington, WA, USA"},"scopus_import":"1","publisher":"ACM","department":[{"_id":"DaAl"}],"month":"07","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1706.04178"}],"isi":1,"volume":"Part F129314","type":"conference","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","language":[{"iso":"eng"}],"status":"public","doi":"10.1145/3087801.3087810","date_published":"2017-07-26T00:00:00Z","year":"2017","_id":"791","date_created":"2018-12-11T11:48:31Z","abstract":[{"text":"Consider the following random process: we are given n queues, into which elements of increasing labels are inserted uniformly at random. To remove an element, we pick two queues at random, and remove the element of lower label (higher priority) among the two. The cost of a removal is the rank of the label removed, among labels still present in any of the queues, that is, the distance from the optimal choice at each step. Variants of this strategy are prevalent in state-of-the-art concurrent priority queue implementations. Nonetheless, it is not known whether such implementations provide any rank guarantees, even in a sequential model. We answer this question, showing that this strategy provides surprisingly strong guarantees: Although the single-choice process, where we always insert and remove from a single randomly chosen queue, has degrading cost, going to infinity as we increase the number of steps, in the two choice process, the expected rank of a removed element is O(n) while the expected worst-case cost is O(n log n). These bounds are tight, and hold irrespective of the number of steps for which we run the process. The argument is based on a new technical connection between &quot;heavily loaded&quot; balls-into-bins processes and priority scheduling. Our analytic results inspire a new concurrent priority queue implementation, which improves upon the state of the art in terms of practical performance.","lang":"eng"}],"publication_status":"published","oa_version":"Submitted Version","article_processing_charge":"No","quality_controlled":"1","citation":{"short":"D.-A. Alistarh, J. Kopinsky, J. Li, G. Nadiradze, in:, Proceedings of the ACM Symposium on Principles of Distributed Computing, ACM, 2017, pp. 283–292.","ista":"Alistarh D-A, Kopinsky J, Li J, Nadiradze G. 2017. The power of choice in priority scheduling. Proceedings of the ACM Symposium on Principles of Distributed Computing. PODC: Principles of Distributed Computing vol. Part F129314, 283–292.","chicago":"Alistarh, Dan-Adrian, Justin Kopinsky, Jerry Li, and Giorgi Nadiradze. “The Power of Choice in Priority Scheduling.” In <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i>, Part F129314:283–92. ACM, 2017. <a href=\"https://doi.org/10.1145/3087801.3087810\">https://doi.org/10.1145/3087801.3087810</a>.","ieee":"D.-A. Alistarh, J. Kopinsky, J. Li, and G. Nadiradze, “The power of choice in priority scheduling,” in <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i>, Washington, WA, USA, 2017, vol. Part F129314, pp. 283–292.","apa":"Alistarh, D.-A., Kopinsky, J., Li, J., &#38; Nadiradze, G. (2017). The power of choice in priority scheduling. In <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i> (Vol. Part F129314, pp. 283–292). Washington, WA, USA: ACM. <a href=\"https://doi.org/10.1145/3087801.3087810\">https://doi.org/10.1145/3087801.3087810</a>","ama":"Alistarh D-A, Kopinsky J, Li J, Nadiradze G. The power of choice in priority scheduling. In: <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i>. Vol Part F129314. ACM; 2017:283-292. doi:<a href=\"https://doi.org/10.1145/3087801.3087810\">10.1145/3087801.3087810</a>","mla":"Alistarh, Dan-Adrian, et al. “The Power of Choice in Priority Scheduling.” <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i>, vol. Part F129314, ACM, 2017, pp. 283–92, doi:<a href=\"https://doi.org/10.1145/3087801.3087810\">10.1145/3087801.3087810</a>."},"date_updated":"2023-09-27T12:17:59Z","author":[{"orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","first_name":"Dan-Adrian"},{"last_name":"Kopinsky","first_name":"Justin","full_name":"Kopinsky, Justin"},{"first_name":"Jerry","last_name":"Li","full_name":"Li, Jerry"},{"full_name":"Nadiradze, Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-5634-0731","last_name":"Nadiradze","first_name":"Giorgi"}],"publication":"Proceedings of the ACM Symposium on Principles of Distributed Computing","publication_identifier":{"isbn":["978-145034992-5"]},"oa":1,"day":"26","publist_id":"6864","title":"The power of choice in priority scheduling","external_id":{"isi":["000462995000035"]}}]