[{"publication":"Distributed Computing","title":"Communication-efficient randomized consensus","file":[{"relation":"main_file","content_type":"application/pdf","access_level":"open_access","creator":"dernst","date_updated":"2020-07-14T12:46:38Z","checksum":"69b46e537acdcac745237ddb853fcbb5","date_created":"2019-01-22T07:25:51Z","file_id":"5867","file_size":595707,"file_name":"2017_DistribComp_Alistarh.pdf"}],"issue":"6","scopus_import":1,"has_accepted_license":"1","project":[{"name":"IST Austria Open Access Fund","_id":"B67AFEDC-15C9-11EA-A837-991A96BB2854"}],"citation":{"chicago":"Alistarh, Dan-Adrian, James Aspnes, Valerie King, and Jared Saia. “Communication-Efficient Randomized Consensus.” <i>Distributed Computing</i>. Springer, 2018. <a href=\"https://doi.org/10.1007/s00446-017-0315-1\">https://doi.org/10.1007/s00446-017-0315-1</a>.","ieee":"D.-A. Alistarh, J. Aspnes, V. King, and J. Saia, “Communication-efficient randomized consensus,” <i>Distributed Computing</i>, vol. 31, no. 6. Springer, pp. 489–501, 2018.","short":"D.-A. Alistarh, J. Aspnes, V. King, J. Saia, Distributed Computing 31 (2018) 489–501.","apa":"Alistarh, D.-A., Aspnes, J., King, V., &#38; Saia, J. (2018). Communication-efficient randomized consensus. <i>Distributed Computing</i>. Springer. <a href=\"https://doi.org/10.1007/s00446-017-0315-1\">https://doi.org/10.1007/s00446-017-0315-1</a>","mla":"Alistarh, Dan-Adrian, et al. “Communication-Efficient Randomized Consensus.” <i>Distributed Computing</i>, vol. 31, no. 6, Springer, 2018, pp. 489–501, doi:<a href=\"https://doi.org/10.1007/s00446-017-0315-1\">10.1007/s00446-017-0315-1</a>.","ista":"Alistarh D-A, Aspnes J, King V, Saia J. 2018. Communication-efficient randomized consensus. Distributed Computing. 31(6), 489–501.","ama":"Alistarh D-A, Aspnes J, King V, Saia J. Communication-efficient randomized consensus. <i>Distributed Computing</i>. 2018;31(6):489-501. doi:<a href=\"https://doi.org/10.1007/s00446-017-0315-1\">10.1007/s00446-017-0315-1</a>"},"publication_status":"published","oa_version":"Published Version","page":"489-501","day":"01","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"abstract":[{"lang":"eng","text":"We consider the problem of consensus in the challenging classic model. In this model, the adversary is adaptive; it can choose which processors crash at any point during the course of the algorithm. Further, communication is via asynchronous message passing: there is no known upper bound on the time to send a message from one processor to another, and all messages and coin flips are seen by the adversary. We describe a new randomized consensus protocol with expected message complexity O(n2log2n) when fewer than n / 2 processes may fail by crashing. This is an almost-linear improvement over the best previously known protocol, and within logarithmic factors of a known Ω(n2) message lower bound. The protocol further ensures that no process sends more than O(nlog3n) messages in expectation, which is again within logarithmic factors of optimal. We also present a generalization of the algorithm to an arbitrary number of failures t, which uses expected O(nt+t2log2t) total messages. Our approach is to build a message-efficient, resilient mechanism for aggregating individual processor votes, implementing the message-passing equivalent of a weak shared coin. 
Roughly, in our protocol, a processor first announces its votes to small groups, then propagates them to increasingly larger groups as it generates more and more votes. To bound the number of messages that an individual process might have to send or receive, the protocol progressively increases the weight of generated votes. The main technical challenge is bounding the impact of votes that are still “in flight” (generated, but not fully propagated) on the final outcome of the shared coin, especially since such votes might have different weights. We achieve this by leveraging the structure of the algorithm, and a technical argument based on martingale concentration bounds. Overall, we show that it is possible to build an efficient message-passing implementation of a shared coin, and in the process (almost-optimally) solve the classic consensus problem in the asynchronous message-passing model."}],"status":"public","date_updated":"2023-02-23T12:23:25Z","volume":31,"month":"11","quality_controlled":"1","publication_identifier":{"issn":["01782770"]},"publist_id":"7281","ddc":["000"],"article_processing_charge":"Yes (via OA deal)","user_id":"3E5EF7F0-F248-11E8-B48F-1D18A9856A87","file_date_updated":"2020-07-14T12:46:38Z","intvolume":"        31","date_created":"2018-12-11T11:47:01Z","department":[{"_id":"DaAl"}],"date_published":"2018-11-01T00:00:00Z","author":[{"full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh"},{"first_name":"James","full_name":"Aspnes, James","last_name":"Aspnes"},{"full_name":"King, Valerie","first_name":"Valerie","last_name":"King"},{"first_name":"Jared","full_name":"Saia, Jared","last_name":"Saia"}],"oa":1,"_id":"536","type":"journal_article","language":[{"iso":"eng"}],"doi":"10.1007/s00446-017-0315-1","publisher":"Springer","year":"2018"},{"user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","article_processing_charge":"No","conference":{"location":"Egham, United Kingdom","end_date":"2018-07-27","start_date":"2018-07-23","name":"PODC: Principles of Distributed Computing"},"scopus_import":"1","date_created":"2019-02-13T09:48:55Z","department":[{"_id":"DaAl"}],"citation":{"chicago":"Alistarh, Dan-Adrian. “A Brief Tutorial on Distributed and Concurrent Machine Learning.” In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, 487–88. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3212734.3212798\">https://doi.org/10.1145/3212734.3212798</a>.","ieee":"D.-A. Alistarh, “A brief tutorial on distributed and concurrent machine learning,” in <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, Egham, United Kingdom, 2018, pp. 487–488.","short":"D.-A. Alistarh, in:, Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18, ACM Press, 2018, pp. 487–488.","apa":"Alistarh, D.-A. (2018). A brief tutorial on distributed and concurrent machine learning. In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i> (pp. 487–488). Egham, United Kingdom: ACM Press. <a href=\"https://doi.org/10.1145/3212734.3212798\">https://doi.org/10.1145/3212734.3212798</a>","mla":"Alistarh, Dan-Adrian. “A Brief Tutorial on Distributed and Concurrent Machine Learning.” <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, ACM Press, 2018, pp. 
487–88, doi:<a href=\"https://doi.org/10.1145/3212734.3212798\">10.1145/3212734.3212798</a>.","ista":"Alistarh D-A. 2018. A brief tutorial on distributed and concurrent machine learning. Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18. PODC: Principles of Distributed Computing, 487–488.","ama":"Alistarh D-A. A brief tutorial on distributed and concurrent machine learning. In: <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>. ACM Press; 2018:487-488. doi:<a href=\"https://doi.org/10.1145/3212734.3212798\">10.1145/3212734.3212798</a>"},"publication":"Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC '18","isi":1,"external_id":{"isi":["000458186900063"]},"title":"A brief tutorial on distributed and concurrent machine learning","month":"07","quality_controlled":"1","publication_identifier":{"isbn":["9781450357951"]},"status":"public","type":"conference","language":[{"iso":"eng"}],"doi":"10.1145/3212734.3212798","publisher":"ACM Press","date_updated":"2023-09-19T10:42:28Z","year":"2018","oa_version":"None","page":"487-488","date_published":"2018-07-27T00:00:00Z","publication_status":"published","day":"27","abstract":[{"lang":"eng","text":"The area of machine learning has made considerable progress over the past decade, enabled by the widespread availability of large datasets, as well as by improved algorithms and models. Given the large computational demands of machine learning workloads, parallelism, implemented either through single-node concurrency or through multi-node distribution, has been a third key ingredient to advances in machine learning.\r\nThe goal of this tutorial is to provide the audience with an overview of standard distribution techniques in machine learning, with an eye towards the intriguing trade-offs between synchronization and communication costs of distributed machine learning algorithms, on the one hand, and their convergence, on the other. The tutorial will focus on parallelization strategies for the fundamental stochastic gradient descent (SGD) algorithm, which is a key tool when training machine learning models, from classical instances such as linear regression, to state-of-the-art neural network architectures.\r\nThe tutorial will describe the guarantees provided by this algorithm in the sequential case, and then move on to cover both shared-memory and message-passing parallelization strategies, together with the guarantees they provide, and corresponding trade-offs. The presentation will conclude with a broad overview of ongoing research in distributed and concurrent machine learning. 
The tutorial will assume no prior knowledge beyond familiarity with basic concepts in algebra and analysis.\r\n"}],"author":[{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian"}],"_id":"5961"},{"isi":1,"publication_identifier":{"isbn":["9781450357951"]},"quality_controlled":"1","month":"07","conference":{"location":"Egham, United Kingdom","end_date":"2018-07-27","start_date":"2018-07-23","name":"PODC: Principles of Distributed Computing"},"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","department":[{"_id":"DaAl"}],"date_created":"2019-02-13T09:58:58Z","date_published":"2018-07-23T00:00:00Z","_id":"5962","oa":1,"author":[{"last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian"},{"full_name":"De Sa, Christopher","first_name":"Christopher","last_name":"De Sa"},{"last_name":"Konstantinov","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87","first_name":"Nikola H","full_name":"Konstantinov, Nikola H"}],"type":"conference","year":"2018","publisher":"ACM Press","doi":"10.1145/3212734.3212763","language":[{"iso":"eng"}],"title":"The convergence of stochastic gradient descent in asynchronous shared memory","external_id":{"isi":["000458186900022"],"arxiv":["1803.08841"]},"publication":"Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC '18","main_file_link":[{"url":"https://arxiv.org/abs/1803.08841","open_access":"1"}],"scopus_import":"1","arxiv":1,"citation":{"chicago":"Alistarh, Dan-Adrian, Christopher De Sa, and Nikola H Konstantinov. “The Convergence of Stochastic Gradient Descent in Asynchronous Shared Memory.” In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, 169–78. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3212734.3212763\">https://doi.org/10.1145/3212734.3212763</a>.","ieee":"D.-A. Alistarh, C. De Sa, and N. H. Konstantinov, “The convergence of stochastic gradient descent in asynchronous shared memory,” in <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, Egham, United Kingdom, 2018, pp. 169–178.","apa":"Alistarh, D.-A., De Sa, C., &#38; Konstantinov, N. H. (2018). The convergence of stochastic gradient descent in asynchronous shared memory. In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i> (pp. 169–178). Egham, United Kingdom: ACM Press. <a href=\"https://doi.org/10.1145/3212734.3212763\">https://doi.org/10.1145/3212734.3212763</a>","short":"D.-A. Alistarh, C. De Sa, N.H. Konstantinov, in:, Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18, ACM Press, 2018, pp. 169–178.","mla":"Alistarh, Dan-Adrian, et al. “The Convergence of Stochastic Gradient Descent in Asynchronous Shared Memory.” <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, ACM Press, 2018, pp. 169–78, doi:<a href=\"https://doi.org/10.1145/3212734.3212763\">10.1145/3212734.3212763</a>.","ista":"Alistarh D-A, De Sa C, Konstantinov NH. 2018. The convergence of stochastic gradient descent in asynchronous shared memory. Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18. PODC: Principles of Distributed Computing, 169–178.","ama":"Alistarh D-A, De Sa C, Konstantinov NH. 
The convergence of stochastic gradient descent in asynchronous shared memory. In: <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>. ACM Press; 2018:169-178. doi:<a href=\"https://doi.org/10.1145/3212734.3212763\">10.1145/3212734.3212763</a>"},"abstract":[{"text":"Stochastic Gradient Descent (SGD) is a fundamental algorithm in machine learning, representing the optimization backbone for training several classic models, from regression to neural networks. Given the recent practical focus on distributed machine learning, significant work has been dedicated to the convergence properties of this algorithm under the inconsistent and noisy updates arising from execution in a distributed environment. However, surprisingly, the convergence properties of this classic algorithm in the standard shared-memory model are still not well-understood. In this work, we address this gap, and provide new convergence bounds for lock-free concurrent stochastic gradient descent, executing in the classic asynchronous shared memory model, against a strong adaptive adversary. Our results give improved upper and lower bounds on the \"price of asynchrony\" when executing the fundamental SGD algorithm in a concurrent setting. They show that this classic optimization tool can converge faster and with a wider range of parameters than previously known under asynchronous iterations. At the same time, we exhibit a fundamental trade-off between the maximum delay in the system and the rate at which SGD can converge, which governs the set of parameters under which this algorithm can still work efficiently.","lang":"eng"}],"day":"23","page":"169-178","publication_status":"published","oa_version":"Preprint","status":"public","date_updated":"2023-09-19T10:42:53Z"},{"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","conference":{"location":"Egham, United Kingdom","start_date":"2018-07-23","end_date":"2018-07-27","name":"PODC: Principles of Distributed Computing"},"date_created":"2019-02-13T10:03:25Z","department":[{"_id":"DaAl"}],"isi":1,"month":"07","quality_controlled":"1","publication_identifier":{"isbn":["9781450357951"]},"type":"conference","language":[{"iso":"eng"}],"doi":"10.1145/3212734.3212756","publisher":"ACM Press","year":"2018","date_published":"2018-07-23T00:00:00Z","author":[{"orcid":"0000-0003-3650-940X","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian"},{"id":"3569F0A0-F248-11E8-B48F-1D18A9856A87","last_name":"Brown","first_name":"Trevor A","full_name":"Brown, Trevor A"},{"last_name":"Kopinsky","first_name":"Justin","full_name":"Kopinsky, Justin"},{"last_name":"Nadiradze","first_name":"Giorgi","full_name":"Nadiradze, Giorgi"}],"oa":1,"_id":"5963","scopus_import":"1","arxiv":1,"citation":{"ieee":"D.-A. Alistarh, T. A. Brown, J. Kopinsky, and G. Nadiradze, “Relaxed schedulers can efficiently parallelize iterative algorithms,” in <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, Egham, United Kingdom, 2018, pp. 377–386.","chicago":"Alistarh, Dan-Adrian, Trevor A Brown, Justin Kopinsky, and Giorgi Nadiradze. “Relaxed Schedulers Can Efficiently Parallelize Iterative Algorithms.” In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, 377–86. ACM Press, 2018. 
<a href=\"https://doi.org/10.1145/3212734.3212756\">https://doi.org/10.1145/3212734.3212756</a>.","ama":"Alistarh D-A, Brown TA, Kopinsky J, Nadiradze G. Relaxed schedulers can efficiently parallelize iterative algorithms. In: <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>. ACM Press; 2018:377-386. doi:<a href=\"https://doi.org/10.1145/3212734.3212756\">10.1145/3212734.3212756</a>","apa":"Alistarh, D.-A., Brown, T. A., Kopinsky, J., &#38; Nadiradze, G. (2018). Relaxed schedulers can efficiently parallelize iterative algorithms. In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i> (pp. 377–386). Egham, United Kingdom: ACM Press. <a href=\"https://doi.org/10.1145/3212734.3212756\">https://doi.org/10.1145/3212734.3212756</a>","short":"D.-A. Alistarh, T.A. Brown, J. Kopinsky, G. Nadiradze, in:, Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18, ACM Press, 2018, pp. 377–386.","ista":"Alistarh D-A, Brown TA, Kopinsky J, Nadiradze G. 2018. Relaxed schedulers can efficiently parallelize iterative algorithms. Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18. PODC: Principles of Distributed Computing, 377–386.","mla":"Alistarh, Dan-Adrian, et al. “Relaxed Schedulers Can Efficiently Parallelize Iterative Algorithms.” <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, ACM Press, 2018, pp. 377–86, doi:<a href=\"https://doi.org/10.1145/3212734.3212756\">10.1145/3212734.3212756</a>."},"publication":"Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC '18","external_id":{"isi":["000458186900048"],"arxiv":["1808.04155"]},"title":"Relaxed schedulers can efficiently parallelize iterative algorithms","main_file_link":[{"url":"https://arxiv.org/abs/1808.04155","open_access":"1"}],"status":"public","date_updated":"2023-09-19T10:43:21Z","oa_version":"Preprint","page":"377-386","publication_status":"published","day":"23","abstract":[{"text":"There has been significant progress in understanding the parallelism inherent to iterative sequential algorithms: for many classic algorithms, the depth of the dependence structure is now well understood, and scheduling techniques have been developed to exploit this shallow dependence structure for efficient parallel implementations. A related, applied research strand has studied methods by which certain iterative task-based algorithms can be efficiently parallelized via relaxed concurrent priority schedulers. These allow for high concurrency when inserting and removing tasks, at the cost of executing superfluous work due to the relaxed semantics of the scheduler. In this work, we take a step towards unifying these two research directions, by showing that there exists a family of relaxed priority schedulers that can efficiently and deterministically execute classic iterative algorithms such as greedy maximal independent set (MIS) and matching. Our primary result shows that, given a randomized scheduler with an expected relaxation factor of k in terms of the maximum allowed priority inversions on a task, and any graph on n vertices, the scheduler is able to execute greedy MIS with only an additive factor of \\poly(k) expected additional iterations compared to an exact (but not scalable) scheduler. 
This counter-intuitive result demonstrates that the overhead of relaxation when computing MIS is not dependent on the input size or structure of the input graph. Experimental results show that this overhead can be clearly offset by the gain in performance due to the highly scalable scheduler. In sum, we present an efficient method to deterministically parallelize iterative sequential algorithms, with provable runtime guarantees in terms of the number of executed tasks to completion.","lang":"eng"}]},{"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","conference":{"location":"Egham, United Kingdom","start_date":"2018-07-23","end_date":"2018-07-27","name":"PODC: Principles of Distributed Computing"},"department":[{"_id":"DaAl"}],"date_created":"2019-02-13T10:08:19Z","isi":1,"quality_controlled":"1","publication_identifier":{"isbn":["9781450357951"]},"month":"07","type":"conference","publisher":"ACM Press","year":"2018","language":[{"iso":"eng"}],"doi":"10.1145/3212734.3212785","date_published":"2018-07-23T00:00:00Z","oa":1,"_id":"5964","author":[{"full_name":"Aksenov, Vitaly","first_name":"Vitaly","last_name":"Aksenov"},{"first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","orcid":"0000-0003-3650-940X"},{"last_name":"Kuznetsov","first_name":"Petr","full_name":"Kuznetsov, Petr"}],"scopus_import":"1","citation":{"ieee":"V. Aksenov, D.-A. Alistarh, and P. Kuznetsov, “Brief Announcement: Performance prediction for coarse-grained locking,” in <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, Egham, United Kingdom, 2018, pp. 411–413.","chicago":"Aksenov, Vitaly, Dan-Adrian Alistarh, and Petr Kuznetsov. “Brief Announcement: Performance Prediction for Coarse-Grained Locking.” In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, 411–13. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3212734.3212785\">https://doi.org/10.1145/3212734.3212785</a>.","ama":"Aksenov V, Alistarh D-A, Kuznetsov P. Brief Announcement: Performance prediction for coarse-grained locking. In: <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>. ACM Press; 2018:411-413. doi:<a href=\"https://doi.org/10.1145/3212734.3212785\">10.1145/3212734.3212785</a>","mla":"Aksenov, Vitaly, et al. “Brief Announcement: Performance Prediction for Coarse-Grained Locking.” <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, ACM Press, 2018, pp. 411–13, doi:<a href=\"https://doi.org/10.1145/3212734.3212785\">10.1145/3212734.3212785</a>.","apa":"Aksenov, V., Alistarh, D.-A., &#38; Kuznetsov, P. (2018). Brief Announcement: Performance prediction for coarse-grained locking. In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i> (pp. 411–413). Egham, United Kingdom: ACM Press. <a href=\"https://doi.org/10.1145/3212734.3212785\">https://doi.org/10.1145/3212734.3212785</a>","ista":"Aksenov V, Alistarh D-A, Kuznetsov P. 2018. Brief Announcement: Performance prediction for coarse-grained locking. Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18. PODC: Principles of Distributed Computing, 411–413.","short":"V. Aksenov, D.-A. Alistarh, P. Kuznetsov, in:, Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18, ACM Press, 2018, pp. 
411–413."},"external_id":{"isi":["000458186900052"]},"title":"Brief Announcement: Performance prediction for coarse-grained locking","publication":"Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC '18","main_file_link":[{"open_access":"1","url":"https://hal-univ-lyon3.archives-ouvertes.fr/INRIA/hal-01887733v1"}],"status":"public","date_updated":"2023-09-19T10:43:45Z","abstract":[{"lang":"eng","text":"A standard design pattern found in many concurrent data structures, such as hash tables or ordered containers, is an alternation of parallelizable sections that incur no data conflicts and critical sections that must run sequentially and are protected with locks. A lock can be viewed as a queue that arbitrates the order in which the critical sections are executed, and a natural question is whether we can use stochastic analysis to predict the resulting throughput. As a preliminary evidence to the affirmative, we describe a simple model that can be used to predict the throughput of coarse-grained lock-based algorithms. We show that our model works well for CLH lock, and we expect it to work for other popular lock designs such as TTAS, MCS, etc."}],"oa_version":"Submitted Version","page":"411-413","publication_status":"published","day":"23"},{"abstract":[{"text":"Relaxed concurrent data structures have become increasingly popular, due to their scalability in graph processing and machine learning applications (\\citeNguyen13, gonzalez2012powergraph ). Despite considerable interest, there exist families of natural, high performing randomized relaxed concurrent data structures, such as the popular MultiQueue~\\citeMQ pattern for implementing relaxed priority queue data structures, for which no guarantees are known in the concurrent setting~\\citeAKLN17. Our main contribution is in showing for the first time that, under a set of analytic assumptions, a family of relaxed concurrent data structures, including variants of MultiQueues, but also a new approximate counting algorithm we call the MultiCounter, provides strong probabilistic guarantees on the degree of relaxation with respect to the sequential specification, in arbitrary concurrent executions. We formalize these guarantees via a new correctness condition called distributional linearizability, tailored to concurrent implementations with randomized relaxations. Our result is based on a new analysis of an asynchronous variant of the classic power-of-two-choices load balancing algorithm, in which placement choices can be based on inconsistent, outdated information (this result may be of independent interest). We validate our results empirically, showing that the MultiCounter algorithm can implement scalable relaxed timestamps.","lang":"eng"}],"day":"16","publication_status":"published","page":"133-142","oa_version":"Preprint","date_updated":"2023-09-19T10:44:13Z","status":"public","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1804.01018"}],"title":"Distributionally linearizable data structures","related_material":{"record":[{"relation":"dissertation_contains","status":"public","id":"10429"}]},"external_id":{"isi":["000545269600016"],"arxiv":["1804.01018"]},"publication":"Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA '18","citation":{"ama":"Alistarh D-A, Brown TA, Kopinsky J, Li JZ, Nadiradze G. Distributionally linearizable data structures. In: <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i>. 
ACM Press; 2018:133-142. doi:<a href=\"https://doi.org/10.1145/3210377.3210411\">10.1145/3210377.3210411</a>","short":"D.-A. Alistarh, T.A. Brown, J. Kopinsky, J.Z. Li, G. Nadiradze, in:, Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18, ACM Press, 2018, pp. 133–142.","mla":"Alistarh, Dan-Adrian, et al. “Distributionally Linearizable Data Structures.” <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i>, ACM Press, 2018, pp. 133–42, doi:<a href=\"https://doi.org/10.1145/3210377.3210411\">10.1145/3210377.3210411</a>.","ista":"Alistarh D-A, Brown TA, Kopinsky J, Li JZ, Nadiradze G. 2018. Distributionally linearizable data structures. Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18. SPAA: Symposium on Parallelism in Algorithms and Architectures, 133–142.","apa":"Alistarh, D.-A., Brown, T. A., Kopinsky, J., Li, J. Z., &#38; Nadiradze, G. (2018). Distributionally linearizable data structures. In <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i> (pp. 133–142). Vienna, Austria: ACM Press. <a href=\"https://doi.org/10.1145/3210377.3210411\">https://doi.org/10.1145/3210377.3210411</a>","ieee":"D.-A. Alistarh, T. A. Brown, J. Kopinsky, J. Z. Li, and G. Nadiradze, “Distributionally linearizable data structures,” in <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i>, Vienna, Austria, 2018, pp. 133–142.","chicago":"Alistarh, Dan-Adrian, Trevor A Brown, Justin Kopinsky, Jerry Z. Li, and Giorgi Nadiradze. “Distributionally Linearizable Data Structures.” In <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i>, 133–42. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3210377.3210411\">https://doi.org/10.1145/3210377.3210411</a>."},"arxiv":1,"scopus_import":"1","_id":"5965","oa":1,"author":[{"full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Brown","id":"3569F0A0-F248-11E8-B48F-1D18A9856A87","first_name":"Trevor A","full_name":"Brown, Trevor A"},{"full_name":"Kopinsky, Justin","first_name":"Justin","last_name":"Kopinsky"},{"full_name":"Li, Jerry Z.","first_name":"Jerry Z.","last_name":"Li"},{"last_name":"Nadiradze","full_name":"Nadiradze, Giorgi","first_name":"Giorgi"}],"date_published":"2018-07-16T00:00:00Z","year":"2018","publisher":"ACM Press","doi":"10.1145/3210377.3210411","language":[{"iso":"eng"}],"type":"conference","publication_identifier":{"isbn":["9781450357999"]},"quality_controlled":"1","month":"07","isi":1,"department":[{"_id":"DaAl"}],"date_created":"2019-02-13T10:17:19Z","conference":{"name":"SPAA: Symposium on Parallelism in Algorithms and Architectures","start_date":"2018-07-16","end_date":"2018-07-18","location":"Vienna, Austria"},"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1"},{"citation":{"chicago":"Alistarh, Dan-Adrian, Syed Kamran Haider, Raphael Kübler, and Giorgi Nadiradze. “The Transactional Conflict Problem.” In <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i>, 383–92. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3210377.3210406\">https://doi.org/10.1145/3210377.3210406</a>.","ieee":"D.-A. Alistarh, S. K. Haider, R. Kübler, and G. 
Nadiradze, “The transactional conflict problem,” in <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i>, Vienna, Austria, 2018, pp. 383–392.","short":"D.-A. Alistarh, S.K. Haider, R. Kübler, G. Nadiradze, in:, Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18, ACM Press, 2018, pp. 383–392.","apa":"Alistarh, D.-A., Haider, S. K., Kübler, R., &#38; Nadiradze, G. (2018). The transactional conflict problem. In <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i> (pp. 383–392). Vienna, Austria: ACM Press. <a href=\"https://doi.org/10.1145/3210377.3210406\">https://doi.org/10.1145/3210377.3210406</a>","ista":"Alistarh D-A, Haider SK, Kübler R, Nadiradze G. 2018. The transactional conflict problem. Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18. SPAA: Symposium on Parallelism in Algorithms and Architectures, 383–392.","mla":"Alistarh, Dan-Adrian, et al. “The Transactional Conflict Problem.” <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i>, ACM Press, 2018, pp. 383–92, doi:<a href=\"https://doi.org/10.1145/3210377.3210406\">10.1145/3210377.3210406</a>.","ama":"Alistarh D-A, Haider SK, Kübler R, Nadiradze G. The transactional conflict problem. In: <i>Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA ’18</i>. ACM Press; 2018:383-392. doi:<a href=\"https://doi.org/10.1145/3210377.3210406\">10.1145/3210377.3210406</a>"},"arxiv":1,"scopus_import":"1","main_file_link":[{"url":"https://arxiv.org/abs/1804.00947","open_access":"1"}],"title":"The transactional conflict problem","external_id":{"arxiv":["1804.00947"],"isi":["000545269600046"]},"publication":"Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures  - SPAA '18","date_updated":"2023-09-19T10:44:49Z","status":"public","abstract":[{"lang":"eng","text":"The transactional conflict problem arises in transactional systems whenever two or more concurrent transactions clash on a data item. While the standard solution to such conflicts is to immediately abort one of the transactions, some practical systems consider the alternative of delaying conflict resolution for a short interval, which may allow one of the transactions to commit. The challenge in the transactional conflict problem is to choose the optimal length of this delay interval so as to minimize the overall running time penalty for the conflicting transactions. In this paper, we propose a family of optimal online algorithms for the transactional conflict problem. Specifically, we consider variants of this problem which arise in different implementations of transactional systems, namely \"requestor wins\" and \"requestor aborts\" implementations: in the former, the recipient of a coherence request is aborted, whereas in the latter, it is the requestor which has to abort. Both strategies are implemented by real systems. We show that the requestor aborts case can be reduced to a classic instance of the ski rental problem, while the requestor wins case leads to a new version of this classical problem, for which we derive optimal deterministic and randomized algorithms. Moreover, we prove that, under a simplified adversarial model, our algorithms are constant-competitive with the offline optimum in terms of throughput. 
We validate our algorithmic results empirically through a hardware simulation of hardware transactional memory (HTM), showing that our algorithms can lead to non-trivial performance improvements for classic concurrent data structures."}],"day":"16","publication_status":"published","page":"383-392","oa_version":"Preprint","department":[{"_id":"DaAl"}],"date_created":"2019-02-13T10:26:07Z","conference":{"location":"Vienna, Austria","start_date":"2018-07-16","end_date":"2018-07-18","name":"SPAA: Symposium on Parallelism in Algorithms and Architectures"},"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","publication_identifier":{"isbn":["9781450357999"]},"quality_controlled":"1","month":"07","isi":1,"year":"2018","publisher":"ACM Press","doi":"10.1145/3210377.3210406","language":[{"iso":"eng"}],"type":"conference","_id":"5966","oa":1,"author":[{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian"},{"last_name":"Haider","full_name":"Haider, Syed Kamran","first_name":"Syed Kamran"},{"first_name":"Raphael","full_name":"Kübler, Raphael","last_name":"Kübler"},{"first_name":"Giorgi","full_name":"Nadiradze, Giorgi","last_name":"Nadiradze"}],"date_published":"2018-07-16T00:00:00Z"},{"date_published":"2018-09-01T00:00:00Z","oa_version":"None","publication_status":"published","day":"01","abstract":[{"lang":"eng","text":"The concurrent memory reclamation problem is that of devising a way for a deallocating thread to verify that no other concurrent threads hold references to a memory block being deallocated. To date, in the absence of automatic garbage collection, there is no satisfactory solution to this problem; existing tracking methods like hazard pointers, reference counters, or epoch-based techniques like RCU are either prohibitively expensive or require significant programming expertise to the extent that implementing them efficiently can be worthy of a publication. None of the existing techniques are automatic or even semi-automated.\r\nIn this article, we take a new approach to concurrent memory reclamation. 
Instead of manually tracking access to memory locations as done in techniques like hazard pointers, or restricting shared accesses to specific epoch boundaries as in RCU, our algorithm, called ThreadScan, leverages operating system signaling to automatically detect which memory locations are being accessed by concurrent threads.\r\nInitial empirical evidence shows that ThreadScan scales surprisingly well and requires negligible programming effort beyond the standard use of Malloc and Free."}],"author":[{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian"},{"last_name":"Leiserson","full_name":"Leiserson, William","first_name":"William"},{"last_name":"Matveev","full_name":"Matveev, Alexander","first_name":"Alexander"},{"first_name":"Nir","full_name":"Shavit, Nir","last_name":"Shavit"}],"_id":"6001","status":"public","type":"journal_article","language":[{"iso":"eng"}],"doi":"10.1145/3201897","publisher":"Association for Computing Machinery","date_updated":"2023-02-23T13:17:54Z","year":"2018","publication":"ACM Transactions on Parallel Computing","volume":4,"related_material":{"record":[{"status":"public","id":"779","relation":"earlier_version"}]},"title":"ThreadScan: Automatic and scalable memory reclamation","month":"09","quality_controlled":"1","publication_identifier":{"issn":["2329-4949"]},"user_id":"3E5EF7F0-F248-11E8-B48F-1D18A9856A87","issue":"4","scopus_import":1,"intvolume":"         4","article_number":"18","date_created":"2019-02-14T13:24:11Z","department":[{"_id":"DaAl"}],"citation":{"ama":"Alistarh D-A, Leiserson W, Matveev A, Shavit N. ThreadScan: Automatic and scalable memory reclamation. <i>ACM Transactions on Parallel Computing</i>. 2018;4(4). doi:<a href=\"https://doi.org/10.1145/3201897\">10.1145/3201897</a>","mla":"Alistarh, Dan-Adrian, et al. “ThreadScan: Automatic and Scalable Memory Reclamation.” <i>ACM Transactions on Parallel Computing</i>, vol. 4, no. 4, 18, Association for Computing Machinery, 2018, doi:<a href=\"https://doi.org/10.1145/3201897\">10.1145/3201897</a>.","ista":"Alistarh D-A, Leiserson W, Matveev A, Shavit N. 2018. ThreadScan: Automatic and scalable memory reclamation. ACM Transactions on Parallel Computing. 4(4), 18.","short":"D.-A. Alistarh, W. Leiserson, A. Matveev, N. Shavit, ACM Transactions on Parallel Computing 4 (2018).","apa":"Alistarh, D.-A., Leiserson, W., Matveev, A., &#38; Shavit, N. (2018). ThreadScan: Automatic and scalable memory reclamation. <i>ACM Transactions on Parallel Computing</i>. Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3201897\">https://doi.org/10.1145/3201897</a>","ieee":"D.-A. Alistarh, W. Leiserson, A. Matveev, and N. Shavit, “ThreadScan: Automatic and scalable memory reclamation,” <i>ACM Transactions on Parallel Computing</i>, vol. 4, no. 4. Association for Computing Machinery, 2018.","chicago":"Alistarh, Dan-Adrian, William Leiserson, Alexander Matveev, and Nir Shavit. “ThreadScan: Automatic and Scalable Memory Reclamation.” <i>ACM Transactions on Parallel Computing</i>. Association for Computing Machinery, 2018. 
<a href=\"https://doi.org/10.1145/3201897\">https://doi.org/10.1145/3201897</a>."}},{"language":[{"iso":"eng"}],"doi":"10.1109/SiPS.2018.8598402","publisher":"IEEE","date_updated":"2023-09-19T14:41:51Z","year":"2018","status":"public","type":"conference","author":[{"last_name":"Stojanov","first_name":"Alen","full_name":"Stojanov, Alen"},{"last_name":"Smith","first_name":"Tyler Michael","full_name":"Smith, Tyler Michael"},{"full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh"},{"first_name":"Markus","full_name":"Puschel, Markus","last_name":"Puschel"}],"_id":"6031","date_published":"2018-12-31T00:00:00Z","publication_status":"published","oa_version":"None","day":"31","abstract":[{"lang":"eng","text":"We introduce Clover, a new library for efficient computation using low-precision data, providing mathematical routines required by fundamental methods in optimization and sparse recovery. Our library faithfully implements variants of stochastic quantization that guarantee convergence at low precision, and supports data formats from 4-bit quantized to 32-bit IEEE-754 on current Intel processors. In particular, we show that 4-bit can be implemented efficiently using Intel AVX despite the lack of native support for this data format. Experimental results with dot product, matrix-vector multiplication (MVM), gradient descent (GD), and iterative hard thresholding (IHT) demonstrate that the attainable speedups are in many cases close to linear with respect to the reduction of precision due to reduced data movement. Finally, for GD and IHT, we show examples of absolute speedup achieved by 4-bit versus 32-bit, by iterating until a given target error is achieved."}],"article_number":"8598402","date_created":"2019-02-17T22:59:25Z","citation":{"ista":"Stojanov A, Smith TM, Alistarh D-A, Puschel M. 2018. Fast quantized arithmetic on x86: Trading compute for data movement. 2018 IEEE International Workshop on Signal Processing Systems. SiPS: Workshop on Signal Processing Systems vol. 2018–October, 8598402.","apa":"Stojanov, A., Smith, T. M., Alistarh, D.-A., &#38; Puschel, M. (2018). Fast quantized arithmetic on x86: Trading compute for data movement. In <i>2018 IEEE International Workshop on Signal Processing Systems</i> (Vol. 2018–October). Cape Town, South Africa: IEEE. <a href=\"https://doi.org/10.1109/SiPS.2018.8598402\">https://doi.org/10.1109/SiPS.2018.8598402</a>","short":"A. Stojanov, T.M. Smith, D.-A. Alistarh, M. Puschel, in:, 2018 IEEE International Workshop on Signal Processing Systems, IEEE, 2018.","mla":"Stojanov, Alen, et al. “Fast Quantized Arithmetic on X86: Trading Compute for Data Movement.” <i>2018 IEEE International Workshop on Signal Processing Systems</i>, vol. 2018–October, 8598402, IEEE, 2018, doi:<a href=\"https://doi.org/10.1109/SiPS.2018.8598402\">10.1109/SiPS.2018.8598402</a>.","ama":"Stojanov A, Smith TM, Alistarh D-A, Puschel M. Fast quantized arithmetic on x86: Trading compute for data movement. In: <i>2018 IEEE International Workshop on Signal Processing Systems</i>. Vol 2018-October. IEEE; 2018. doi:<a href=\"https://doi.org/10.1109/SiPS.2018.8598402\">10.1109/SiPS.2018.8598402</a>","chicago":"Stojanov, Alen, Tyler Michael Smith, Dan-Adrian Alistarh, and Markus Puschel. “Fast Quantized Arithmetic on X86: Trading Compute for Data Movement.” In <i>2018 IEEE International Workshop on Signal Processing Systems</i>, Vol. 2018–October. IEEE, 2018. 
<a href=\"https://doi.org/10.1109/SiPS.2018.8598402\">https://doi.org/10.1109/SiPS.2018.8598402</a>.","ieee":"A. Stojanov, T. M. Smith, D.-A. Alistarh, and M. Puschel, “Fast quantized arithmetic on x86: Trading compute for data movement,” in <i>2018 IEEE International Workshop on Signal Processing Systems</i>, Cape Town, South Africa, 2018, vol. 2018–October."},"department":[{"_id":"DaAl"}],"user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","article_processing_charge":"No","conference":{"location":"Cape Town, South Africa","start_date":"2018-10-21","end_date":"2018-10-24","name":"SiPS: Workshop on Signal Processing Systems"},"scopus_import":"1","month":"12","quality_controlled":"1","publication":"2018 IEEE International Workshop on Signal Processing Systems","isi":1,"volume":"2018-October","external_id":{"isi":["000465106800060"]},"title":"Fast quantized arithmetic on x86: Trading compute for data movement"},{"_id":"6558","oa":1,"author":[{"first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Allen-Zhu","full_name":"Allen-Zhu, Zeyuan","first_name":"Zeyuan"},{"first_name":"Jerry","full_name":"Li, Jerry","last_name":"Li"}],"date_published":"2018-12-01T00:00:00Z","year":"2018","publisher":"Neural Information Processing Systems Foundation","language":[{"iso":"eng"}],"type":"conference","quality_controlled":"1","month":"12","volume":2018,"isi":1,"department":[{"_id":"DaAl"}],"date_created":"2019-06-13T08:22:37Z","intvolume":"      2018","conference":{"start_date":"2018-12-02","end_date":"2018-12-08","name":"NeurIPS: Conference on Neural Information Processing Systems","location":"Montreal, Canada"},"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","abstract":[{"text":"This paper studies the problem of distributed stochastic optimization in an adversarial setting where, out of m machines which allegedly compute stochastic gradients every iteration, an α-fraction are Byzantine, and may behave adversarially. Our main result is a variant of stochastic gradient descent (SGD) which finds ε-approximate minimizers of convex functions in T=O~(1/ε²m+α²/ε²) iterations. In contrast, traditional mini-batch SGD needs T=O(1/ε²m) iterations, but cannot tolerate Byzantine failures. Further, we provide a lower bound showing that, up to logarithmic factors, our algorithm is information-theoretically optimal both in terms of sample complexity and time complexity.","lang":"eng"}],"day":"01","oa_version":"Published Version","publication_status":"published","page":"4613-4623","date_updated":"2023-09-19T15:12:45Z","status":"public","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1803.08917"}],"title":"Byzantine stochastic gradient descent","external_id":{"arxiv":["1803.08917"],"isi":["000461823304061"]},"publication":"Advances in Neural Information Processing Systems","arxiv":1,"citation":{"ieee":"D.-A. Alistarh, Z. Allen-Zhu, and J. Li, “Byzantine stochastic gradient descent,” in <i>Advances in Neural Information Processing Systems</i>, Montreal, Canada, 2018, vol. 2018, pp. 4613–4623.","chicago":"Alistarh, Dan-Adrian, Zeyuan Allen-Zhu, and Jerry Li. “Byzantine Stochastic Gradient Descent.” In <i>Advances in Neural Information Processing Systems</i>, 2018:4613–23. Neural Information Processing Systems Foundation, 2018.","ama":"Alistarh D-A, Allen-Zhu Z, Li J. Byzantine stochastic gradient descent. 
In: <i>Advances in Neural Information Processing Systems</i>. Vol 2018. Neural Information Processing Systems Foundation; 2018:4613-4623.","apa":"Alistarh, D.-A., Allen-Zhu, Z., &#38; Li, J. (2018). Byzantine stochastic gradient descent. In <i>Advances in Neural Information Processing Systems</i> (Vol. 2018, pp. 4613–4623). Montreal, Canada: Neural Information Processing Systems Foundation.","ista":"Alistarh D-A, Allen-Zhu Z, Li J. 2018. Byzantine stochastic gradient descent. Advances in Neural Information Processing Systems. NeurIPS: Conference on Neural Information Processing Systems vol. 2018, 4613–4623.","mla":"Alistarh, Dan-Adrian, et al. “Byzantine Stochastic Gradient Descent.” <i>Advances in Neural Information Processing Systems</i>, vol. 2018, Neural Information Processing Systems Foundation, 2018, pp. 4613–23.","short":"D.-A. Alistarh, Z. Allen-Zhu, J. Li, in:, Advances in Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2018, pp. 4613–4623."},"scopus_import":"1"},{"title":"The convergence of sparsified gradient methods","external_id":{"isi":["000461852000047"],"arxiv":["1809.10505"]},"publication":"Advances in Neural Information Processing Systems 31","main_file_link":[{"url":"https://arxiv.org/abs/1809.10505","open_access":"1"}],"scopus_import":"1","citation":{"short":"D.-A. Alistarh, T. Hoefler, M. Johansson, N.H. Konstantinov, S. Khirirat, C. Renggli, in:, Advances in Neural Information Processing Systems 31, Neural Information Processing Systems Foundation, 2018, pp. 5973–5983.","ista":"Alistarh D-A, Hoefler T, Johansson M, Konstantinov NH, Khirirat S, Renggli C. 2018. The convergence of sparsified gradient methods. Advances in Neural Information Processing Systems 31. NeurIPS: Conference on Neural Information Processing Systems vol. Volume 2018, 5973–5983.","mla":"Alistarh, Dan-Adrian, et al. “The Convergence of Sparsified Gradient Methods.” <i>Advances in Neural Information Processing Systems 31</i>, vol. Volume 2018, Neural Information Processing Systems Foundation, 2018, pp. 5973–83.","apa":"Alistarh, D.-A., Hoefler, T., Johansson, M., Konstantinov, N. H., Khirirat, S., &#38; Renggli, C. (2018). The convergence of sparsified gradient methods. In <i>Advances in Neural Information Processing Systems 31</i> (Vol. Volume 2018, pp. 5973–5983). Montreal, Canada: Neural Information Processing Systems Foundation.","ama":"Alistarh D-A, Hoefler T, Johansson M, Konstantinov NH, Khirirat S, Renggli C. The convergence of sparsified gradient methods. In: <i>Advances in Neural Information Processing Systems 31</i>. Vol Volume 2018. Neural Information Processing Systems Foundation; 2018:5973-5983.","chicago":"Alistarh, Dan-Adrian, Torsten Hoefler, Mikael Johansson, Nikola H Konstantinov, Sarit Khirirat, and Cedric Renggli. “The Convergence of Sparsified Gradient Methods.” In <i>Advances in Neural Information Processing Systems 31</i>, Volume 2018:5973–83. Neural Information Processing Systems Foundation, 2018.","ieee":"D.-A. Alistarh, T. Hoefler, M. Johansson, N. H. Konstantinov, S. Khirirat, and C. Renggli, “The convergence of sparsified gradient methods,” in <i>Advances in Neural Information Processing Systems 31</i>, Montreal, Canada, 2018, vol. Volume 2018, pp. 
5973–5983."},"arxiv":1,"project":[{"name":"International IST Doctoral Program","_id":"2564DBCA-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"665385"}],"abstract":[{"lang":"eng","text":"Distributed training of massive machine learning models, in particular deep neural networks, via Stochastic Gradient Descent (SGD) is becoming commonplace. Several families of communication-reduction methods, such as quantization, large-batch methods, and gradient sparsification, have been proposed. To date, gradient sparsification methods--where each node sorts gradients by magnitude, and only communicates a subset of the components, accumulating the rest locally--are known to yield some of the largest practical gains. Such methods can reduce the amount of communication per step by up to three orders of magnitude, while preserving model accuracy. Yet, this family of methods currently has no theoretical justification. This is the question we address in this paper. We prove that, under analytic assumptions, sparsifying gradients by magnitude with local error correction provides convergence guarantees, for both convex and non-convex smooth objectives, for data-parallel SGD. The main insight is that sparsification methods implicitly maintain bounds on the maximum impact of stale updates, thanks to selection by magnitude. Our analysis and empirical validation also reveal that these methods do require analytical conditions to converge well, justifying existing heuristics."}],"ec_funded":1,"day":"01","publication_status":"published","oa_version":"Preprint","page":"5973-5983","status":"public","date_updated":"2023-10-17T11:47:20Z","volume":"Volume 2018","isi":1,"quality_controlled":"1","month":"12","conference":{"location":"Montreal, Canada","start_date":"2018-12-02","end_date":"2018-12-08","name":"NeurIPS: Conference on Neural Information Processing Systems"},"article_processing_charge":"No","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","department":[{"_id":"DaAl"},{"_id":"ChLa"}],"date_created":"2019-06-27T09:32:55Z","date_published":"2018-12-01T00:00:00Z","_id":"6589","oa":1,"author":[{"orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian"},{"last_name":"Hoefler","full_name":"Hoefler, Torsten","first_name":"Torsten"},{"last_name":"Johansson","first_name":"Mikael","full_name":"Johansson, Mikael"},{"full_name":"Konstantinov, Nikola H","first_name":"Nikola H","last_name":"Konstantinov","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Khirirat, Sarit","first_name":"Sarit","last_name":"Khirirat"},{"full_name":"Renggli, Cedric","first_name":"Cedric","last_name":"Renggli"}],"type":"conference","year":"2018","publisher":"Neural Information Processing Systems Foundation","language":[{"iso":"eng"}]},{"citation":{"ieee":"M. Arbel Raviv and T. A. Brown, “Harnessing epoch-based reclamation for efficient range queries,” presented at the PPoPP: Principles and Practice of Parallel Programming, Vienna, Austria, 2018, vol. 53, no. 1, pp. 14–27.","chicago":"Arbel Raviv, Maya, and Trevor A Brown. “Harnessing Epoch-Based Reclamation for Efficient Range Queries,” 53:14–27. ACM, 2018. <a href=\"https://doi.org/10.1145/3178487.3178489\">https://doi.org/10.1145/3178487.3178489</a>.","ama":"Arbel Raviv M, Brown TA. Harnessing epoch-based reclamation for efficient range queries. In: Vol 53. ACM; 2018:14-27. 
doi:<a href=\"https://doi.org/10.1145/3178487.3178489\">10.1145/3178487.3178489</a>","short":"M. Arbel Raviv, T.A. Brown, in:, ACM, 2018, pp. 14–27.","ista":"Arbel Raviv M, Brown TA. 2018. Harnessing epoch-based reclamation for efficient range queries. PPoPP: Principles and Practice of Parallel Programming, PPoPP, vol. 53, 14–27.","mla":"Arbel Raviv, Maya, and Trevor A. Brown. <i>Harnessing Epoch-Based Reclamation for Efficient Range Queries</i>. Vol. 53, no. 1, ACM, 2018, pp. 14–27, doi:<a href=\"https://doi.org/10.1145/3178487.3178489\">10.1145/3178487.3178489</a>.","apa":"Arbel Raviv, M., &#38; Brown, T. A. (2018). Harnessing epoch-based reclamation for efficient range queries (Vol. 53, pp. 14–27). Presented at the PPoPP: Principles and Practice of Parallel Programming, Vienna, Austria: ACM. <a href=\"https://doi.org/10.1145/3178487.3178489\">https://doi.org/10.1145/3178487.3178489</a>"},"alternative_title":["PPoPP"],"scopus_import":"1","issue":"1","title":"Harnessing epoch-based reclamation for efficient range queries","external_id":{"isi":["000446161100002"]},"date_updated":"2023-09-11T14:10:25Z","status":"public","abstract":[{"lang":"eng","text":"Concurrent sets with range query operations are highly desirable in applications such as in-memory databases. However, few set implementations offer range queries. Known techniques for augmenting data structures with range queries (or operations that can be used to build range queries) have numerous problems that limit their usefulness. For example, they impose high overhead or rely heavily on garbage collection. In this work, we show how to augment data structures with highly efficient range queries, without relying on garbage collection. We identify a property of epoch-based memory reclamation algorithms that makes them ideal for implementing range queries, and produce three algorithms, which use locks, transactional memory and lock-free techniques, respectively. Our algorithms are applicable to more data structures than previous work, and are shown to be highly efficient on a large scale Intel system. "}],"day":"10","page":"14 - 27","publication_status":"published","oa_version":"None","department":[{"_id":"DaAl"}],"date_created":"2018-12-11T11:46:14Z","intvolume":"        53","conference":{"location":"Vienna, Austria","name":"PPoPP: Principles and Practice of Parallel Programming","end_date":"2018-02-28","start_date":"2018-02-24"},"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","publist_id":"7430","publication_identifier":{"isbn":["978-1-4503-4982-6"]},"quality_controlled":"1","month":"02","volume":53,"isi":1,"year":"2018","publisher":"ACM","doi":"10.1145/3178487.3178489","language":[{"iso":"eng"}],"type":"conference","_id":"397","author":[{"last_name":"Arbel Raviv","full_name":"Arbel Raviv, Maya","first_name":"Maya"},{"last_name":"Brown","id":"3569F0A0-F248-11E8-B48F-1D18A9856A87","full_name":"Brown, Trevor A","first_name":"Trevor A"}],"date_published":"2018-02-10T00:00:00Z"},{"date_updated":"2023-09-13T08:57:38Z","status":"public","abstract":[{"text":"The initial amount of pathogens required to start an infection within a susceptible host is called the infective dose and is known to vary to a large extent between different pathogen species. 
We investigate the hypothesis that the differences in infective doses are explained by the mode of action in the underlying mechanism of pathogenesis: Pathogens with locally acting mechanisms tend to have smaller infective doses than pathogens with distantly acting mechanisms. While empirical evidence tends to support the hypothesis, a formal theoretical explanation has been lacking. We give simple analytical models to gain insight into this phenomenon and also investigate a stochastic, spatially explicit, mechanistic within-host model for toxin-dependent bacterial infections. The model shows that pathogens secreting locally acting toxins have smaller infective doses than pathogens secreting diffusive toxins, as hypothesized. While local pathogenetic mechanisms require smaller infective doses, pathogens with distantly acting toxins tend to spread faster and may cause more damage to the host. The proposed model can serve as a basis for the spatially explicit analysis of various virulence factors also in the context of other problems in infection dynamics.","lang":"eng"}],"ec_funded":1,"day":"02","publication_status":"published","oa_version":"Submitted Version","page":"10690 - 10695","citation":{"short":"J. Rybicki, E. Kisdi, J. Anttila, PNAS 115 (2018) 10690–10695.","apa":"Rybicki, J., Kisdi, E., &#38; Anttila, J. (2018). Model of bacterial toxin-dependent pathogenesis explains infective dose. <i>PNAS</i>. National Academy of Sciences. <a href=\"https://doi.org/10.1073/pnas.1721061115\">https://doi.org/10.1073/pnas.1721061115</a>","ista":"Rybicki J, Kisdi E, Anttila J. 2018. Model of bacterial toxin-dependent pathogenesis explains infective dose. PNAS. 115(42), 10690–10695.","mla":"Rybicki, Joel, et al. “Model of Bacterial Toxin-Dependent Pathogenesis Explains Infective Dose.” <i>PNAS</i>, vol. 115, no. 42, National Academy of Sciences, 2018, pp. 10690–95, doi:<a href=\"https://doi.org/10.1073/pnas.1721061115\">10.1073/pnas.1721061115</a>.","ama":"Rybicki J, Kisdi E, Anttila J. Model of bacterial toxin-dependent pathogenesis explains infective dose. <i>PNAS</i>. 2018;115(42):10690-10695. doi:<a href=\"https://doi.org/10.1073/pnas.1721061115\">10.1073/pnas.1721061115</a>","chicago":"Rybicki, Joel, Eva Kisdi, and Jani Anttila. “Model of Bacterial Toxin-Dependent Pathogenesis Explains Infective Dose.” <i>PNAS</i>. National Academy of Sciences, 2018. <a href=\"https://doi.org/10.1073/pnas.1721061115\">https://doi.org/10.1073/pnas.1721061115</a>.","ieee":"J. Rybicki, E. Kisdi, and J. Anttila, “Model of bacterial toxin-dependent pathogenesis explains infective dose,” <i>PNAS</i>, vol. 115, no. 42. National Academy of Sciences, pp. 
10690–10695, 2018."},"project":[{"call_identifier":"H2020","grant_number":"754411","name":"ISTplus - Postdoctoral Fellowships","_id":"260C2330-B435-11E9-9278-68D0E5697425"}],"pubrep_id":"1063","has_accepted_license":"1","scopus_import":"1","issue":"42","title":"Model of bacterial toxin-dependent pathogenesis explains infective dose","file":[{"relation":"main_file","content_type":"application/pdf","access_level":"open_access","creator":"dernst","date_updated":"2020-07-14T12:46:26Z","checksum":"df7ac544a587c06b75692653b9fabd18","date_created":"2019-04-09T08:02:50Z","file_id":"6258","file_size":4070777,"file_name":"2018_PNAS_Rybicki.pdf"}],"external_id":{"isi":["000447491300057"]},"publication":"PNAS","year":"2018","publisher":"National Academy of Sciences","doi":"10.1073/pnas.1721061115","language":[{"iso":"eng"}],"type":"journal_article","_id":"43","oa":1,"author":[{"id":"334EFD2E-F248-11E8-B48F-1D18A9856A87","last_name":"Rybicki","orcid":"0000-0002-6432-6646","first_name":"Joel","full_name":"Rybicki, Joel"},{"last_name":"Kisdi","first_name":"Eva","full_name":"Kisdi, Eva"},{"first_name":"Jani","full_name":"Anttila, Jani","last_name":"Anttila"}],"date_published":"2018-10-02T00:00:00Z","acknowledgement":"J.R. and J.V.A. were also supported by the Academy of Finland Grants 1273253 and 267541.","department":[{"_id":"DaAl"}],"date_created":"2018-12-11T11:44:19Z","intvolume":"       115","file_date_updated":"2020-07-14T12:46:26Z","article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","ddc":["570","577"],"publist_id":"8011","quality_controlled":"1","month":"10","volume":115,"isi":1},{"main_file_link":[{"url":"https://arxiv.org/abs/1706.04178","open_access":"1"}],"publication":"Proceedings of the ACM Symposium on Principles of Distributed Computing","title":"The power of choice in priority scheduling","external_id":{"isi":["000462995000035"]},"citation":{"ieee":"D.-A. Alistarh, J. Kopinsky, J. Li, and G. Nadiradze, “The power of choice in priority scheduling,” in <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i>, Washington, WA, USA, 2017, vol. Part F129314, pp. 283–292.","chicago":"Alistarh, Dan-Adrian, Justin Kopinsky, Jerry Li, and Giorgi Nadiradze. “The Power of Choice in Priority Scheduling.” In <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i>, Part F129314:283–92. ACM, 2017. <a href=\"https://doi.org/10.1145/3087801.3087810\">https://doi.org/10.1145/3087801.3087810</a>.","ama":"Alistarh D-A, Kopinsky J, Li J, Nadiradze G. The power of choice in priority scheduling. In: <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i>. Vol Part F129314. ACM; 2017:283-292. doi:<a href=\"https://doi.org/10.1145/3087801.3087810\">10.1145/3087801.3087810</a>","mla":"Alistarh, Dan-Adrian, et al. “The Power of Choice in Priority Scheduling.” <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i>, vol. Part F129314, ACM, 2017, pp. 283–92, doi:<a href=\"https://doi.org/10.1145/3087801.3087810\">10.1145/3087801.3087810</a>.","ista":"Alistarh D-A, Kopinsky J, Li J, Nadiradze G. 2017. The power of choice in priority scheduling. Proceedings of the ACM Symposium on Principles of Distributed Computing. PODC: Principles of Distributed Computing vol. Part F129314, 283–292.","apa":"Alistarh, D.-A., Kopinsky, J., Li, J., &#38; Nadiradze, G. (2017). The power of choice in priority scheduling. In <i>Proceedings of the ACM Symposium on Principles of Distributed Computing</i> (Vol. 
Part F129314, pp. 283–292). Washington, WA, USA: ACM. <a href=\"https://doi.org/10.1145/3087801.3087810\">https://doi.org/10.1145/3087801.3087810</a>","short":"D.-A. Alistarh, J. Kopinsky, J. Li, G. Nadiradze, in:, Proceedings of the ACM Symposium on Principles of Distributed Computing, ACM, 2017, pp. 283–292."},"scopus_import":"1","day":"26","page":"283 - 292","oa_version":"Submitted Version","publication_status":"published","abstract":[{"text":"Consider the following random process: we are given n queues, into which elements of increasing labels are inserted uniformly at random. To remove an element, we pick two queues at random, and remove the element of lower label (higher priority) among the two. The cost of a removal is the rank of the label removed, among labels still present in any of the queues, that is, the distance from the optimal choice at each step. Variants of this strategy are prevalent in state-of-the-art concurrent priority queue implementations. Nonetheless, it is not known whether such implementations provide any rank guarantees, even in a sequential model. We answer this question, showing that this strategy provides surprisingly strong guarantees: Although the single-choice process, where we always insert and remove from a single randomly chosen queue, has degrading cost, going to infinity as we increase the number of steps, in the two choice process, the expected rank of a removed element is O(n) while the expected worst-case cost is O(n log n). These bounds are tight, and hold irrespective of the number of steps for which we run the process. The argument is based on a new technical connection between &quot;heavily loaded&quot; balls-into-bins processes and priority scheduling. Our analytic results inspire a new concurrent priority queue implementation, which improves upon the state of the art in terms of practical performance.","lang":"eng"}],"date_updated":"2023-09-27T12:17:59Z","status":"public","month":"07","publication_identifier":{"isbn":["978-145034992-5"]},"publist_id":"6864","quality_controlled":"1","isi":1,"volume":"Part F129314","date_created":"2018-12-11T11:48:31Z","department":[{"_id":"DaAl"}],"conference":{"name":"PODC: Principles of Distributed Computing","end_date":"2017-07-27","start_date":"2017-07-25","location":"Washington, WA, USA"},"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","author":[{"first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","orcid":"0000-0003-3650-940X"},{"last_name":"Kopinsky","first_name":"Justin","full_name":"Kopinsky, Justin"},{"first_name":"Jerry","full_name":"Li, Jerry","last_name":"Li"},{"id":"3279A00C-F248-11E8-B48F-1D18A9856A87","last_name":"Nadiradze","orcid":"0000-0001-5634-0731","full_name":"Nadiradze, Giorgi","first_name":"Giorgi"}],"_id":"791","oa":1,"date_published":"2017-07-26T00:00:00Z","doi":"10.1145/3087801.3087810","language":[{"iso":"eng"}],"year":"2017","publisher":"ACM","type":"conference"},{"day":"28","page":"2 - 14","date_published":"2017-11-28T00:00:00Z","publication_status":"published","oa_version":"None","abstract":[{"text":"In this paper we study network architecture for unlicensed cellular networking for outdoor coverage in TV white spaces. The main technology proposed for TV white spaces is 802.11af, a Wi-Fi variant adapted for TV frequencies. However, 802.11af is originally designed for improved indoor propagation. 
We show that long links, typical for outdoor use, exacerbate known Wi-Fi issues, such as hidden and exposed terminal, and significantly reduce its efficiency. Instead, we propose CellFi, an alternative architecture based on LTE. LTE is designed for long-range coverage and throughput efficiency, but it is also designed to operate in tightly controlled and centrally managed networks. CellFi overcomes these problems by designing an LTE-compatible spectrum database component, mandatory for TV white space networking, and introducing an interference management component for distributed coordination. CellFi interference management is compatible with existing LTE mechanisms, requires no explicit communication between base stations, and is more efficient than CSMA for long links. We evaluate our design through extensive real-world evaluation on off-the-shelf LTE equipment and simulations. We show that, compared to 802.11af, it increases coverage by 40% and reduces median flow completion times by 2.3x.","lang":"eng"}],"author":[{"full_name":"Baig, Ghufran","first_name":"Ghufran","last_name":"Baig"},{"last_name":"Radunovic","first_name":"Bozidar","full_name":"Radunovic, Bozidar"},{"orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian"},{"last_name":"Balkwill","full_name":"Balkwill, Matthew","first_name":"Matthew"},{"full_name":"Karagiannis, Thomas","first_name":"Thomas","last_name":"Karagiannis"},{"first_name":"Lili","full_name":"Qiu, Lili","last_name":"Qiu"}],"_id":"487","status":"public","type":"conference","doi":"10.1145/3143361.3143367","language":[{"iso":"eng"}],"year":"2017","date_updated":"2023-02-23T12:21:11Z","publisher":"ACM","publication":"Proceedings of the 2017 13th International Conference on emerging Networking EXperiments and Technologies","title":"Towards unlicensed cellular networks in TV white spaces","month":"11","publication_identifier":{"isbn":["978-145035422-6"]},"publist_id":"7333","quality_controlled":"1","scopus_import":1,"conference":{"location":"Incheon, South Korea","start_date":"2017-12-12","end_date":"2017-12-15","name":"CoNEXT: Conference on emerging Networking EXperiments and Technologies"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_created":"2018-12-11T11:46:45Z","citation":{"ieee":"G. Baig, B. Radunovic, D.-A. Alistarh, M. Balkwill, T. Karagiannis, and L. Qiu, “Towards unlicensed cellular networks in TV white spaces,” in <i>Proceedings of the 2017 13th International Conference on emerging Networking EXperiments and Technologies</i>, Incheon, South Korea, 2017, pp. 2–14.","chicago":"Baig, Ghufran, Bozidar Radunovic, Dan-Adrian Alistarh, Matthew Balkwill, Thomas Karagiannis, and Lili Qiu. “Towards Unlicensed Cellular Networks in TV White Spaces.” In <i>Proceedings of the 2017 13th International Conference on Emerging Networking EXperiments and Technologies</i>, 2–14. ACM, 2017. <a href=\"https://doi.org/10.1145/3143361.3143367\">https://doi.org/10.1145/3143361.3143367</a>.","ama":"Baig G, Radunovic B, Alistarh D-A, Balkwill M, Karagiannis T, Qiu L. Towards unlicensed cellular networks in TV white spaces. In: <i>Proceedings of the 2017 13th International Conference on Emerging Networking EXperiments and Technologies</i>. ACM; 2017:2-14. doi:<a href=\"https://doi.org/10.1145/3143361.3143367\">10.1145/3143361.3143367</a>","ista":"Baig G, Radunovic B, Alistarh D-A, Balkwill M, Karagiannis T, Qiu L. 2017. Towards unlicensed cellular networks in TV white spaces. 
Proceedings of the 2017 13th International Conference on emerging Networking EXperiments and Technologies. CoNEXT: Conference on emerging Networking EXperiments and Technologies, 2–14.","apa":"Baig, G., Radunovic, B., Alistarh, D.-A., Balkwill, M., Karagiannis, T., &#38; Qiu, L. (2017). Towards unlicensed cellular networks in TV white spaces. In <i>Proceedings of the 2017 13th International Conference on emerging Networking EXperiments and Technologies</i> (pp. 2–14). Incheon, South Korea: ACM. <a href=\"https://doi.org/10.1145/3143361.3143367\">https://doi.org/10.1145/3143361.3143367</a>","short":"G. Baig, B. Radunovic, D.-A. Alistarh, M. Balkwill, T. Karagiannis, L. Qiu, in:, Proceedings of the 2017 13th International Conference on Emerging Networking EXperiments and Technologies, ACM, 2017, pp. 2–14.","mla":"Baig, Ghufran, et al. “Towards Unlicensed Cellular Networks in TV White Spaces.” <i>Proceedings of the 2017 13th International Conference on Emerging Networking EXperiments and Technologies</i>, ACM, 2017, pp. 2–14, doi:<a href=\"https://doi.org/10.1145/3143361.3143367\">10.1145/3143361.3143367</a>."},"department":[{"_id":"DaAl"}]},{"citation":{"apa":"Alistarh, D.-A., Grubic, D., Li, J., Tomioka, R., &#38; Vojnović, M. (2017). QSGD: Communication-efficient SGD via gradient quantization and encoding (Vol. 2017, pp. 1710–1721). Presented at the NIPS: Neural Information Processing System, Long Beach, CA, United States: Neural Information Processing Systems Foundation.","short":"D.-A. Alistarh, D. Grubic, J. Li, R. Tomioka, M. Vojnović, in:, Neural Information Processing Systems Foundation, 2017, pp. 1710–1721.","mla":"Alistarh, Dan-Adrian, et al. <i>QSGD: Communication-Efficient SGD via Gradient Quantization and Encoding</i>. Vol. 2017, Neural Information Processing Systems Foundation, 2017, pp. 1710–21.","ista":"Alistarh D-A, Grubic D, Li J, Tomioka R, Vojnović M. 2017. QSGD: Communication-efficient SGD via gradient quantization and encoding. NIPS: Neural Information Processing System, Advances in Neural Information Processing Systems, vol. 2017, 1710–1721.","ama":"Alistarh D-A, Grubic D, Li J, Tomioka R, Vojnović M. QSGD: Communication-efficient SGD via gradient quantization and encoding. In: Vol 2017. Neural Information Processing Systems Foundation; 2017:1710-1721.","chicago":"Alistarh, Dan-Adrian, Demjan Grubic, Jerry Li, Ryota Tomioka, and Milan Vojnović. “QSGD: Communication-Efficient SGD via Gradient Quantization and Encoding,” 2017:1710–21. Neural Information Processing Systems Foundation, 2017.","ieee":"D.-A. Alistarh, D. Grubic, J. Li, R. Tomioka, and M. Vojnović, “QSGD: Communication-efficient SGD via gradient quantization and encoding,” presented at the NIPS: Neural Information Processing System, Long Beach, CA, United States, 2017, vol. 2017, pp. 1710–1721."},"arxiv":1,"alternative_title":["Advances in Neural Information Processing Systems"],"main_file_link":[{"url":"https://arxiv.org/abs/1610.02132","open_access":"1"}],"title":"QSGD: Communication-efficient SGD via gradient quantization and encoding","external_id":{"arxiv":["1610.02132"]},"date_updated":"2023-10-17T11:48:03Z","status":"public","abstract":[{"lang":"eng","text":"Parallel implementations of stochastic gradient descent (SGD) have received significant research attention, thanks to its excellent scalability properties. 
A fundamental barrier when parallelizing SGD is the high bandwidth cost of communicating gradient updates between nodes; consequently, several lossy compression heuristics have been proposed, by which nodes only communicate quantized gradients. Although effective in practice, these heuristics do not always converge. In this paper, we propose Quantized SGD (QSGD), a family of compression schemes with convergence guarantees and good practical performance. QSGD allows the user to smoothly trade off communication bandwidth and convergence time: nodes can adjust the number of bits sent per iteration, at the cost of possibly higher variance. We show that this trade-off is inherent, in the sense that improving it past some threshold would violate information-theoretic lower bounds. QSGD guarantees convergence for convex and non-convex objectives, under asynchrony, and can be extended to stochastic variance-reduced techniques. When applied to training deep neural networks for image classification and automated speech recognition, QSGD leads to significant reductions in end-to-end training time. For instance, on 16 GPUs, we can train the ResNet-152 network to full accuracy on ImageNet 1.8× faster than the full-precision variant. "}],"day":"01","page":"1710-1721","oa_version":"Submitted Version","publication_status":"published","department":[{"_id":"DaAl"}],"date_created":"2018-12-11T11:46:26Z","intvolume":"      2017","conference":{"location":"Long Beach, CA, United States","end_date":"2017-12-09","start_date":"2017-12-04","name":"NIPS: Neural Information Processing System"},"article_processing_charge":"No","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publication_identifier":{"issn":["10495258"]},"publist_id":"7392","quality_controlled":"1","month":"01","volume":2017,"year":"2017","publisher":"Neural Information Processing Systems Foundation","language":[{"iso":"eng"}],"type":"conference","_id":"431","oa":1,"author":[{"full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","orcid":"0000-0003-3650-940X"},{"last_name":"Grubic","first_name":"Demjan","full_name":"Grubic, Demjan"},{"last_name":"Li","first_name":"Jerry","full_name":"Li, Jerry"},{"last_name":"Tomioka","full_name":"Tomioka, Ryota","first_name":"Ryota"},{"first_name":"Milan","full_name":"Vojnović, Milan","last_name":"Vojnović"}],"date_published":"2017-01-01T00:00:00Z"},{"abstract":[{"lang":"eng","text":"Recently there has been significant interest in training machine-learning models at low precision: by reducing precision, one can reduce computation and communication by one order of magnitude. We examine training at reduced precision, both from a theoretical and practical perspective, and ask: is it possible to train models at end-to-end low precision with provable guarantees? Can this lead to consistent order-of-magnitude speedups? We mainly focus on linear models, and the answer is yes for linear models. We develop a simple framework called ZipML based on one simple but novel strategy called double sampling. Our ZipML framework is able to execute training at low precision with no bias, guaranteeing convergence, whereas naive quantization would introduce significant bias. We validate our framework across a range of applications, and show that it enables an FPGA prototype that is up to 6.5× faster than an implementation using full 32-bit precision. 
We further develop a variance-optimal stochastic quantization strategy and show that it can make a significant difference in a variety of settings. When applied to linear models together with double sampling, we save up to another 1.7× in data movement compared with uniform quantization. When training deep networks with quantized models, we achieve higher accuracy than the state-of-the-art XNOR-Net. "}],"day":"01","oa_version":"Submitted Version","publication_status":"published","page":"4035 - 4043","date_updated":"2023-10-17T12:31:15Z","status":"public","title":"ZipML: Training linear models with end-to-end low precision, and a little bit of deep learning","file":[{"relation":"main_file","content_type":"application/pdf","access_level":"open_access","creator":"dernst","date_updated":"2020-07-14T12:46:26Z","file_id":"5869","checksum":"86156ba7f4318e47cef3eb9092593c10","date_created":"2019-01-22T08:23:58Z","file_size":849345,"file_name":"2017_ICML_Zhang.pdf"}],"publication":"Proceedings of Machine Learning Research","citation":{"short":"H. Zhang, J. Li, K. Kara, D.-A. Alistarh, J. Liu, C. Zhang, in:, Proceedings of Machine Learning Research, ML Research Press, 2017, pp. 4035–4043.","mla":"Zhang, Hantian, et al. “ZipML: Training Linear Models with End-to-End Low Precision, and a Little Bit of Deep Learning.” <i>Proceedings of Machine Learning Research</i>, vol. 70, ML Research Press, 2017, pp. 4035–43.","apa":"Zhang, H., Li, J., Kara, K., Alistarh, D.-A., Liu, J., &#38; Zhang, C. (2017). ZipML: Training linear models with end-to-end low precision, and a little bit of deep learning. In <i>Proceedings of Machine Learning Research</i> (Vol. 70, pp. 4035–4043). Sydney, Australia: ML Research Press.","ista":"Zhang H, Li J, Kara K, Alistarh D-A, Liu J, Zhang C. 2017. ZipML: Training linear models with end-to-end low precision, and a little bit of deep learning. Proceedings of Machine Learning Research. ICML: International  Conference  on  Machine Learning, PMLR Press, vol. 70, 4035–4043.","ama":"Zhang H, Li J, Kara K, Alistarh D-A, Liu J, Zhang C. ZipML: Training linear models with end-to-end low precision, and a little bit of deep learning. In: <i>Proceedings of Machine Learning Research</i>. Vol 70. ML Research Press; 2017:4035-4043.","chicago":"Zhang, Hantian, Jerry Li, Kaan Kara, Dan-Adrian Alistarh, Ji Liu, and Ce Zhang. “ZipML: Training Linear Models with End-to-End Low Precision, and a Little Bit of Deep Learning.” In <i>Proceedings of Machine Learning Research</i>, 70:4035–43. ML Research Press, 2017.","ieee":"H. Zhang, J. Li, K. Kara, D.-A. Alistarh, J. Liu, and C. Zhang, “ZipML: Training linear models with end-to-end low precision, and a little bit of deep learning,” in <i>Proceedings of Machine Learning Research</i>, Sydney, Australia, 2017, vol. 70, pp. 
4035–4043."},"has_accepted_license":"1","alternative_title":["PMLR Press"],"scopus_import":"1","_id":"432","oa":1,"author":[{"last_name":"Zhang","first_name":"Hantian","full_name":"Zhang, Hantian"},{"last_name":"Li","first_name":"Jerry","full_name":"Li, Jerry"},{"last_name":"Kara","full_name":"Kara, Kaan","first_name":"Kaan"},{"last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian"},{"full_name":"Liu, Ji","first_name":"Ji","last_name":"Liu"},{"last_name":"Zhang","full_name":"Zhang, Ce","first_name":"Ce"}],"date_published":"2017-01-01T00:00:00Z","year":"2017","publisher":"ML Research Press","language":[{"iso":"eng"}],"type":"conference","publication_identifier":{"isbn":["978-151085514-4"]},"publist_id":"7391","quality_controlled":"1","month":"01","volume":" 70","department":[{"_id":"DaAl"}],"date_created":"2018-12-11T11:46:26Z","file_date_updated":"2020-07-14T12:46:26Z","conference":{"location":"Sydney, Australia","name":"ICML: International  Conference  on  Machine Learning","end_date":"2017-08-11","start_date":"2017-08-06"},"ddc":["000"],"article_processing_charge":"No","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"}]
