[{"page":"487-488","external_id":{"isi":["000458186900063"]},"date_published":"2018-07-27T00:00:00Z","publisher":"ACM Press","scopus_import":"1","isi":1,"publication_identifier":{"isbn":["9781450357951"]},"status":"public","month":"07","conference":{"location":"Egham, United Kingdom","start_date":"2018-07-23","end_date":"2018-07-27","name":"PODC: Principles of Distributed Computing"},"date_created":"2019-02-13T09:48:55Z","quality_controlled":"1","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","article_processing_charge":"No","author":[{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","last_name":"Alistarh"}],"citation":{"mla":"Alistarh, Dan-Adrian. “A Brief Tutorial on Distributed and Concurrent Machine Learning.” <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, ACM Press, 2018, pp. 487–88, doi:<a href=\"https://doi.org/10.1145/3212734.3212798\">10.1145/3212734.3212798</a>.","apa":"Alistarh, D.-A. (2018). A brief tutorial on distributed and concurrent machine learning. In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i> (pp. 487–488). Egham, United Kingdom: ACM Press. <a href=\"https://doi.org/10.1145/3212734.3212798\">https://doi.org/10.1145/3212734.3212798</a>","ama":"Alistarh D-A. A brief tutorial on distributed and concurrent machine learning. In: <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>. ACM Press; 2018:487-488. doi:<a href=\"https://doi.org/10.1145/3212734.3212798\">10.1145/3212734.3212798</a>","ieee":"D.-A. Alistarh, “A brief tutorial on distributed and concurrent machine learning,” in <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, Egham, United Kingdom, 2018, pp. 487–488.","short":"D.-A. Alistarh, in:, Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18, ACM Press, 2018, pp. 487–488.","chicago":"Alistarh, Dan-Adrian. “A Brief Tutorial on Distributed and Concurrent Machine Learning.” In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, 487–88. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3212734.3212798\">https://doi.org/10.1145/3212734.3212798</a>.","ista":"Alistarh D-A. 2018. A brief tutorial on distributed and concurrent machine learning. Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18. PODC: Principles of Distributed Computing, 487–488."},"year":"2018","day":"27","oa_version":"None","type":"conference","date_updated":"2023-09-19T10:42:28Z","department":[{"_id":"DaAl"}],"_id":"5961","doi":"10.1145/3212734.3212798","language":[{"iso":"eng"}],"title":"A brief tutorial on distributed and concurrent machine learning","publication":"Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC '18","abstract":[{"lang":"eng","text":"The area of machine learning has made considerable progress over the past decade, enabled by the widespread availability of large datasets, as well as by improved algorithms and models. Given the large computational demands of machine learning workloads, parallelism, implemented either through single-node concurrency or through multi-node distribution, has been a third key ingredient to advances in machine learning.\r\nThe goal of this tutorial is to provide the audience with an overview of standard distribution techniques in machine learning, with an eye towards the intriguing trade-offs between synchronization and communication costs of distributed machine learning algorithms, on the one hand, and their convergence, on the other.The tutorial will focus on parallelization strategies for the fundamental stochastic gradient descent (SGD) algorithm, which is a key tool when training machine learning models, from classical instances such as linear regression, to state-of-the-art neural network architectures.\r\nThe tutorial will describe the guarantees provided by this algorithm in the sequential case, and then move on to cover both shared-memory and message-passing parallelization strategies, together with the guarantees they provide, and corresponding trade-offs. The presentation will conclude with a broad overview of ongoing research in distributed and concurrent machine learning. The tutorial will assume no prior knowledge beyond familiarity with basic concepts in algebra and analysis.\r\n"}],"publication_status":"published"},{"conference":{"start_date":"2018-07-23","location":"Egham, United Kingdom","name":"PODC: Principles of Distributed Computing","end_date":"2018-07-27"},"month":"07","date_created":"2019-02-13T09:58:58Z","status":"public","isi":1,"publisher":"ACM Press","abstract":[{"lang":"eng","text":"Stochastic Gradient Descent (SGD) is a fundamental algorithm in machine learning, representing the optimization backbone for training several classic models, from regression to neural networks. Given the recent practical focus on distributed machine learning, significant work has been dedicated to the convergence properties of this algorithm under the inconsistent and noisy updates arising from execution in a distributed environment. However, surprisingly, the convergence properties of this classic algorithm in the standard shared-memory model are still not well-understood. In this work, we address this gap, and provide new convergence bounds for lock-free concurrent stochastic gradient descent, executing in the classic asynchronous shared memory model, against a strong adaptive adversary. Our results give improved upper and lower bounds on the \"price of asynchrony'' when executing the fundamental SGD algorithm in a concurrent setting. They show that this classic optimization tool can converge faster and with a wider range of parameters than previously known under asynchronous iterations. At the same time, we exhibit a fundamental trade-off between the maximum delay in the system and the rate at which SGD can converge, which governs the set of parameters under which this algorithm can still work efficiently."}],"publication_status":"published","publication":"Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC '18","title":"The convergence of stochastic gradient descent in asynchronous shared memory","_id":"5962","oa":1,"year":"2018","citation":{"ama":"Alistarh D-A, De Sa C, Konstantinov NH. The convergence of stochastic gradient descent in asynchronous shared memory. In: <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>. ACM Press; 2018:169-178. doi:<a href=\"https://doi.org/10.1145/3212734.3212763\">10.1145/3212734.3212763</a>","ieee":"D.-A. Alistarh, C. De Sa, and N. H. Konstantinov, “The convergence of stochastic gradient descent in asynchronous shared memory,” in <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, Egham, United Kingdom, 2018, pp. 169–178.","ista":"Alistarh D-A, De Sa C, Konstantinov NH. 2018. The convergence of stochastic gradient descent in asynchronous shared memory. Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18. PODC: Principles of Distributed Computing, 169–178.","short":"D.-A. Alistarh, C. De Sa, N.H. Konstantinov, in:, Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18, ACM Press, 2018, pp. 169–178.","chicago":"Alistarh, Dan-Adrian, Christopher De Sa, and Nikola H Konstantinov. “The Convergence of Stochastic Gradient Descent in Asynchronous Shared Memory.” In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, 169–78. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3212734.3212763\">https://doi.org/10.1145/3212734.3212763</a>.","apa":"Alistarh, D.-A., De Sa, C., &#38; Konstantinov, N. H. (2018). The convergence of stochastic gradient descent in asynchronous shared memory. In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i> (pp. 169–178). Egham, United Kingdom: ACM Press. <a href=\"https://doi.org/10.1145/3212734.3212763\">https://doi.org/10.1145/3212734.3212763</a>","mla":"Alistarh, Dan-Adrian, et al. “The Convergence of Stochastic Gradient Descent in Asynchronous Shared Memory.” <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, ACM Press, 2018, pp. 169–78, doi:<a href=\"https://doi.org/10.1145/3212734.3212763\">10.1145/3212734.3212763</a>."},"main_file_link":[{"url":"https://arxiv.org/abs/1803.08841","open_access":"1"}],"author":[{"last_name":"Alistarh","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"full_name":"De Sa, Christopher","last_name":"De Sa","first_name":"Christopher"},{"id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87","full_name":"Konstantinov, Nikola H","first_name":"Nikola H","last_name":"Konstantinov"}],"quality_controlled":"1","arxiv":1,"scopus_import":"1","publication_identifier":{"isbn":["9781450357951"]},"external_id":{"arxiv":["1803.08841"],"isi":["000458186900022"]},"date_published":"2018-07-23T00:00:00Z","page":"169-178","department":[{"_id":"DaAl"}],"language":[{"iso":"eng"}],"doi":"10.1145/3212734.3212763","oa_version":"Preprint","date_updated":"2023-09-19T10:42:53Z","type":"conference","day":"23","article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1"},{"arxiv":1,"scopus_import":"1","publication_identifier":{"isbn":["9781450357951"]},"page":"377-386","external_id":{"arxiv":["1808.04155"],"isi":["000458186900048"]},"date_published":"2018-07-23T00:00:00Z","department":[{"_id":"DaAl"}],"doi":"10.1145/3212734.3212756","language":[{"iso":"eng"}],"day":"23","oa_version":"Preprint","date_updated":"2023-09-19T10:43:21Z","type":"conference","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","article_processing_charge":"No","status":"public","month":"07","conference":{"end_date":"2018-07-27","name":"PODC: Principles of Distributed Computing","start_date":"2018-07-23","location":"Egham, United Kingdom"},"date_created":"2019-02-13T10:03:25Z","isi":1,"publisher":"ACM Press","abstract":[{"lang":"eng","text":"There has been significant progress in understanding the parallelism inherent to iterative sequential algorithms: for many classic algorithms, the depth of the dependence structure is now well understood, and scheduling techniques have been developed to exploit this shallow dependence structure for efficient parallel implementations. A related, applied research strand has studied methods by which certain iterative task-based algorithms can be efficiently parallelized via relaxed concurrent priority schedulers. These allow for high concurrency when inserting and removing tasks, at the cost of executing superfluous work due to the relaxed semantics of the scheduler. In this work, we take a step towards unifying these two research directions, by showing that there exists a family of relaxed priority schedulers that can efficiently and deterministically execute classic iterative algorithms such as greedy maximal independent set (MIS) and matching. Our primary result shows that, given a randomized scheduler with an expected relaxation factor of k in terms of the maximum allowed priority inversions on a task, and any graph on n vertices, the scheduler is able to execute greedy MIS with only an additive factor of \\poly(k) expected additional iterations compared to an exact (but not scalable) scheduler. This counter-intuitive result demonstrates that the overhead of relaxation when computing MIS is not dependent on the input size or structure of the input graph. Experimental results show that this overhead can be clearly offset by the gain in performance due to the highly scalable scheduler. In sum, we present an efficient method to deterministically parallelize iterative sequential algorithms, with provable runtime guarantees in terms of the number of executed tasks to completion."}],"publication_status":"published","_id":"5963","oa":1,"publication":"Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC '18","title":"Relaxed schedulers can efficiently parallelize iterative algorithms","year":"2018","quality_controlled":"1","author":[{"first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Brown","first_name":"Trevor A","id":"3569F0A0-F248-11E8-B48F-1D18A9856A87","full_name":"Brown, Trevor A"},{"first_name":"Justin","last_name":"Kopinsky","full_name":"Kopinsky, Justin"},{"first_name":"Giorgi","last_name":"Nadiradze","full_name":"Nadiradze, Giorgi"}],"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1808.04155"}],"citation":{"mla":"Alistarh, Dan-Adrian, et al. “Relaxed Schedulers Can Efficiently Parallelize Iterative Algorithms.” <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, ACM Press, 2018, pp. 377–86, doi:<a href=\"https://doi.org/10.1145/3212734.3212756\">10.1145/3212734.3212756</a>.","apa":"Alistarh, D.-A., Brown, T. A., Kopinsky, J., &#38; Nadiradze, G. (2018). Relaxed schedulers can efficiently parallelize iterative algorithms. In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i> (pp. 377–386). Egham, United Kingdom: ACM Press. <a href=\"https://doi.org/10.1145/3212734.3212756\">https://doi.org/10.1145/3212734.3212756</a>","chicago":"Alistarh, Dan-Adrian, Trevor A Brown, Justin Kopinsky, and Giorgi Nadiradze. “Relaxed Schedulers Can Efficiently Parallelize Iterative Algorithms.” In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, 377–86. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3212734.3212756\">https://doi.org/10.1145/3212734.3212756</a>.","short":"D.-A. Alistarh, T.A. Brown, J. Kopinsky, G. Nadiradze, in:, Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18, ACM Press, 2018, pp. 377–386.","ista":"Alistarh D-A, Brown TA, Kopinsky J, Nadiradze G. 2018. Relaxed schedulers can efficiently parallelize iterative algorithms. Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18. PODC: Principles of Distributed Computing, 377–386.","ieee":"D.-A. Alistarh, T. A. Brown, J. Kopinsky, and G. Nadiradze, “Relaxed schedulers can efficiently parallelize iterative algorithms,” in <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, Egham, United Kingdom, 2018, pp. 377–386.","ama":"Alistarh D-A, Brown TA, Kopinsky J, Nadiradze G. Relaxed schedulers can efficiently parallelize iterative algorithms. In: <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>. ACM Press; 2018:377-386. doi:<a href=\"https://doi.org/10.1145/3212734.3212756\">10.1145/3212734.3212756</a>"}},{"page":"411-413","external_id":{"isi":["000458186900052"]},"date_published":"2018-07-23T00:00:00Z","scopus_import":"1","publication_identifier":{"isbn":["9781450357951"]},"article_processing_charge":"No","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","day":"23","oa_version":"Submitted Version","type":"conference","date_updated":"2023-09-19T10:43:45Z","department":[{"_id":"DaAl"}],"doi":"10.1145/3212734.3212785","language":[{"iso":"eng"}],"publisher":"ACM Press","isi":1,"status":"public","conference":{"location":"Egham, United Kingdom","start_date":"2018-07-23","end_date":"2018-07-27","name":"PODC: Principles of Distributed Computing"},"month":"07","date_created":"2019-02-13T10:08:19Z","quality_controlled":"1","author":[{"full_name":"Aksenov, Vitaly","last_name":"Aksenov","first_name":"Vitaly"},{"first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"first_name":"Petr","last_name":"Kuznetsov","full_name":"Kuznetsov, Petr"}],"citation":{"apa":"Aksenov, V., Alistarh, D.-A., &#38; Kuznetsov, P. (2018). Brief Announcement: Performance prediction for coarse-grained locking. In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i> (pp. 411–413). Egham, United Kingdom: ACM Press. <a href=\"https://doi.org/10.1145/3212734.3212785\">https://doi.org/10.1145/3212734.3212785</a>","mla":"Aksenov, Vitaly, et al. “Brief Announcement: Performance Prediction for Coarse-Grained Locking.” <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, ACM Press, 2018, pp. 411–13, doi:<a href=\"https://doi.org/10.1145/3212734.3212785\">10.1145/3212734.3212785</a>.","ama":"Aksenov V, Alistarh D-A, Kuznetsov P. Brief Announcement: Performance prediction for coarse-grained locking. In: <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>. ACM Press; 2018:411-413. doi:<a href=\"https://doi.org/10.1145/3212734.3212785\">10.1145/3212734.3212785</a>","ieee":"V. Aksenov, D.-A. Alistarh, and P. Kuznetsov, “Brief Announcement: Performance prediction for coarse-grained locking,” in <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, Egham, United Kingdom, 2018, pp. 411–413.","chicago":"Aksenov, Vitaly, Dan-Adrian Alistarh, and Petr Kuznetsov. “Brief Announcement: Performance Prediction for Coarse-Grained Locking.” In <i>Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18</i>, 411–13. ACM Press, 2018. <a href=\"https://doi.org/10.1145/3212734.3212785\">https://doi.org/10.1145/3212734.3212785</a>.","short":"V. Aksenov, D.-A. Alistarh, P. Kuznetsov, in:, Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18, ACM Press, 2018, pp. 411–413.","ista":"Aksenov V, Alistarh D-A, Kuznetsov P. 2018. Brief Announcement: Performance prediction for coarse-grained locking. Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC ’18. PODC: Principles of Distributed Computing, 411–413."},"main_file_link":[{"open_access":"1","url":"https://hal-univ-lyon3.archives-ouvertes.fr/INRIA/hal-01887733v1"}],"year":"2018","_id":"5964","oa":1,"publication":"Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing  - PODC '18","title":"Brief Announcement: Performance prediction for coarse-grained locking","abstract":[{"lang":"eng","text":"A standard design pattern found in many concurrent data structures, such as hash tables or ordered containers, is an alternation of parallelizable sections that incur no data conflicts and critical sections that must run sequentially and are protected with locks. A lock can be viewed as a queue that arbitrates the order in which the critical sections are executed, and a natural question is whether we can use stochastic analysis to predict the resulting throughput. As a preliminary evidence to the affirmative, we describe a simple model that can be used to predict the throughput of coarse-grained lock-based algorithms. We show that our model works well for CLH lock, and we expect it to work for other popular lock designs such as TTAS, MCS, etc."}],"publication_status":"published"}]