[{"month":"02","date_published":"2022-02-01T00:00:00Z","scopus_import":"1","publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik","language":[{"iso":"eng"}],"has_accepted_license":"1","department":[{"_id":"DaAl"}],"conference":{"start_date":"2021-12-13","location":"Strasbourg, France","end_date":"2021-12-15","name":"OPODIS"},"file":[{"file_id":"11345","creator":"dernst","content_type":"application/pdf","relation":"main_file","success":1,"date_updated":"2022-05-02T07:53:00Z","access_level":"open_access","date_created":"2022-05-02T07:53:00Z","checksum":"626551c14de5d4091573200ed0535752","file_name":"2022_LIPICs_Nikabadi.pdf","file_size":790396}],"date_created":"2022-04-17T22:01:47Z","day":"01","type":"conference","intvolume":"       217","status":"public","publication":"25th International Conference on Principles of Distributed Systems","file_date_updated":"2022-05-02T07:53:00Z","year":"2022","doi":"10.4230/LIPIcs.OPODIS.2021.15","ec_funded":1,"title":"Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters","tmp":{"image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"alternative_title":["LIPIcs"],"article_number":"15","ddc":["510"],"citation":{"chicago":"Nikabadi, Amir, and Janne Korhonen. “Beyond Distributed Subgraph Detection: Induced Subgraphs, Multicolored Problems and Graph Parameters.” In <i>25th International Conference on Principles of Distributed Systems</i>, edited by Quentin Bramas, Vincent Gramoli, and Alessia Milani, Vol. 217. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022. <a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.15\">https://doi.org/10.4230/LIPIcs.OPODIS.2021.15</a>.","apa":"Nikabadi, A., &#38; Korhonen, J. (2022). Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters. In Q. 
Bramas, V. Gramoli, &#38; A. Milani (Eds.), <i>25th International Conference on Principles of Distributed Systems</i> (Vol. 217). Strasbourg, France: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.15\">https://doi.org/10.4230/LIPIcs.OPODIS.2021.15</a>","ieee":"A. Nikabadi and J. Korhonen, “Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters,” in <i>25th International Conference on Principles of Distributed Systems</i>, Strasbourg, France, 2022, vol. 217.","short":"A. Nikabadi, J. Korhonen, in:, Q. Bramas, V. Gramoli, A. Milani (Eds.), 25th International Conference on Principles of Distributed Systems, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022.","ista":"Nikabadi A, Korhonen J. 2022. Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters. 25th International Conference on Principles of Distributed Systems. OPODIS, LIPIcs, vol. 217, 15.","mla":"Nikabadi, Amir, and Janne Korhonen. “Beyond Distributed Subgraph Detection: Induced Subgraphs, Multicolored Problems and Graph Parameters.” <i>25th International Conference on Principles of Distributed Systems</i>, edited by Quentin Bramas et al., vol. 217, 15, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022, doi:<a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.15\">10.4230/LIPIcs.OPODIS.2021.15</a>.","ama":"Nikabadi A, Korhonen J. Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters. In: Bramas Q, Gramoli V, Milani A, eds. <i>25th International Conference on Principles of Distributed Systems</i>. Vol 217. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2022. 
doi:<a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.15\">10.4230/LIPIcs.OPODIS.2021.15</a>"},"publication_status":"published","abstract":[{"text":"Subgraph detection has recently been one of the most studied problems in the CONGEST model of distributed computing. In this work, we study the distributed complexity of problems closely related to subgraph detection, mainly focusing on induced subgraph detection. The main line of this work presents lower bounds and parameterized algorithms w.r.t structural parameters of the input graph:\r\n- On general graphs, we give unconditional lower bounds for induced detection of cycles and patterns of treewidth 2 in CONGEST. Moreover, by adapting reductions from centralized parameterized complexity, we prove lower bounds in CONGEST for detecting patterns with a 4-clique, and for induced path detection conditional on the hardness of triangle detection in the congested clique.\r\n- On graphs of bounded degeneracy, we show that induced paths can be detected fast in CONGEST using techniques from parameterized algorithms, while detecting cycles and patterns of treewidth 2 is hard.\r\n- On graphs of bounded vertex cover number, we show that induced subgraph detection is easy in CONGEST for any pattern graph. More specifically, we adapt a centralized parameterized algorithm for a more general maximum common induced subgraph detection problem to the distributed setting. 
In addition to these induced subgraph detection results, we study various related problems in the CONGEST and congested clique models, including for multicolored versions of subgraph-detection-like problems.","lang":"eng"}],"author":[{"full_name":"Nikabadi, Amir","last_name":"Nikabadi","first_name":"Amir"},{"first_name":"Janne","last_name":"Korhonen","full_name":"Korhonen, Janne","id":"C5402D42-15BC-11E9-A202-CA2BE6697425"}],"editor":[{"first_name":"Quentin","full_name":"Bramas, Quentin","last_name":"Bramas"},{"last_name":"Gramoli","full_name":"Gramoli, Vincent","first_name":"Vincent"},{"last_name":"Milani","full_name":"Milani, Alessia","first_name":"Alessia"}],"article_processing_charge":"No","date_updated":"2022-05-02T07:56:35Z","oa":1,"volume":217,"publication_identifier":{"isbn":["9783959772198"],"issn":["1868-8969"]},"_id":"11183","project":[{"grant_number":"805223","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning"}],"oa_version":"Published Version","quality_controlled":"1","acknowledgement":"Amir Nikabadi: Supported by the LABEX MILYON (ANR-10-LABX-0070) of Université de Lyon, within the program “Investissements d’Avenir” (ANR-11-IDEX-0007) operated by the French National Research Agency (ANR). Janne H. 
Korhonen: Supported by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).\r\nWe thank François Le Gall and Masayuki Miyamoto for sharing their work on lower bounds for induced subgraph detection [36].","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"ddc":["510"],"tmp":{"image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"alternative_title":["LIPIcs"],"article_number":"14","external_id":{"arxiv":["2102.08808"]},"title":"Fast graphical population protocols","year":"2022","doi":"10.4230/LIPIcs.OPODIS.2021.14","ec_funded":1,"publication_identifier":{"isbn":["9783959772198"],"issn":["1868-8969"]},"_id":"11184","quality_controlled":"1","project":[{"grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020"},{"grant_number":"840605","call_identifier":"H2020","_id":"26A5D39A-B435-11E9-9278-68D0E5697425","name":"Coordination in constrained and natural distributed systems"}],"oa_version":"Published Version","acknowledgement":"Dan Alistarh: This project has received funding from the European Research Council (ERC)\r\nunder the European Union’s Horizon 2020 research and innovation programme (grant agreement No. 805223 ScaleML).\r\nJoel Rybicki: This project has received funding from the European Union’s Horizon 2020 research and\r\ninnovation programme under the Marie Skłodowska-Curie grant agreement No. 840605.\r\nAcknowledgements We are grateful to Giorgi Nadiradze for pointing out a generalisation of the phase clock construction to non-regular graphs. 
We also thank anonymous reviewers for their useful comments on earlier versions of this manuscript.","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","editor":[{"first_name":"Quentin","full_name":"Bramas, Quentin","last_name":"Bramas"},{"last_name":"Gramoli","full_name":"Gramoli, Vincent","first_name":"Vincent"},{"first_name":"Alessia","full_name":"Milani, Alessia","last_name":"Milani"}],"arxiv":1,"article_processing_charge":"No","volume":217,"oa":1,"date_updated":"2022-05-02T08:09:39Z","abstract":[{"lang":"eng","text":"Let G be a graph on n nodes. In the stochastic population protocol model, a collection of n indistinguishable, resource-limited nodes collectively solve tasks via pairwise interactions. In each interaction, two randomly chosen neighbors first read each other’s states, and then update their local states. A rich line of research has established tight upper and lower bounds on the complexity of fundamental tasks, such as majority and leader election, in this model, when G is a clique. Specifically, in the clique, these tasks can be solved fast, i.e., in n polylog n pairwise interactions, with high probability, using at most polylog n states per node.\r\nIn this work, we consider the more general setting where G is an arbitrary regular graph, and present a technique for simulating protocols designed for fully-connected networks in any connected regular graph. Our main result is a simulation that is efficient on many interesting graph families: roughly, the simulation overhead is polylogarithmic in the number of nodes, and quadratic in the conductance of the graph. As a sample application, we show that, in any regular graph with conductance φ, both leader election and exact majority can be solved in φ^{-2} ⋅ n polylog n pairwise interactions, with high probability, using at most φ^{-2} ⋅ polylog n states per node. 
This shows that there are fast and space-efficient population protocols for leader election and exact majority on graphs with good expansion properties. We believe our results will prove generally useful, as they allow efficient technology transfer between the well-mixed (clique) case, and the under-explored spatial setting."}],"author":[{"first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Gelashvili","full_name":"Gelashvili, Rati","first_name":"Rati"},{"id":"334EFD2E-F248-11E8-B48F-1D18A9856A87","full_name":"Rybicki, Joel","last_name":"Rybicki","orcid":"0000-0002-6432-6646","first_name":"Joel"}],"citation":{"ama":"Alistarh D-A, Gelashvili R, Rybicki J. Fast graphical population protocols. In: Bramas Q, Gramoli V, Milani A, eds. <i>25th International Conference on Principles of Distributed Systems</i>. Vol 217. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2022. doi:<a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.14\">10.4230/LIPIcs.OPODIS.2021.14</a>","mla":"Alistarh, Dan-Adrian, et al. “Fast Graphical Population Protocols.” <i>25th International Conference on Principles of Distributed Systems</i>, edited by Quentin Bramas et al., vol. 217, 14, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022, doi:<a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.14\">10.4230/LIPIcs.OPODIS.2021.14</a>.","short":"D.-A. Alistarh, R. Gelashvili, J. Rybicki, in:, Q. Bramas, V. Gramoli, A. Milani (Eds.), 25th International Conference on Principles of Distributed Systems, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022.","ista":"Alistarh D-A, Gelashvili R, Rybicki J. 2022. Fast graphical population protocols. 25th International Conference on Principles of Distributed Systems. OPODIS, LIPIcs, vol. 217, 14.","ieee":"D.-A. Alistarh, R. Gelashvili, and J. 
Rybicki, “Fast graphical population protocols,” in <i>25th International Conference on Principles of Distributed Systems</i>, Strasbourg, France, 2022, vol. 217.","apa":"Alistarh, D.-A., Gelashvili, R., &#38; Rybicki, J. (2022). Fast graphical population protocols. In Q. Bramas, V. Gramoli, &#38; A. Milani (Eds.), <i>25th International Conference on Principles of Distributed Systems</i> (Vol. 217). Strasbourg, France: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.14\">https://doi.org/10.4230/LIPIcs.OPODIS.2021.14</a>","chicago":"Alistarh, Dan-Adrian, Rati Gelashvili, and Joel Rybicki. “Fast Graphical Population Protocols.” In <i>25th International Conference on Principles of Distributed Systems</i>, edited by Quentin Bramas, Vincent Gramoli, and Alessia Milani, Vol. 217. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022. <a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.14\">https://doi.org/10.4230/LIPIcs.OPODIS.2021.14</a>."},"publication_status":"published","conference":{"end_date":"2021-12-15","name":"OPODIS","location":"Strasbourg, France","start_date":"2021-12-13"},"file":[{"success":1,"relation":"main_file","content_type":"application/pdf","file_id":"11346","creator":"dernst","file_size":959406,"file_name":"2022_LIPICs_Alistarh.pdf","checksum":"2c7c982174c6f98c4ca6e92539d15086","date_created":"2022-05-02T08:06:33Z","access_level":"open_access","date_updated":"2022-05-02T08:06:33Z"}],"date_created":"2022-04-17T22:01:47Z","has_accepted_license":"1","department":[{"_id":"DaAl"}],"scopus_import":"1","publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik","language":[{"iso":"eng"}],"month":"02","date_published":"2022-02-01T00:00:00Z","publication":"25th International Conference on Principles of Distributed Systems","file_date_updated":"2022-05-02T08:06:33Z","intvolume":"       217","status":"public","day":"01","type":"conference"},{"publication_status":"published","citation":{"short":"A. 
Shevchenko, V. Kungurtsev, M. Mondelli, Journal of Machine Learning Research 23 (2022) 1–55.","ista":"Shevchenko A, Kungurtsev V, Mondelli M. 2022. Mean-field analysis of piecewise linear solutions for wide ReLU networks. Journal of Machine Learning Research. 23(130), 1–55.","ama":"Shevchenko A, Kungurtsev V, Mondelli M. Mean-field analysis of piecewise linear solutions for wide ReLU networks. <i>Journal of Machine Learning Research</i>. 2022;23(130):1-55.","mla":"Shevchenko, Aleksandr, et al. “Mean-Field Analysis of Piecewise Linear Solutions for Wide ReLU Networks.” <i>Journal of Machine Learning Research</i>, vol. 23, no. 130, Journal of Machine Learning Research, 2022, pp. 1–55.","chicago":"Shevchenko, Aleksandr, Vyacheslav Kungurtsev, and Marco Mondelli. “Mean-Field Analysis of Piecewise Linear Solutions for Wide ReLU Networks.” <i>Journal of Machine Learning Research</i>. Journal of Machine Learning Research, 2022.","ieee":"A. Shevchenko, V. Kungurtsev, and M. Mondelli, “Mean-field analysis of piecewise linear solutions for wide ReLU networks,” <i>Journal of Machine Learning Research</i>, vol. 23, no. 130. Journal of Machine Learning Research, pp. 1–55, 2022.","apa":"Shevchenko, A., Kungurtsev, V., &#38; Mondelli, M. (2022). Mean-field analysis of piecewise linear solutions for wide ReLU networks. <i>Journal of Machine Learning Research</i>. Journal of Machine Learning Research."},"author":[{"first_name":"Aleksandr","last_name":"Shevchenko","full_name":"Shevchenko, Aleksandr","id":"F2B06EC2-C99E-11E9-89F0-752EE6697425"},{"first_name":"Vyacheslav","full_name":"Kungurtsev, Vyacheslav","last_name":"Kungurtsev"},{"id":"27EB676C-8706-11E9-9510-7717E6697425","full_name":"Mondelli, Marco","last_name":"Mondelli","orcid":"0000-0002-3242-7020","first_name":"Marco"}],"abstract":[{"text":"Understanding the properties of neural networks trained via stochastic gradient descent (SGD) is at the heart of the theory of deep learning. 
In this work, we take a mean-field view, and consider a two-layer ReLU network trained via noisy-SGD for a univariate regularized regression problem. Our main result is that SGD with vanishingly small noise injected in the gradients is biased towards a simple solution: at convergence, the ReLU network implements a piecewise linear map of the inputs, and the number of “knot” points -- i.e., points where the tangent of the ReLU network estimator changes -- between two consecutive training inputs is at most three. In particular, as the number of neurons of the network grows, the SGD dynamics is captured by the solution of a gradient flow and, at convergence, the distribution of the weights approaches the unique minimizer of a related free energy, which has a Gibbs form. Our key technical contribution consists in the analysis of the estimator resulting from this minimizer: we show that its second derivative vanishes everywhere, except at some specific locations which represent the “knot” points. We also provide empirical evidence that knots at locations distinct from the data points might occur, as predicted by our theory.","lang":"eng"}],"volume":23,"oa":1,"date_updated":"2024-09-10T13:03:17Z","article_processing_charge":"No","arxiv":1,"user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","acknowledgement":"We would like to thank Mert Pilanci for several exploratory discussions in the early stage\r\nof the project, Jan Maas for clarifications about Jordan et al. (1998), and Max Zimmer for\r\nsuggestive numerical experiments. A. Shevchenko and M. Mondelli are partially supported\r\nby the 2019 Lopez-Loreta Prize. V. 
Kungurtsev acknowledges support to the OP VVV\r\nproject CZ.02.1.01/0.0/0.0/16 019/0000765 Research Center for Informatics.\r\n","project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"quality_controlled":"1","oa_version":"Published Version","_id":"11420","publication_identifier":{"issn":["1532-4435"],"eissn":["1533-7928"]},"year":"2022","external_id":{"arxiv":["2111.02278"]},"title":"Mean-field analysis of piecewise linear solutions for wide ReLU networks","tmp":{"image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"ddc":["000"],"related_material":{"link":[{"relation":"other","url":"https://www.jmlr.org/papers/v23/21-1365.html"}]},"type":"journal_article","day":"01","status":"public","intvolume":"        23","file_date_updated":"2022-05-30T08:22:55Z","page":"1-55","issue":"130","publication":"Journal of Machine Learning Research","article_type":"original","date_published":"2022-04-01T00:00:00Z","month":"04","language":[{"iso":"eng"}],"publisher":"Journal of Machine Learning Research","scopus_import":"1","department":[{"_id":"MaMo"},{"_id":"DaAl"}],"has_accepted_license":"1","date_created":"2022-05-29T22:01:54Z","file":[{"success":1,"content_type":"application/pdf","relation":"main_file","creator":"cchlebak","file_id":"11422","file_name":"21-1365.pdf","file_size":1521701,"date_created":"2022-05-30T08:22:55Z","checksum":"d4ff5d1affb34848b5c5e4002483fc62","date_updated":"2022-05-30T08:22:55Z","access_level":"open_access"}]},{"intvolume":"     13298","status":"public","day":"25","series_title":"LNCS","type":"conference","publication":"International Colloquium on Structural Information and Communication Complexity","page":"1-20","scopus_import":"1","publisher":"Springer 
Nature","language":[{"iso":"eng"}],"month":"06","date_published":"2022-06-25T00:00:00Z","conference":{"end_date":"2022-06-29","name":"SIROCCO: Structural Information and Communication Complexity","location":"Paderborn, Germany","start_date":"2022-06-27"},"date_created":"2022-07-31T22:01:49Z","department":[{"_id":"DaAl"}],"abstract":[{"lang":"eng","text":"In this work we introduce the graph-theoretic notion of mendability: for each locally checkable graph problem we can define its mending radius, which captures the idea of how far one needs to modify a partial solution in order to “patch a hole.” We explore how mendability is connected to the existence of efficient algorithms, especially in distributed, parallel, and fault-tolerant settings. It is easy to see that O(1)-mendable problems are also solvable in O(log∗n) rounds in the LOCAL model of distributed computing. One of the surprises is that in paths and cycles, a converse also holds in the following sense: if a problem Π can be solved in O(log∗n), there is always a restriction Π′⊆Π that is still efficiently solvable but that is also O(1)-mendable. We also explore the structure of the landscape of mendability. For example, we show that in trees, the mending radius of any locally checkable problem is O(1), Θ(logn), or Θ(n), while in general graphs the structure is much more diverse."}],"author":[{"last_name":"Balliu","full_name":"Balliu, Alkida","first_name":"Alkida"},{"first_name":"Juho","last_name":"Hirvonen","full_name":"Hirvonen, Juho"},{"last_name":"Melnyk","full_name":"Melnyk, Darya","first_name":"Darya"},{"first_name":"Dennis","last_name":"Olivetti","full_name":"Olivetti, Dennis"},{"full_name":"Rybicki, Joel","last_name":"Rybicki","orcid":"0000-0002-6432-6646","first_name":"Joel","id":"334EFD2E-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Suomela, Jukka","last_name":"Suomela","first_name":"Jukka"}],"citation":{"short":"A. Balliu, J. Hirvonen, D. Melnyk, D. Olivetti, J. Rybicki, J. Suomela, in:, M. 
Parter (Ed.), International Colloquium on Structural Information and Communication Complexity, Springer Nature, 2022, pp. 1–20.","ista":"Balliu A, Hirvonen J, Melnyk D, Olivetti D, Rybicki J, Suomela J. 2022. Local mending. International Colloquium on Structural Information and Communication Complexity. SIROCCO: Structural Information and Communication Complexity, LNCS, vol. 13298, 1–20.","mla":"Balliu, Alkida, et al. “Local Mending.” <i>International Colloquium on Structural Information and Communication Complexity</i>, edited by Merav Parter, vol. 13298, Springer Nature, 2022, pp. 1–20, doi:<a href=\"https://doi.org/10.1007/978-3-031-09993-9_1\">10.1007/978-3-031-09993-9_1</a>.","ama":"Balliu A, Hirvonen J, Melnyk D, Olivetti D, Rybicki J, Suomela J. Local mending. In: Parter M, ed. <i>International Colloquium on Structural Information and Communication Complexity</i>. Vol 13298. LNCS. Springer Nature; 2022:1-20. doi:<a href=\"https://doi.org/10.1007/978-3-031-09993-9_1\">10.1007/978-3-031-09993-9_1</a>","chicago":"Balliu, Alkida, Juho Hirvonen, Darya Melnyk, Dennis Olivetti, Joel Rybicki, and Jukka Suomela. “Local Mending.” In <i>International Colloquium on Structural Information and Communication Complexity</i>, edited by Merav Parter, 13298:1–20. LNCS. Springer Nature, 2022. <a href=\"https://doi.org/10.1007/978-3-031-09993-9_1\">https://doi.org/10.1007/978-3-031-09993-9_1</a>.","apa":"Balliu, A., Hirvonen, J., Melnyk, D., Olivetti, D., Rybicki, J., &#38; Suomela, J. (2022). Local mending. In M. Parter (Ed.), <i>International Colloquium on Structural Information and Communication Complexity</i> (Vol. 13298, pp. 1–20). Paderborn, Germany: Springer Nature. <a href=\"https://doi.org/10.1007/978-3-031-09993-9_1\">https://doi.org/10.1007/978-3-031-09993-9_1</a>","ieee":"A. Balliu, J. Hirvonen, D. Melnyk, D. Olivetti, J. Rybicki, and J. 
Suomela, “Local mending,” in <i>International Colloquium on Structural Information and Communication Complexity</i>, Paderborn, Germany, 2022, vol. 13298, pp. 1–20."},"publication_status":"published","publication_identifier":{"issn":["0302-9743"],"isbn":["9783031099922"],"eissn":["1611-3349"]},"_id":"11707","project":[{"name":"Coordination in constrained and natural distributed systems","_id":"26A5D39A-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"840605"}],"quality_controlled":"1","oa_version":"Preprint","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","acknowledgement":"This project has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie grant agreement No 840605. This work was supported in part by the Academy of Finland, Grants 314888 and 333837. The authors would also like to thank David Harris, Neven Villani, and the anonymous reviewers for their very helpful comments and feedback on previous versions of this work.","arxiv":1,"editor":[{"first_name":"Merav","full_name":"Parter, Merav","last_name":"Parter"}],"article_processing_charge":"No","date_updated":"2023-08-03T12:16:29Z","volume":13298,"oa":1,"external_id":{"isi":["000876977400001"],"arxiv":["2102.08703"]},"title":"Local mending","year":"2022","doi":"10.1007/978-3-031-09993-9_1","ec_funded":1,"isi":1,"main_file_link":[{"url":"https://arxiv.org/abs/2102.08703","open_access":"1"}]},{"status":"public","author":[{"last_name":"Postnikova","full_name":"Postnikova, Anastasiia","first_name":"Anastasiia"},{"first_name":"Nikita","full_name":"Koval, Nikita","last_name":"Koval","id":"2F4DB10C-F248-11E8-B48F-1D18A9856A87"},{"id":"3279A00C-F248-11E8-B48F-1D18A9856A87","full_name":"Nadiradze, Giorgi","last_name":"Nadiradze","first_name":"Giorgi"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","last_name":"Alistarh","full_name":"Alistarh, 
Dan-Adrian","first_name":"Dan-Adrian"}],"abstract":[{"lang":"eng","text":"The source code for replicating experiments presented in the paper.\r\n\r\nThe implementation of the designed priority schedulers can be found in Galois-2.2.1/include/Galois/WorkList/:\r\nStealingMultiQueue.h is the StealingMultiQueue.\r\nMQOptimized/ contains MQ Optimized variants.\r\n\r\nWe provide images that contain all the dependencies and datasets. Images can be pulled from npostnikova/mq-based-schedulers repository, or downloaded from Zenodo. See readme for more detail."}],"type":"research_data_reference","citation":{"ama":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. Multi-queues can be state-of-the-art priority schedulers. 2022. doi:<a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>","mla":"Postnikova, Anastasiia, et al. <i>Multi-Queues Can Be State-of-the-Art Priority Schedulers</i>. Zenodo, 2022, doi:<a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>.","ista":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. 2022. Multi-queues can be state-of-the-art priority schedulers, Zenodo, <a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>.","short":"A. Postnikova, N. Koval, G. Nadiradze, D.-A. Alistarh, (2022).","ieee":"A. Postnikova, N. Koval, G. Nadiradze, and D.-A. Alistarh, “Multi-queues can be state-of-the-art priority schedulers.” Zenodo, 2022.","apa":"Postnikova, A., Koval, N., Nadiradze, G., &#38; Alistarh, D.-A. (2022). Multi-queues can be state-of-the-art priority schedulers. Zenodo. <a href=\"https://doi.org/10.5281/ZENODO.5733408\">https://doi.org/10.5281/ZENODO.5733408</a>","chicago":"Postnikova, Anastasiia, Nikita Koval, Giorgi Nadiradze, and Dan-Adrian Alistarh. “Multi-Queues Can Be State-of-the-Art Priority Schedulers.” Zenodo, 2022. 
<a href=\"https://doi.org/10.5281/ZENODO.5733408\">https://doi.org/10.5281/ZENODO.5733408</a>."},"day":"03","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Published Version","_id":"13076","oa":1,"date_updated":"2023-08-03T06:48:34Z","article_processing_charge":"No","publisher":"Zenodo","title":"Multi-queues can be state-of-the-art priority schedulers","date_published":"2022-01-03T00:00:00Z","doi":"10.5281/ZENODO.5733408","year":"2022","month":"01","date_created":"2023-05-23T17:05:40Z","ddc":["510"],"related_material":{"record":[{"status":"public","relation":"used_in_publication","id":"11180"}],"link":[{"url":"https://github.com/npostnikova/mq-based-schedulers/tree/v1.1","relation":"software"}]},"department":[{"_id":"DaAl"}],"main_file_link":[{"open_access":"1","url":"https://doi.org/10.5281/zenodo.5813846"}]},{"file":[{"relation":"main_file","content_type":"application/pdf","creator":"cchlebak","file_id":"11854","success":1,"access_level":"open_access","date_updated":"2022-08-16T08:05:15Z","file_size":1593474,"file_name":"2022_PODC_Alistarh.pdf","checksum":"4c6b29172b8e355b4fbc364a2e0827b2","date_created":"2022-08-16T08:05:15Z"}],"date_created":"2022-08-14T22:01:46Z","conference":{"start_date":"2022-07-25","location":"Salerno, Italy","end_date":"2022-07-29","name":"PODC: Symposium on Principles of Distributed Computing"},"department":[{"_id":"DaAl"}],"has_accepted_license":"1","language":[{"iso":"eng"}],"publisher":"Association for Computing Machinery","scopus_import":"1","date_published":"2022-07-21T00:00:00Z","month":"07","page":"246-256","file_date_updated":"2022-08-16T08:05:15Z","publication":"Proceedings of the Annual ACM Symposium on Principles of Distributed Computing","status":"public","type":"conference","day":"21","ddc":["000"],"tmp":{"image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY 
(4.0)"},"title":"Near-optimal leader election in population protocols on graphs","external_id":{"arxiv":["2205.12597"]},"ec_funded":1,"doi":"10.1145/3519270.3538435","year":"2022","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","acknowledgement":"We thank the anonymous reviewers for their helpful comments. We gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","oa_version":"Published Version","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020","grant_number":"805223"}],"quality_controlled":"1","_id":"11844","publication_identifier":{"isbn":["9781450392624"]},"date_updated":"2023-06-14T12:06:01Z","oa":1,"article_processing_charge":"Yes (via OA deal)","arxiv":1,"author":[{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","first_name":"Dan-Adrian"},{"first_name":"Joel","full_name":"Rybicki, Joel","last_name":"Rybicki","orcid":"0000-0002-6432-6646","id":"334EFD2E-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Voitovych, Sasha","last_name":"Voitovych","first_name":"Sasha"}],"abstract":[{"lang":"eng","text":"In the stochastic population protocol model, we are given a connected graph with n nodes, and in every time step, a scheduler samples an edge of the graph uniformly at random and the nodes connected by this edge interact. A fundamental task in this model is stable leader election, in which all nodes start in an identical state and the aim is to reach a configuration in which (1) exactly one node is elected as leader and (2) this node remains as the unique leader no matter what sequence of interactions follows. 
On cliques, the complexity of this problem has recently been settled: time-optimal protocols stabilize in Θ(n log n) expected steps using Θ(log log n) states, whereas protocols that use O(1) states require Θ(n2) expected steps.\r\n\r\nIn this work, we investigate the complexity of stable leader election on general graphs. We provide the first non-trivial time lower bounds for leader election on general graphs, showing that, when moving beyond cliques, the complexity landscape of leader election becomes very diverse: the time required to elect a leader can range from O(1) to Θ(n3) expected steps. On the upper bound side, we first observe that there exists a protocol that is time-optimal on many graph families, but uses polynomially-many states. In contrast, we give a near-time-optimal protocol that uses only O(log2n) states that is at most a factor log n slower. Finally, we show that the constant-state protocol of Beauquier et al. [OPODIS 2013] is at most a factor n log n slower than the fast polynomial-state protocol. Moreover, among constant-state protocols, this protocol has near-optimal average case complexity on dense random graphs."}],"publication_status":"published","citation":{"ista":"Alistarh D-A, Rybicki J, Voitovych S. 2022. Near-optimal leader election in population protocols on graphs. Proceedings of the Annual ACM Symposium on Principles of Distributed Computing. PODC: Symposium on Principles of Distributed Computing, 246–256.","short":"D.-A. Alistarh, J. Rybicki, S. Voitovych, in:, Proceedings of the Annual ACM Symposium on Principles of Distributed Computing, Association for Computing Machinery, 2022, pp. 246–256.","ama":"Alistarh D-A, Rybicki J, Voitovych S. Near-optimal leader election in population protocols on graphs. In: <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i>. Association for Computing Machinery; 2022:246-256. 
doi:<a href=\"https://doi.org/10.1145/3519270.3538435\">10.1145/3519270.3538435</a>","mla":"Alistarh, Dan-Adrian, et al. “Near-Optimal Leader Election in Population Protocols on Graphs.” <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i>, Association for Computing Machinery, 2022, pp. 246–56, doi:<a href=\"https://doi.org/10.1145/3519270.3538435\">10.1145/3519270.3538435</a>.","chicago":"Alistarh, Dan-Adrian, Joel Rybicki, and Sasha Voitovych. “Near-Optimal Leader Election in Population Protocols on Graphs.” In <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i>, 246–56. Association for Computing Machinery, 2022. <a href=\"https://doi.org/10.1145/3519270.3538435\">https://doi.org/10.1145/3519270.3538435</a>.","apa":"Alistarh, D.-A., Rybicki, J., &#38; Voitovych, S. (2022). Near-optimal leader election in population protocols on graphs. In <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i> (pp. 246–256). Salerno, Italy: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3519270.3538435\">https://doi.org/10.1145/3519270.3538435</a>","ieee":"D.-A. Alistarh, J. Rybicki, and S. Voitovych, “Near-optimal leader election in population protocols on graphs,” in <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i>, Salerno, Italy, 2022, pp. 
246–256."}},{"title":"Brief announcement: Temporal locality in online algorithms","ec_funded":1,"doi":"10.4230/LIPIcs.DISC.2022.52","year":"2022","ddc":["000"],"article_number":"52","tmp":{"image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"author":[{"last_name":"Pacut","full_name":"Pacut, Maciej","first_name":"Maciej"},{"first_name":"Mahmoud","last_name":"Parham","full_name":"Parham, Mahmoud"},{"id":"334EFD2E-F248-11E8-B48F-1D18A9856A87","first_name":"Joel","full_name":"Rybicki, Joel","last_name":"Rybicki","orcid":"0000-0002-6432-6646"},{"full_name":"Schmid, Stefan","last_name":"Schmid","first_name":"Stefan"},{"last_name":"Suomela","full_name":"Suomela, Jukka","first_name":"Jukka"},{"last_name":"Tereshchenko","full_name":"Tereshchenko, Aleksandr","first_name":"Aleksandr"}],"abstract":[{"text":"Online algorithms make decisions based on past inputs, with the goal of being competitive against an algorithm that sees also future inputs. In this work, we introduce time-local online algorithms; these are online algorithms in which the output at any given time is a function of only T latest inputs. Our main observation is that time-local online algorithms are closely connected to local distributed graph algorithms: distributed algorithms make decisions based on the local information in the spatial dimension, while time-local online algorithms make decisions based on the local information in the temporal dimension. We formalize this connection, and show how we can directly use the tools developed to study distributed approximability of graph optimization problems to prove upper and lower bounds on the competitive ratio achieved with time-local online algorithms. Moreover, we show how to use computational techniques to synthesize optimal time-local algorithms.","lang":"eng"}],"citation":{"mla":"Pacut, Maciej, et al. 
“Brief Announcement: Temporal Locality in Online Algorithms.” <i>36th International Symposium on Distributed Computing</i>, vol. 246, 52, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022, doi:<a href=\"https://doi.org/10.4230/LIPIcs.DISC.2022.52\">10.4230/LIPIcs.DISC.2022.52</a>.","ama":"Pacut M, Parham M, Rybicki J, Schmid S, Suomela J, Tereshchenko A. Brief announcement: Temporal locality in online algorithms. In: <i>36th International Symposium on Distributed Computing</i>. Vol 246. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2022. doi:<a href=\"https://doi.org/10.4230/LIPIcs.DISC.2022.52\">10.4230/LIPIcs.DISC.2022.52</a>","short":"M. Pacut, M. Parham, J. Rybicki, S. Schmid, J. Suomela, A. Tereshchenko, in:, 36th International Symposium on Distributed Computing, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022.","ista":"Pacut M, Parham M, Rybicki J, Schmid S, Suomela J, Tereshchenko A. 2022. Brief announcement: Temporal locality in online algorithms. 36th International Symposium on Distributed Computing. DISC: Symposium on Distributed Computing vol. 246, 52.","ieee":"M. Pacut, M. Parham, J. Rybicki, S. Schmid, J. Suomela, and A. Tereshchenko, “Brief announcement: Temporal locality in online algorithms,” in <i>36th International Symposium on Distributed Computing</i>, Augusta, GA, United States, 2022, vol. 246.","apa":"Pacut, M., Parham, M., Rybicki, J., Schmid, S., Suomela, J., &#38; Tereshchenko, A. (2022). Brief announcement: Temporal locality in online algorithms. In <i>36th International Symposium on Distributed Computing</i> (Vol. 246). Augusta, GA, United States: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.DISC.2022.52\">https://doi.org/10.4230/LIPIcs.DISC.2022.52</a>","chicago":"Pacut, Maciej, Mahmoud Parham, Joel Rybicki, Stefan Schmid, Jukka Suomela, and Aleksandr Tereshchenko. 
“Brief Announcement: Temporal Locality in Online Algorithms.” In <i>36th International Symposium on Distributed Computing</i>, Vol. 246. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022. <a href=\"https://doi.org/10.4230/LIPIcs.DISC.2022.52\">https://doi.org/10.4230/LIPIcs.DISC.2022.52</a>."},"publication_status":"published","project":[{"grant_number":"840605","call_identifier":"H2020","name":"Coordination in constrained and natural distributed systems","_id":"26A5D39A-B435-11E9-9278-68D0E5697425"}],"quality_controlled":"1","oa_version":"Published Version","acknowledgement":"This research has received funding from the German Research Foundation (DFG), grant\r\n470029389 (FlexNets), 2021-2024, and the Marie Skłodowska-Curie grant agreement No. 840605.","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publication_identifier":{"eissn":["1868-8969"],"eisbn":["9783959772556"]},"_id":"12182","article_processing_charge":"No","oa":1,"date_updated":"2023-01-27T06:59:29Z","volume":246,"language":[{"iso":"eng"}],"scopus_import":"1","publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik","date_published":"2022-10-17T00:00:00Z","month":"10","file":[{"success":1,"file_id":"12409","creator":"dernst","content_type":"application/pdf","relation":"main_file","date_created":"2023-01-27T06:58:02Z","checksum":"11bbb56f68a00f2cf6bcce6cc7f5c5f9","file_name":"2022_LIPICs_Pacut.pdf","file_size":524804,"date_updated":"2023-01-27T06:58:02Z","access_level":"open_access"}],"date_created":"2023-01-13T11:06:28Z","conference":{"start_date":"2022-10-25","end_date":"2022-10-27","location":"Augusta, GA, United States","name":"DISC: Symposium on Distributed Computing"},"department":[{"_id":"DaAl"}],"has_accepted_license":"1","status":"public","intvolume":"       246","type":"conference","day":"17","file_date_updated":"2023-01-27T06:58:02Z","publication":"36th International Symposium on Distributed Computing"},{"publication":"2022 IEEE/CVF Conference on Computer Vision and Pattern 
Recognition","page":"12256-12266","day":"27","type":"conference","status":"public","department":[{"_id":"DaAl"},{"_id":"ChLa"}],"conference":{"location":"New Orleans, LA, United States","name":"CVPR: Computer Vision and Pattern Recognition","end_date":"2022-06-24","start_date":"2022-06-18"},"date_created":"2023-01-16T10:06:00Z","month":"09","date_published":"2022-09-27T00:00:00Z","scopus_import":"1","publisher":"Institute of Electrical and Electronics Engineers","language":[{"iso":"eng"}],"arxiv":1,"article_processing_charge":"No","oa":1,"date_updated":"2023-08-04T10:33:28Z","publication_identifier":{"eissn":["2575-7075"]},"_id":"12299","quality_controlled":"1","project":[{"_id":"9B9290DE-BA93-11EA-9121-9846C619BF3A","name":"Vienna Graduate School on Computational Optimization","grant_number":" W1260-N35"},{"name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"805223"}],"oa_version":"Preprint","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","acknowledgement":"The authors would like to sincerely thank Christoph Lampert and Nir Shavit for fruitful discussions during the development of this work, and Eldar Kurtic for experimental support. EI was supported in part by the FWF DK VGSCO, grant agreement number W1260-N35, while AP and DA acknowledge generous support by the ERC, via Starting Grant 805223 ScaleML.","citation":{"chicago":"Iofinova, Eugenia B, Elena-Alexandra Peste, Mark Kurtz, and Dan-Adrian Alistarh. “How Well Do Sparse ImageNet Models Transfer?” In <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, 12256–66. Institute of Electrical and Electronics Engineers, 2022. <a href=\"https://doi.org/10.1109/cvpr52688.2022.01195\">https://doi.org/10.1109/cvpr52688.2022.01195</a>.","apa":"Iofinova, E. B., Peste, E.-A., Kurtz, M., &#38; Alistarh, D.-A. (2022). How well do sparse ImageNet models transfer? 
In <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i> (pp. 12256–12266). New Orleans, LA, United States: Institute of Electrical and Electronics Engineers. <a href=\"https://doi.org/10.1109/cvpr52688.2022.01195\">https://doi.org/10.1109/cvpr52688.2022.01195</a>","ieee":"E. B. Iofinova, E.-A. Peste, M. Kurtz, and D.-A. Alistarh, “How well do sparse ImageNet models transfer?,” in <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, New Orleans, LA, United States, 2022, pp. 12256–12266.","ista":"Iofinova EB, Peste E-A, Kurtz M, Alistarh D-A. 2022. How well do sparse ImageNet models transfer? 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition. CVPR: Computer Vision and Pattern Recognition, 12256–12266.","short":"E.B. Iofinova, E.-A. Peste, M. Kurtz, D.-A. Alistarh, in:, 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition, Institute of Electrical and Electronics Engineers, 2022, pp. 12256–12266.","ama":"Iofinova EB, Peste E-A, Kurtz M, Alistarh D-A. How well do sparse ImageNet models transfer? In: <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>. Institute of Electrical and Electronics Engineers; 2022:12256-12266. doi:<a href=\"https://doi.org/10.1109/cvpr52688.2022.01195\">10.1109/cvpr52688.2022.01195</a>","mla":"Iofinova, Eugenia B., et al. “How Well Do Sparse ImageNet Models Transfer?” <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, Institute of Electrical and Electronics Engineers, 2022, pp. 12256–66, doi:<a href=\"https://doi.org/10.1109/cvpr52688.2022.01195\">10.1109/cvpr52688.2022.01195</a>."},"publication_status":"published","abstract":[{"lang":"eng","text":"Transfer learning is a classic paradigm by which models pretrained on large “upstream” datasets are adapted to yield good results on “downstream” specialized datasets. 
Generally, more accurate models on the “upstream” dataset tend to provide better transfer accuracy “downstream”. In this work, we perform an in-depth investigation of this phenomenon in the context of convolutional neural networks (CNNs) trained on the ImageNet dataset, which have been pruned-that is, compressed by sparsifiying their connections. We consider transfer using unstructured pruned models obtained by applying several state-of-the-art pruning methods, including magnitude-based, second-order, regrowth, lottery-ticket, and regularization approaches, in the context of twelve standard transfer tasks. In a nutshell, our study shows that sparse models can match or even outperform the transfer performance of dense models, even at high sparsities, and, while doing so, can lead to significant inference and even training speedups. At the same time, we observe and analyze significant differences in the behaviour of different pruning methods. The code is available at: https://github.com/IST-DASLab/sparse-imagenet-transfer."}],"author":[{"first_name":"Eugenia B","full_name":"Iofinova, Eugenia B","last_name":"Iofinova","orcid":"0000-0002-7778-3221","id":"f9a17499-f6e0-11ea-865d-fdf9a3f77117"},{"first_name":"Elena-Alexandra","last_name":"Peste","full_name":"Peste, Elena-Alexandra","id":"32D78294-F248-11E8-B48F-1D18A9856A87"},{"first_name":"Mark","full_name":"Kurtz, Mark","last_name":"Kurtz"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian"}],"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2111.13445"}],"isi":1,"related_material":{"record":[{"relation":"dissertation_contains","id":"13074","status":"public"}]},"doi":"10.1109/cvpr52688.2022.01195","year":"2022","ec_funded":1,"title":"How well do sparse ImageNet models transfer?","external_id":{"arxiv":["2111.13445"],"isi":["000870759105034"]}},{"title":"CGX: Adaptive system support for 
communication-efficient deep learning","external_id":{"arxiv":["2111.08617"]},"year":"2022","doi":"10.1145/3528535.3565248","ddc":["000"],"tmp":{"image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"abstract":[{"lang":"eng","text":"The ability to scale out training workloads has been one of the key performance enablers of deep learning. The main scaling approach is data-parallel GPU-based training, which has been boosted by hardware and software support for highly efficient point-to-point communication, and in particular via hardware bandwidth over-provisioning. Overprovisioning comes at a cost: there is an order of magnitude price difference between \"cloud-grade\" servers with such support, relative to their popular \"consumer-grade\" counterparts, although single server-grade and consumer-grade GPUs can have similar computational envelopes.\r\n\r\nIn this paper, we show that the costly hardware overprovisioning approach can be supplanted via algorithmic and system design, and propose a framework called CGX, which provides efficient software support for compressed communication in ML applications, for both multi-GPU single-node training, as well as larger-scale multi-node training. CGX is based on two technical advances: At the system level, it relies on a re-developed communication stack for ML frameworks, which provides flexible, highly-efficient support for compressed communication. At the application level, it provides seamless, parameter-free integration with popular frameworks, so that end-users do not have to modify training recipes, nor significant training code. This is complemented by a layer-wise adaptive compression technique which dynamically balances compression gains with accuracy preservation. 
CGX integrates with popular ML frameworks, providing up to 3X speedups for multi-GPU nodes based on commodity hardware, and order-of-magnitude improvements in the multi-node setting, with negligible impact on accuracy."}],"author":[{"id":"D0CF4148-C985-11E9-8066-0BDEE5697425","first_name":"Ilia","last_name":"Markov","full_name":"Markov, Ilia"},{"first_name":"Hamidreza","full_name":"Ramezanikebrya, Hamidreza","last_name":"Ramezanikebrya"},{"full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"citation":{"ista":"Markov I, Ramezanikebrya H, Alistarh D-A. 2022. CGX: Adaptive system support for communication-efficient deep learning. Proceedings of the 23rd ACM/IFIP International Middleware Conference. Middleware: International Middleware Conference, 241–254.","short":"I. Markov, H. Ramezanikebrya, D.-A. Alistarh, in:, Proceedings of the 23rd ACM/IFIP International Middleware Conference, Association for Computing Machinery, 2022, pp. 241–254.","ama":"Markov I, Ramezanikebrya H, Alistarh D-A. CGX: Adaptive system support for communication-efficient deep learning. In: <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>. Association for Computing Machinery; 2022:241-254. doi:<a href=\"https://doi.org/10.1145/3528535.3565248\">10.1145/3528535.3565248</a>","mla":"Markov, Ilia, et al. “CGX: Adaptive System Support for Communication-Efficient Deep Learning.” <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, Association for Computing Machinery, 2022, pp. 241–54, doi:<a href=\"https://doi.org/10.1145/3528535.3565248\">10.1145/3528535.3565248</a>.","chicago":"Markov, Ilia, Hamidreza Ramezanikebrya, and Dan-Adrian Alistarh. “CGX: Adaptive System Support for Communication-Efficient Deep Learning.” In <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, 241–54. Association for Computing Machinery, 2022. 
<a href=\"https://doi.org/10.1145/3528535.3565248\">https://doi.org/10.1145/3528535.3565248</a>.","apa":"Markov, I., Ramezanikebrya, H., &#38; Alistarh, D.-A. (2022). CGX: Adaptive system support for communication-efficient deep learning. In <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i> (pp. 241–254). Quebec, QC, Canada: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3528535.3565248\">https://doi.org/10.1145/3528535.3565248</a>","ieee":"I. Markov, H. Ramezanikebrya, and D.-A. Alistarh, “CGX: Adaptive system support for communication-efficient deep learning,” in <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, Quebec, QC, Canada, 2022, pp. 241–254."},"publication_status":"published","publication_identifier":{"isbn":["9781450393409"]},"_id":"12780","oa_version":"Published Version","quality_controlled":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","acknowledgement":"The authors sincerely thank Nikoli Dryden, Tal Ben-Nun, Torsten Hoefler and Bapi Chatterjee for useful discussions throughout the development of this project.","arxiv":1,"article_processing_charge":"Yes (via OA deal)","date_updated":"2023-04-03T06:21:04Z","oa":1,"publisher":"Association for Computing Machinery","language":[{"iso":"eng"}],"month":"11","date_published":"2022-11-01T00:00:00Z","conference":{"end_date":"2022-11-11","name":"Middleware: International Middleware Conference","location":"Quebec, QC, 
Canada","start_date":"2022-11-07"},"date_created":"2023-03-31T06:17:00Z","file":[{"creator":"dernst","file_id":"12795","relation":"main_file","content_type":"application/pdf","success":1,"access_level":"open_access","date_updated":"2023-04-03T06:17:58Z","checksum":"1a397746235f245da5468819247ff663","date_created":"2023-04-03T06:17:58Z","file_size":1514169,"file_name":"2022_ACMMiddleware_Markov.pdf"}],"has_accepted_license":"1","department":[{"_id":"DaAl"}],"status":"public","day":"01","type":"conference","publication":"Proceedings of the 23rd ACM/IFIP International Middleware Conference","page":"241-254","file_date_updated":"2023-04-03T06:17:58Z"},{"department":[{"_id":"DaAl"}],"conference":{"start_date":"2021-07-06","location":"Virtual, Online","end_date":"2021-07-08","name":"SPAA: Symposium on Parallelism in Algorithms and Architectures"},"date_created":"2022-03-18T08:21:47Z","month":"07","date_published":"2021-07-01T00:00:00Z","scopus_import":"1","publisher":"Association for Computing Machinery","language":[{"iso":"eng"}],"publication":"Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures","page":"208-220","day":"01","type":"conference","status":"public","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2105.08098"}],"year":"2021","doi":"10.1145/3409964.3461810","external_id":{"arxiv":["2105.08098"]},"title":"A scalable concurrent algorithm for dynamic connectivity","arxiv":1,"article_processing_charge":"No","oa":1,"date_updated":"2022-03-18T08:45:46Z","publication_identifier":{"isbn":["9781450380706"]},"_id":"10853","quality_controlled":"1","oa_version":"Preprint","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"chicago":"Fedorov, Alexander, Nikita Koval, and Dan-Adrian Alistarh. “A Scalable Concurrent Algorithm for Dynamic Connectivity.” In <i>Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures</i>, 208–20. Association for Computing Machinery, 2021. 
<a href=\"https://doi.org/10.1145/3409964.3461810\">https://doi.org/10.1145/3409964.3461810</a>.","ieee":"A. Fedorov, N. Koval, and D.-A. Alistarh, “A scalable concurrent algorithm for dynamic connectivity,” in <i>Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures</i>, Virtual, Online, 2021, pp. 208–220.","apa":"Fedorov, A., Koval, N., &#38; Alistarh, D.-A. (2021). A scalable concurrent algorithm for dynamic connectivity. In <i>Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures</i> (pp. 208–220). Virtual, Online: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3409964.3461810\">https://doi.org/10.1145/3409964.3461810</a>","short":"A. Fedorov, N. Koval, D.-A. Alistarh, in:, Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures, Association for Computing Machinery, 2021, pp. 208–220.","ista":"Fedorov A, Koval N, Alistarh D-A. 2021. A scalable concurrent algorithm for dynamic connectivity. Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures. SPAA: Symposium on Parallelism in Algorithms and Architectures, 208–220.","mla":"Fedorov, Alexander, et al. “A Scalable Concurrent Algorithm for Dynamic Connectivity.” <i>Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures</i>, Association for Computing Machinery, 2021, pp. 208–20, doi:<a href=\"https://doi.org/10.1145/3409964.3461810\">10.1145/3409964.3461810</a>.","ama":"Fedorov A, Koval N, Alistarh D-A. A scalable concurrent algorithm for dynamic connectivity. In: <i>Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures</i>. Association for Computing Machinery; 2021:208-220. 
doi:<a href=\"https://doi.org/10.1145/3409964.3461810\">10.1145/3409964.3461810</a>"},"publication_status":"published","abstract":[{"lang":"eng","text":"Dynamic Connectivity is a fundamental algorithmic graph problem, motivated by a wide range of applications to social and communication networks and used as a building block in various other algorithms, such as the bi-connectivity and the dynamic minimal spanning tree problems. In brief, we wish to maintain the connected components of the graph under dynamic edge insertions and deletions. In the sequential case, the problem has been well-studied from both theoretical and practical perspectives. However, much less is known about efficient concurrent solutions to this problem. This is the gap we address in this paper. We start from one of the classic data structures used to solve this problem, the Euler Tour Tree. Our first contribution is a non-blocking single-writer implementation of it. We leverage this data structure to obtain the first truly concurrent generalization of dynamic connectivity, which preserves the time complexity of its sequential counterpart, but is also scalable in practice. To achieve this, we rely on three main techniques. The first is to ensure that connectivity queries, which usually dominate real-world workloads, are non-blocking. The second non-trivial technique expands the above idea by making all queries that do not change the connectivity structure non-blocking. The third ingredient is applying fine-grained locking for updating the connected components, which allows operations on disjoint components to occur in parallel. We evaluate the resulting algorithm on various workloads, executing on both real and synthetic graphs. 
The results show the efficiency of each of the proposed optimizations; the most efficient variant improves the performance of a coarse-grained based implementation on realistic scenarios up to 6x on average and up to 30x when connectivity queries dominate."}],"author":[{"full_name":"Fedorov, Alexander","last_name":"Fedorov","first_name":"Alexander"},{"full_name":"Koval, Nikita","last_name":"Koval","first_name":"Nikita"},{"orcid":"0000-0003-3650-940X","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}]},{"date_published":"2021-05-01T00:00:00Z","month":"05","language":[{"iso":"eng"}],"scopus_import":"1","publisher":"Association for Computing Machinery","department":[{"_id":"DaAl"}],"date_created":"2022-03-18T08:48:41Z","conference":{"name":"SIGMETRICS: International Conference on Measurement and Modeling of Computer Systems","end_date":"2021-06-18","location":"Virtual, Online","start_date":"2021-06-14"},"type":"conference","day":"01","status":"public","page":"71-72","publication":"Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems","ec_funded":1,"doi":"10.1145/3410220.3453923","year":"2021","title":"Input-dynamic distributed algorithms for communication networks","external_id":{"arxiv":["2005.07637"]},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2005.07637"}],"related_material":{"record":[{"id":"10855","relation":"extended_version","status":"public"}]},"citation":{"chicago":"Foerster, Klaus-Tycho, Janne Korhonen, Ami Paz, Joel Rybicki, and Stefan Schmid. “Input-Dynamic Distributed Algorithms for Communication Networks.” In <i>Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems</i>, 71–72. Association for Computing Machinery, 2021. 
<a href=\"https://doi.org/10.1145/3410220.3453923\">https://doi.org/10.1145/3410220.3453923</a>.","ieee":"K.-T. Foerster, J. Korhonen, A. Paz, J. Rybicki, and S. Schmid, “Input-dynamic distributed algorithms for communication networks,” in <i>Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems</i>, Virtual, Online, 2021, pp. 71–72.","apa":"Foerster, K.-T., Korhonen, J., Paz, A., Rybicki, J., &#38; Schmid, S. (2021). Input-dynamic distributed algorithms for communication networks. In <i>Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems</i> (pp. 71–72). Virtual, Online: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3410220.3453923\">https://doi.org/10.1145/3410220.3453923</a>","short":"K.-T. Foerster, J. Korhonen, A. Paz, J. Rybicki, S. Schmid, in:, Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems, Association for Computing Machinery, 2021, pp. 71–72.","ista":"Foerster K-T, Korhonen J, Paz A, Rybicki J, Schmid S. 2021. Input-dynamic distributed algorithms for communication networks. Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems. SIGMETRICS: International Conference on Measurement and Modeling of Computer Systems, 71–72.","mla":"Foerster, Klaus-Tycho, et al. “Input-Dynamic Distributed Algorithms for Communication Networks.” <i>Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems</i>, Association for Computing Machinery, 2021, pp. 71–72, doi:<a href=\"https://doi.org/10.1145/3410220.3453923\">10.1145/3410220.3453923</a>.","ama":"Foerster K-T, Korhonen J, Paz A, Rybicki J, Schmid S. Input-dynamic distributed algorithms for communication networks. 
In: <i>Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems</i>. Association for Computing Machinery; 2021:71-72. doi:<a href=\"https://doi.org/10.1145/3410220.3453923\">10.1145/3410220.3453923</a>"},"publication_status":"published","author":[{"first_name":"Klaus-Tycho","full_name":"Foerster, Klaus-Tycho","last_name":"Foerster"},{"id":"C5402D42-15BC-11E9-A202-CA2BE6697425","full_name":"Korhonen, Janne","last_name":"Korhonen","first_name":"Janne"},{"first_name":"Ami","last_name":"Paz","full_name":"Paz, Ami"},{"first_name":"Joel","orcid":"0000-0002-6432-6646","last_name":"Rybicki","full_name":"Rybicki, Joel","id":"334EFD2E-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Schmid","full_name":"Schmid, Stefan","first_name":"Stefan"}],"abstract":[{"text":"Consider a distributed task where the communication network is fixed but the local inputs given to the nodes of the distributed system may change over time. In this work, we explore the following question: if some of the local inputs change, can an existing solution be updated efficiently, in a dynamic and distributed manner?\r\nTo address this question, we define the batch dynamic CONGEST model in which we are given a bandwidth-limited communication network and a dynamic edge labelling defines the problem input. The task is to maintain a solution to a graph problem on the labelled graph under batch changes. We investigate, when a batch of alpha edge label changes arrive, - how much time as a function of alpha we need to update an existing solution, and - how much information the nodes have to keep in local memory between batches in order to update the solution quickly.\r\nOur work lays the foundations for the theory of input-dynamic distributed network algorithms. We give a general picture of the complexity landscape in this model, design both universal algorithms and algorithms for concrete problems, and present a general framework for lower bounds. 
The diverse time complexity of our model spans from constant time, through time polynomial in alpha, and to alpha time, which we show to be enough for any task.","lang":"eng"}],"article_processing_charge":"No","date_updated":"2023-09-26T10:40:55Z","oa":1,"arxiv":1,"quality_controlled":"1","project":[{"grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020"},{"name":"Coordination in constrained and natural distributed systems","_id":"26A5D39A-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"840605"}],"oa_version":"Preprint","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","acknowledgement":"We thank Jukka Suomela for discussions. We also thank our shepherd Mohammad Hajiesmaili and the reviewers for their time and suggestions on how to improve the paper. This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska–Curie grant agreement No. 
840605, from the Vienna Science and Technology Fund (WWTF) project WHATIF, ICT19-045, 2020-2024, and from the Austrian Science Fund (FWF) and netIDEE SCIENCE project P 33775-N.","publication_identifier":{"isbn":["9781450380720"]},"_id":"10854"},{"article_type":"original","date_published":"2021-03-01T00:00:00Z","month":"03","language":[{"iso":"eng"}],"publisher":"Association for Computing Machinery","scopus_import":"1","department":[{"_id":"DaAl"}],"date_created":"2022-03-18T09:10:27Z","type":"journal_article","day":"01","status":"public","intvolume":"         5","page":"1-33","issue":"1","publication":"Proceedings of the ACM on Measurement and Analysis of Computing Systems","ec_funded":1,"doi":"10.1145/3447384","year":"2021","title":"Input-dynamic distributed algorithms for communication networks","external_id":{"arxiv":["2005.07637"]},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2005.07637"}],"related_material":{"record":[{"status":"public","id":"10854","relation":"shorter_version"}]},"publication_status":"published","citation":{"chicago":"Foerster, Klaus-Tycho, Janne Korhonen, Ami Paz, Joel Rybicki, and Stefan Schmid. “Input-Dynamic Distributed Algorithms for Communication Networks.” <i>Proceedings of the ACM on Measurement and Analysis of Computing Systems</i>. Association for Computing Machinery, 2021. <a href=\"https://doi.org/10.1145/3447384\">https://doi.org/10.1145/3447384</a>.","ieee":"K.-T. Foerster, J. Korhonen, A. Paz, J. Rybicki, and S. Schmid, “Input-dynamic distributed algorithms for communication networks,” <i>Proceedings of the ACM on Measurement and Analysis of Computing Systems</i>, vol. 5, no. 1. Association for Computing Machinery, pp. 1–33, 2021.","apa":"Foerster, K.-T., Korhonen, J., Paz, A., Rybicki, J., &#38; Schmid, S. (2021). Input-dynamic distributed algorithms for communication networks. <i>Proceedings of the ACM on Measurement and Analysis of Computing Systems</i>. Association for Computing Machinery. 
<a href=\"https://doi.org/10.1145/3447384\">https://doi.org/10.1145/3447384</a>","short":"K.-T. Foerster, J. Korhonen, A. Paz, J. Rybicki, S. Schmid, Proceedings of the ACM on Measurement and Analysis of Computing Systems 5 (2021) 1–33.","ista":"Foerster K-T, Korhonen J, Paz A, Rybicki J, Schmid S. 2021. Input-dynamic distributed algorithms for communication networks. Proceedings of the ACM on Measurement and Analysis of Computing Systems. 5(1), 1–33.","mla":"Foerster, Klaus-Tycho, et al. “Input-Dynamic Distributed Algorithms for Communication Networks.” <i>Proceedings of the ACM on Measurement and Analysis of Computing Systems</i>, vol. 5, no. 1, Association for Computing Machinery, 2021, pp. 1–33, doi:<a href=\"https://doi.org/10.1145/3447384\">10.1145/3447384</a>.","ama":"Foerster K-T, Korhonen J, Paz A, Rybicki J, Schmid S. Input-dynamic distributed algorithms for communication networks. <i>Proceedings of the ACM on Measurement and Analysis of Computing Systems</i>. 2021;5(1):1-33. doi:<a href=\"https://doi.org/10.1145/3447384\">10.1145/3447384</a>"},"keyword":["Computer Networks and Communications","Hardware and Architecture","Safety","Risk","Reliability and Quality","Computer Science (miscellaneous)"],"author":[{"first_name":"Klaus-Tycho","last_name":"Foerster","full_name":"Foerster, Klaus-Tycho"},{"last_name":"Korhonen","full_name":"Korhonen, Janne","first_name":"Janne","id":"C5402D42-15BC-11E9-A202-CA2BE6697425"},{"last_name":"Paz","full_name":"Paz, Ami","first_name":"Ami"},{"id":"334EFD2E-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-6432-6646","full_name":"Rybicki, Joel","last_name":"Rybicki","first_name":"Joel"},{"full_name":"Schmid, Stefan","last_name":"Schmid","first_name":"Stefan"}],"abstract":[{"text":"Consider a distributed task where the communication network is fixed but the local inputs given to the nodes of the distributed system may change over time. 
In this work, we explore the following question: if some of the local inputs change, can an existing solution be updated efficiently, in a dynamic and distributed manner? To address this question, we define the batch dynamic CONGEST model in which we are given a bandwidth-limited communication network and a dynamic edge labelling defines the problem input. The task is to maintain a solution to a graph problem on the labeled graph under batch changes. We investigate, when a batch of α edge label changes arrive, (i) how much time as a function of α we need to update an existing solution, and (ii) how much information the nodes have to keep in local memory between batches in order to update the solution quickly. Our work lays the foundations for the theory of input-dynamic distributed network algorithms. We give a general picture of the complexity landscape in this model, design both universal algorithms and algorithms for concrete problems, and present a general framework for lower bounds. In particular, we derive non-trivial upper bounds for two selected, contrasting problems: maintaining a minimum spanning tree and detecting cliques.","lang":"eng"}],"oa":1,"volume":5,"date_updated":"2023-09-26T10:40:55Z","article_processing_charge":"No","arxiv":1,"acknowledgement":"We thank Jukka Suomela for discussions. We also thank our shepherd Mohammad Hajiesmaili\r\nand the reviewers for their time and suggestions on how to improve the paper. This project\r\nhas received funding from the European Research Council (ERC) under the European Union’s\r\nHorizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), from the European Union’s Horizon 2020 research and innovation programme under the Marie\r\nSkłodowska–Curie grant agreement No. 
840605, from the Vienna Science and Technology Fund (WWTF) project WHATIF, ICT19-045, 2020-2024, and from the Austrian Science Fund (FWF) and netIDEE SCIENCE project P 33775-N.","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","project":[{"grant_number":"840605","_id":"26A5D39A-B435-11E9-9278-68D0E5697425","name":"Coordination in constrained and natural distributed systems","call_identifier":"H2020"},{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020","grant_number":"805223"}],"oa_version":"Preprint","quality_controlled":"1","_id":"10855","publication_identifier":{"issn":["2476-1249"]}},{"_id":"11436","publication_identifier":{"isbn":["9781713835974"],"issn":["2159-5399"],"eissn":["2374-3468"]},"acknowledgement":"Vyacheslav Kungurtsev was supported by the OP VVV project CZ.02.1.01/0.0/0.0/16 019/0000765 “Research Center for Informatics. Bapi Chatterjee was supported by the European Union’s Horizon 2020 research and innovation programme under the Marie Sklodowska-Curie grant agreement No. 754411 (ISTPlus). Dan Alistarh has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Preprint","project":[{"call_identifier":"H2020","_id":"260C2330-B435-11E9-9278-68D0E5697425","name":"ISTplus - Postdoctoral Fellowships","grant_number":"754411"},{"name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"805223"}],"quality_controlled":"1","arxiv":1,"date_updated":"2022-06-07T06:53:36Z","volume":35,"oa":1,"article_processing_charge":"No","abstract":[{"lang":"eng","text":"Asynchronous distributed algorithms are a popular way to reduce synchronization costs in large-scale optimization, and in particular for neural network training. 
However, for nonsmooth and nonconvex objectives, few convergence guarantees exist beyond cases where closed-form proximal operator solutions are available. As training most popular deep neural networks corresponds to optimizing nonsmooth and nonconvex objectives, there is a pressing need for such convergence guarantees. In this paper, we analyze for the first time the convergence of stochastic asynchronous optimization for this general class of objectives. In particular, we focus on stochastic subgradient methods allowing for block variable partitioning, where the shared model is asynchronously updated by concurrent processes. To this end, we use a probabilistic model which captures key features of real asynchronous scheduling between concurrent processes. Under this model, we establish convergence with probability one to an invariant set for stochastic subgradient methods with momentum. From a practical perspective, one issue with the family of algorithms that we consider is that they are not efficiently supported by machine learning frameworks, which mostly focus on distributed data-parallel strategies. To address this, we propose a new implementation strategy for shared-memory based training of deep neural networks for a partitioned but shared model in single- and multi-GPU settings. 
Based on this implementation, we achieve on average 1.2x speed-up in comparison to state-of-the-art training methods for popular image classification tasks, without compromising accuracy."}],"author":[{"first_name":"Vyacheslav","full_name":"Kungurtsev, Vyacheslav","last_name":"Kungurtsev"},{"last_name":"Egan","full_name":"Egan, Malcolm","first_name":"Malcolm"},{"id":"3C41A08A-F248-11E8-B48F-1D18A9856A87","full_name":"Chatterjee, Bapi","last_name":"Chatterjee","first_name":"Bapi"},{"first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"publication_status":"published","citation":{"mla":"Kungurtsev, Vyacheslav, et al. “Asynchronous Optimization Methods for Efficient Training of Deep Neural Networks with Guarantees.” <i>35th AAAI Conference on Artificial Intelligence, AAAI 2021</i>, vol. 35, no. 9B, AAAI Press, 2021, pp. 8209–16.","ama":"Kungurtsev V, Egan M, Chatterjee B, Alistarh D-A. Asynchronous optimization methods for efficient training of deep neural networks with guarantees. In: <i>35th AAAI Conference on Artificial Intelligence, AAAI 2021</i>. Vol 35. AAAI Press; 2021:8209-8216.","ista":"Kungurtsev V, Egan M, Chatterjee B, Alistarh D-A. 2021. Asynchronous optimization methods for efficient training of deep neural networks with guarantees. 35th AAAI Conference on Artificial Intelligence, AAAI 2021. AAAI: Conference on Artificial Intelligence vol. 35, 8209–8216.","short":"V. Kungurtsev, M. Egan, B. Chatterjee, D.-A. Alistarh, in:, 35th AAAI Conference on Artificial Intelligence, AAAI 2021, AAAI Press, 2021, pp. 8209–8216.","ieee":"V. Kungurtsev, M. Egan, B. Chatterjee, and D.-A. Alistarh, “Asynchronous optimization methods for efficient training of deep neural networks with guarantees,” in <i>35th AAAI Conference on Artificial Intelligence, AAAI 2021</i>, Virtual, Online, 2021, vol. 35, no. 9B, pp. 
8209–8216.","apa":"Kungurtsev, V., Egan, M., Chatterjee, B., &#38; Alistarh, D.-A. (2021). Asynchronous optimization methods for efficient training of deep neural networks with guarantees. In <i>35th AAAI Conference on Artificial Intelligence, AAAI 2021</i> (Vol. 35, pp. 8209–8216). Virtual, Online: AAAI Press.","chicago":"Kungurtsev, Vyacheslav, Malcolm Egan, Bapi Chatterjee, and Dan-Adrian Alistarh. “Asynchronous Optimization Methods for Efficient Training of Deep Neural Networks with Guarantees.” In <i>35th AAAI Conference on Artificial Intelligence, AAAI 2021</i>, 35:8209–16. AAAI Press, 2021."},"main_file_link":[{"url":"https://doi.org/10.48550/arXiv.1905.11845","open_access":"1"}],"external_id":{"arxiv":["1905.11845"]},"title":"Asynchronous optimization methods for efficient training of deep neural networks with guarantees","year":"2021","ec_funded":1,"issue":"9B","publication":"35th AAAI Conference on Artificial Intelligence, AAAI 2021","page":"8209-8216","intvolume":"        35","status":"public","day":"18","type":"conference","conference":{"end_date":"2021-02-09","name":"AAAI: Conference on Artificial Intelligence","location":"Virtual, Online","start_date":"2021-02-02"},"date_created":"2022-06-05T22:01:52Z","department":[{"_id":"DaAl"}],"publisher":"AAAI Press","scopus_import":"1","language":[{"iso":"eng"}],"month":"05","date_published":"2021-05-18T00:00:00Z"},{"main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/1680e9fa7b4dd5d62ece800239bb53bd-Paper.pdf"}],"year":"2021","ec_funded":1,"title":"Distributed principal component analysis with limited communication","external_id":{"arxiv":["2110.14391"]},"arxiv":1,"article_processing_charge":"No","volume":4,"date_updated":"2022-06-20T08:31:52Z","oa":1,"publication_identifier":{"issn":["1049-5258"],"isbn":["9781713845393"]},"_id":"11452","oa_version":"Published Version","quality_controlled":"1","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic 
Coordination for Scalable Machine Learning","call_identifier":"H2020","grant_number":"805223"},{"grant_number":"754411","call_identifier":"H2020","name":"ISTplus - Postdoctoral Fellowships","_id":"260C2330-B435-11E9-9278-68D0E5697425"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","acknowledgement":"We would like to thank the anonymous reviewers for helpful comments and suggestions. We also thank Aurelien Lucchi and Antonio Orvieto for fruitful discussions at an early stage of this work. FA is partially supported by the SNSF under research project No. 192363 and conducted part of this work while at IST Austria under the European Union’s Horizon 2020 research and innovation programme (grant agreement No. 805223 ScaleML). PD partly conducted this work while at IST Austria and was supported by the European Union’s Horizon 2020 programme under the Marie Skłodowska-Curie grant agreement No. 754411.","citation":{"chicago":"Alimisis, Foivos, Peter Davies, Bart Vandereycken, and Dan-Adrian Alistarh. “Distributed Principal Component Analysis with Limited Communication.” In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, 4:2823–34. Neural Information Processing Systems Foundation, 2021.","ieee":"F. Alimisis, P. Davies, B. Vandereycken, and D.-A. Alistarh, “Distributed principal component analysis with limited communication,” in <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 4, pp. 2823–2834.","apa":"Alimisis, F., Davies, P., Vandereycken, B., &#38; Alistarh, D.-A. (2021). Distributed principal component analysis with limited communication. In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i> (Vol. 4, pp. 2823–2834). Virtual, Online: Neural Information Processing Systems Foundation.","short":"F. Alimisis, P. Davies, B. Vandereycken, D.-A. 
Alistarh, in:, Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 2823–2834.","ista":"Alimisis F, Davies P, Vandereycken B, Alistarh D-A. 2021. Distributed principal component analysis with limited communication. Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 4, 2823–2834.","ama":"Alimisis F, Davies P, Vandereycken B, Alistarh D-A. Distributed principal component analysis with limited communication. In: <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>. Vol 4. Neural Information Processing Systems Foundation; 2021:2823-2834.","mla":"Alimisis, Foivos, et al. “Distributed Principal Component Analysis with Limited Communication.” <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, vol. 4, Neural Information Processing Systems Foundation, 2021, pp. 2823–34."},"publication_status":"published","abstract":[{"text":"We study efficient distributed algorithms for the fundamental problem of principal component analysis and leading eigenvector computation on the sphere, when the data are randomly distributed among a set of computational nodes. We propose a new quantized variant of Riemannian gradient descent to solve this problem, and prove that the algorithm converges with high probability under a set of necessary spherical-convexity properties. 
We give bounds on the number of bits transmitted by the algorithm under common initialization schemes, and investigate the dependency on the problem dimension in each case.","lang":"eng"}],"author":[{"full_name":"Alimisis, Foivos","last_name":"Alimisis","first_name":"Foivos"},{"first_name":"Peter","orcid":"0000-0002-5646-9524","full_name":"Davies, Peter","last_name":"Davies","id":"11396234-BB50-11E9-B24C-90FCE5697425"},{"first_name":"Bart","full_name":"Vandereycken, Bart","last_name":"Vandereycken"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh"}],"department":[{"_id":"DaAl"}],"conference":{"start_date":"2021-12-06","name":"NeurIPS: Neural Information Processing Systems","end_date":"2021-12-14","location":"Virtual, Online"},"date_created":"2022-06-19T22:01:58Z","month":"12","date_published":"2021-12-01T00:00:00Z","scopus_import":"1","publisher":"Neural Information Processing Systems Foundation","language":[{"iso":"eng"}],"publication":"Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems","page":"2823-2834","day":"01","type":"conference","intvolume":"         4","status":"public"},{"main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/48000647b315f6f00f913caa757a70b3-Paper.pdf"}],"related_material":{"record":[{"status":"public","relation":"dissertation_contains","id":"13074"}]},"ec_funded":1,"acknowledged_ssus":[{"_id":"ScienComp"}],"year":"2021","title":"AC/DC: Alternating Compressed/DeCompressed training of deep neural networks","external_id":{"arxiv":["2106.12379"]},"oa":1,"volume":34,"date_updated":"2023-06-01T12:54:45Z","article_processing_charge":"No","arxiv":1,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","acknowledgement":"This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and 
innovation programme (grant agreement No 805223 ScaleML), and a CNRS PEPS grant. This research was supported by the Scientific Service Units (SSU) of IST Austria through resources provided by Scientific Computing (SciComp). We would also like to thank Christoph Lampert for his feedback on an earlier version of this work, as well as for providing hardware for the Transformer-XL experiments.","oa_version":"Published Version","project":[{"grant_number":"805223","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning"}],"quality_controlled":"1","_id":"11458","publication_identifier":{"issn":["1049-5258"],"isbn":["9781713845393"]},"publication_status":"published","citation":{"ieee":"E.-A. Peste, E. B. Iofinova, A. Vladu, and D.-A. Alistarh, “AC/DC: Alternating Compressed/DeCompressed training of deep neural networks,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 8557–8570.","apa":"Peste, E.-A., Iofinova, E. B., Vladu, A., &#38; Alistarh, D.-A. (2021). AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 8557–8570). Virtual, Online: Curran Associates.","chicago":"Peste, Elena-Alexandra, Eugenia B Iofinova, Adrian Vladu, and Dan-Adrian Alistarh. “AC/DC: Alternating Compressed/DeCompressed Training of Deep Neural Networks.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:8557–70. Curran Associates, 2021.","mla":"Peste, Elena-Alexandra, et al. “AC/DC: Alternating Compressed/DeCompressed Training of Deep Neural Networks.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Curran Associates, 2021, pp. 8557–70.","ama":"Peste E-A, Iofinova EB, Vladu A, Alistarh D-A. AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. 
In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Curran Associates; 2021:8557-8570.","short":"E.-A. Peste, E.B. Iofinova, A. Vladu, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Curran Associates, 2021, pp. 8557–8570.","ista":"Peste E-A, Iofinova EB, Vladu A, Alistarh D-A. 2021. AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 8557–8570."},"author":[{"last_name":"Peste","full_name":"Peste, Elena-Alexandra","first_name":"Elena-Alexandra","id":"32D78294-F248-11E8-B48F-1D18A9856A87"},{"first_name":"Eugenia B","full_name":"Iofinova, Eugenia B","last_name":"Iofinova","orcid":"0000-0002-7778-3221","id":"f9a17499-f6e0-11ea-865d-fdf9a3f77117"},{"full_name":"Vladu, Adrian","last_name":"Vladu","first_name":"Adrian"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X"}],"abstract":[{"text":"The increasing computational requirements of deep neural networks (DNNs) have led to significant interest in obtaining DNN models that are sparse, yet accurate. Recent work has investigated the even harder case of sparse training, where the DNN weights are, for as much as possible, already sparse to reduce computational costs during training. Existing sparse training methods are often empirical and can have lower accuracy relative to the dense baseline. In this paper, we present a general approach called Alternating Compressed/DeCompressed (AC/DC) training of DNNs, demonstrate convergence for a variant of the algorithm, and show that AC/DC outperforms existing sparse training methods in accuracy at similar computational budgets; at high sparsity levels, AC/DC even outperforms existing methods that rely on accurate pre-trained dense models. 
An important property of AC/DC is that it allows co-training of dense and sparse models, yielding accurate sparse–dense model pairs at the end of the training process. This is useful in practice, where compressed variants may be desirable for deployment in resource-constrained settings without re-doing the entire training flow, and also provides us with insights into the accuracy gap between dense and compressed models. The code is available at: https://github.com/IST-DASLab/ACDC.","lang":"eng"}],"department":[{"_id":"GradSch"},{"_id":"DaAl"}],"date_created":"2022-06-20T12:11:53Z","conference":{"start_date":"2021-12-06","location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems","end_date":"2021-12-14"},"date_published":"2021-12-06T00:00:00Z","month":"12","language":[{"iso":"eng"}],"publisher":"Curran Associates","scopus_import":"1","page":"8557-8570","publication":"35th Conference on Neural Information Processing Systems","type":"conference","day":"6","status":"public","intvolume":"        34"},{"intvolume":"        34","status":"public","day":"06","type":"conference","publication":"35th Conference on Neural Information Processing Systems","page":"14873-14886","publisher":"Curran Associates","scopus_import":"1","language":[{"iso":"eng"}],"month":"12","date_published":"2021-12-06T00:00:00Z","conference":{"start_date":"2021-12-06","location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems","end_date":"2021-12-14"},"date_created":"2022-06-26T22:01:35Z","department":[{"_id":"DaAl"}],"abstract":[{"text":"Efficiently approximating local curvature information of the loss function is a key tool for optimization and compression of deep neural networks. Yet, most existing methods to approximate second-order information have high computational\r\nor storage costs, which limits their practicality. 
In this work, we investigate matrix-free, linear-time approaches for estimating Inverse-Hessian Vector Products (IHVPs) for the case when the Hessian can be approximated as a sum of rank-one matrices, as in the classic approximation of the Hessian by the empirical Fisher matrix. We propose two new algorithms: the first is tailored towards network compression and can compute the IHVP for dimension d, if the Hessian is given as a sum of m rank-one matrices, using O(dm2) precomputation, O(dm) cost for computing the IHVP, and query cost O(m) for any single element of the inverse Hessian. The second algorithm targets an optimization setting, where we wish to compute the product between the inverse Hessian, estimated over a sliding window of optimization steps, and a given gradient direction, as required for preconditioned SGD. We give an algorithm with cost O(dm + m2) for computing the IHVP and O(dm + m3) for adding or removing any gradient from the sliding window. These\r\ntwo algorithms yield state-of-the-art results for network pruning and optimization with lower computational overhead relative to existing second-order methods. Implementations are available at [9] and [17].","lang":"eng"}],"author":[{"first_name":"Elias","last_name":"Frantar","full_name":"Frantar, Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f"},{"id":"47beb3a5-07b5-11eb-9b87-b108ec578218","last_name":"Kurtic","full_name":"Kurtic, Eldar","first_name":"Eldar"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","first_name":"Dan-Adrian"}],"publication_status":"published","citation":{"chicago":"Frantar, Elias, Eldar Kurtic, and Dan-Adrian Alistarh. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:14873–86. Curran Associates, 2021.","apa":"Frantar, E., Kurtic, E., &#38; Alistarh, D.-A. (2021). 
M-FAC: Efficient matrix-free approximations of second-order information. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 14873–14886). Virtual, Online: Curran Associates.","ieee":"E. Frantar, E. Kurtic, and D.-A. Alistarh, “M-FAC: Efficient matrix-free approximations of second-order information,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 14873–14886.","ista":"Frantar E, Kurtic E, Alistarh D-A. 2021. M-FAC: Efficient matrix-free approximations of second-order information. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 14873–14886.","short":"E. Frantar, E. Kurtic, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Curran Associates, 2021, pp. 14873–14886.","ama":"Frantar E, Kurtic E, Alistarh D-A. M-FAC: Efficient matrix-free approximations of second-order information. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Curran Associates; 2021:14873-14886.","mla":"Frantar, Elias, et al. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Curran Associates, 2021, pp. 
14873–86."},"_id":"11463","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"acknowledgement":"We gratefully acknowledge funding the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), as well as computational support from Amazon Web Services (AWS) EC2.","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","project":[{"grant_number":"805223","_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020"}],"oa_version":"Published Version","quality_controlled":"1","arxiv":1,"volume":34,"date_updated":"2022-06-27T07:05:12Z","oa":1,"article_processing_charge":"No","title":"M-FAC: Efficient matrix-free approximations of second-order information","external_id":{"arxiv":["2010.08222"]},"year":"2021","ec_funded":1,"main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/7cfd5df443b4eb0d69886a583b33de4c-Paper.pdf"}]},{"publication_status":"published","citation":{"ama":"Alistarh D-A, Korhonen J. Towards tight communication lower bounds for distributed optimisation. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Curran Associates; 2021:7254-7266.","mla":"Alistarh, Dan-Adrian, and Janne Korhonen. “Towards Tight Communication Lower Bounds for Distributed Optimisation.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Curran Associates, 2021, pp. 7254–66.","ista":"Alistarh D-A, Korhonen J. 2021. Towards tight communication lower bounds for distributed optimisation. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 7254–7266.","short":"D.-A. Alistarh, J. Korhonen, in:, 35th Conference on Neural Information Processing Systems, Curran Associates, 2021, pp. 7254–7266.","ieee":"D.-A. Alistarh and J. 
Korhonen, “Towards tight communication lower bounds for distributed optimisation,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 7254–7266.","apa":"Alistarh, D.-A., &#38; Korhonen, J. (2021). Towards tight communication lower bounds for distributed optimisation. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 7254–7266). Virtual, Online: Curran Associates.","chicago":"Alistarh, Dan-Adrian, and Janne Korhonen. “Towards Tight Communication Lower Bounds for Distributed Optimisation.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:7254–66. Curran Associates, 2021."},"author":[{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh"},{"last_name":"Korhonen","full_name":"Korhonen, Janne","first_name":"Janne","id":"C5402D42-15BC-11E9-A202-CA2BE6697425"}],"abstract":[{"text":"We consider a standard distributed optimisation setting where N machines, each holding a d-dimensional function\r\nfi, aim to jointly minimise the sum of the functions ∑Ni=1fi(x). This problem arises naturally in large-scale distributed optimisation, where a standard solution is to apply variants of (stochastic) gradient descent. We focus on the communication complexity of this problem: our main result provides the first fully unconditional bounds on total number of bits which need to be sent and received by the N machines to solve this problem under point-to-point communication, within a given error-tolerance. Specifically, we show that Ω(Ndlogd/Nε) total bits need to be communicated between the machines to find an additive ϵ-approximation to the minimum of ∑Ni=1fi(x). The result holds for both deterministic and randomised algorithms, and, importantly, requires no assumptions on the algorithm structure. 
The lower bound is tight under certain restrictions on parameter values, and is matched within constant factors for quadratic objectives by a new variant of quantised gradient descent, which we describe and analyse. Our results bring over tools from communication complexity to distributed optimisation, which has potential for further applications.","lang":"eng"}],"volume":34,"oa":1,"date_updated":"2022-06-27T06:54:31Z","article_processing_charge":"No","arxiv":1,"acknowledgement":"We thank the NeurIPS reviewers for insightful comments that helped us improve the positioning of our results, as well as for pointing out the subsampling approach for complementing the randomised lower bound. We also thank Foivos Alimisis and Peter Davies for useful discussions. This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","project":[{"call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223"}],"quality_controlled":"1","oa_version":"Published Version","_id":"11464","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"ec_funded":1,"year":"2021","external_id":{"arxiv":["2010.08222"]},"title":"Towards tight communication lower bounds for distributed optimisation","main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/3b92d18aa7a6176dd37d372bc2f1eb71-Paper.pdf","open_access":"1"}],"type":"conference","day":"06","status":"public","intvolume":"        34","page":"7254-7266","publication":"35th Conference on Neural Information Processing Systems","date_published":"2021-12-06T00:00:00Z","month":"12","language":[{"iso":"eng"}],"publisher":"Curran Associates","scopus_import":"1","department":[{"_id":"DaAl"}],"date_created":"2022-06-26T22:01:35Z","conference":{"name":"NeurIPS: 
Neural Information Processing Systems","end_date":"2021-12-14","location":"Virtual, Online","start_date":"2021-12-06"}},{"related_material":{"record":[{"id":"6933","relation":"earlier_version","status":"public"}]},"isi":1,"main_file_link":[{"url":"https://doi.org/10.1007/s00446-020-00380-5","open_access":"1"}],"title":"Fast approximate shortest paths in the congested clique","external_id":{"isi":["000556444600001"],"arxiv":["1903.05956"]},"doi":"10.1007/s00446-020-00380-5","year":"2021","_id":"7939","publication_identifier":{"issn":["0178-2770"],"eissn":["1432-0452"]},"user_id":"3E5EF7F0-F248-11E8-B48F-1D18A9856A87","acknowledgement":"Open access funding provided by Institute of Science and Technology (IST Austria). We thank Mohsen Ghaffari, Michael Elkin and Merav Parter for fruitful discussions. This project has received funding from the European Union’s Horizon 2020 Research And Innovation Program under Grant Agreement No. 755839.","oa_version":"Published Version","project":[{"name":"IST Austria Open Access Fund","_id":"B67AFEDC-15C9-11EA-A837-991A96BB2854"}],"quality_controlled":"1","arxiv":1,"oa":1,"volume":34,"date_updated":"2024-03-07T14:43:39Z","article_processing_charge":"Yes (via OA deal)","abstract":[{"lang":"eng","text":"We design fast deterministic algorithms for distance computation in the Congested Clique model. Our key contributions include:\r\n    A (2+ϵ)-approximation for all-pairs shortest paths in O(log²n/ϵ) rounds on unweighted undirected graphs. With a small additional additive factor, this also applies for weighted graphs. This is the first sub-polynomial constant-factor approximation for APSP in this model.\r\n    A (1+ϵ)-approximation for multi-source shortest paths from O(√n) sources in O(log²n/ϵ) rounds on weighted undirected graphs. 
This is the first sub-polynomial algorithm obtaining this approximation for a set of sources of polynomial size.\r\n\r\nOur main techniques are new distance tools that are obtained via improved algorithms for sparse matrix multiplication, which we leverage to construct efficient hopsets and shortest paths. Furthermore, our techniques extend to additional distance problems for which we improve upon the state-of-the-art, including diameter approximation, and an exact single-source shortest paths algorithm for weighted undirected graphs in O~(n1/6) rounds. "}],"author":[{"last_name":"Censor-Hillel","full_name":"Censor-Hillel, Keren","first_name":"Keren"},{"first_name":"Michal","last_name":"Dory","full_name":"Dory, Michal"},{"id":"C5402D42-15BC-11E9-A202-CA2BE6697425","first_name":"Janne","full_name":"Korhonen, Janne","last_name":"Korhonen"},{"full_name":"Leitersdorf, Dean","last_name":"Leitersdorf","first_name":"Dean"}],"publication_status":"published","citation":{"short":"K. Censor-Hillel, M. Dory, J. Korhonen, D. Leitersdorf, Distributed Computing 34 (2021) 463–487.","ista":"Censor-Hillel K, Dory M, Korhonen J, Leitersdorf D. 2021. Fast approximate shortest paths in the congested clique. Distributed Computing. 34, 463–487.","mla":"Censor-Hillel, Keren, et al. “Fast Approximate Shortest Paths in the Congested Clique.” <i>Distributed Computing</i>, vol. 34, Springer Nature, 2021, pp. 463–87, doi:<a href=\"https://doi.org/10.1007/s00446-020-00380-5\">10.1007/s00446-020-00380-5</a>.","ama":"Censor-Hillel K, Dory M, Korhonen J, Leitersdorf D. Fast approximate shortest paths in the congested clique. <i>Distributed Computing</i>. 2021;34:463-487. doi:<a href=\"https://doi.org/10.1007/s00446-020-00380-5\">10.1007/s00446-020-00380-5</a>","chicago":"Censor-Hillel, Keren, Michal Dory, Janne Korhonen, and Dean Leitersdorf. “Fast Approximate Shortest Paths in the Congested Clique.” <i>Distributed Computing</i>. Springer Nature, 2021. 
<a href=\"https://doi.org/10.1007/s00446-020-00380-5\">https://doi.org/10.1007/s00446-020-00380-5</a>.","apa":"Censor-Hillel, K., Dory, M., Korhonen, J., &#38; Leitersdorf, D. (2021). Fast approximate shortest paths in the congested clique. <i>Distributed Computing</i>. Springer Nature. <a href=\"https://doi.org/10.1007/s00446-020-00380-5\">https://doi.org/10.1007/s00446-020-00380-5</a>","ieee":"K. Censor-Hillel, M. Dory, J. Korhonen, and D. Leitersdorf, “Fast approximate shortest paths in the congested clique,” <i>Distributed Computing</i>, vol. 34. Springer Nature, pp. 463–487, 2021."},"date_created":"2020-06-07T22:00:54Z","department":[{"_id":"DaAl"}],"publisher":"Springer Nature","scopus_import":"1","language":[{"iso":"eng"}],"month":"12","article_type":"original","date_published":"2021-12-01T00:00:00Z","publication":"Distributed Computing","page":"463-487","intvolume":"        34","status":"public","day":"01","type":"journal_article"},{"doi":"10.1007/s00453-021-00905-9","year":"2021","ec_funded":1,"title":"Dynamic averaging load balancing on cycles","external_id":{"arxiv":["2003.09297"],"isi":["000734004600001"]},"tmp":{"image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"isi":1,"related_material":{"link":[{"relation":"earlier_version","url":"https://doi.org/10.4230/LIPIcs.ICALP.2020.7"}],"record":[{"status":"public","id":"15077","relation":"earlier_version"}]},"ddc":["000"],"publication_status":"published","citation":{"ama":"Alistarh D-A, Nadiradze G, Sabour A. Dynamic averaging load balancing on cycles. <i>Algorithmica</i>. 2021. doi:<a href=\"https://doi.org/10.1007/s00453-021-00905-9\">10.1007/s00453-021-00905-9</a>","mla":"Alistarh, Dan-Adrian, et al. 
“Dynamic Averaging Load Balancing on Cycles.” <i>Algorithmica</i>, Springer Nature, 2021, doi:<a href=\"https://doi.org/10.1007/s00453-021-00905-9\">10.1007/s00453-021-00905-9</a>.","ista":"Alistarh D-A, Nadiradze G, Sabour A. 2021. Dynamic averaging load balancing on cycles. Algorithmica.","short":"D.-A. Alistarh, G. Nadiradze, A. Sabour, Algorithmica (2021).","ieee":"D.-A. Alistarh, G. Nadiradze, and A. Sabour, “Dynamic averaging load balancing on cycles,” <i>Algorithmica</i>. Springer Nature, 2021.","apa":"Alistarh, D.-A., Nadiradze, G., &#38; Sabour, A. (2021). Dynamic averaging load balancing on cycles. <i>Algorithmica</i>. Virtual, Online; Germany: Springer Nature. <a href=\"https://doi.org/10.1007/s00453-021-00905-9\">https://doi.org/10.1007/s00453-021-00905-9</a>","chicago":"Alistarh, Dan-Adrian, Giorgi Nadiradze, and Amirmojtaba Sabour. “Dynamic Averaging Load Balancing on Cycles.” <i>Algorithmica</i>. Springer Nature, 2021. <a href=\"https://doi.org/10.1007/s00453-021-00905-9\">https://doi.org/10.1007/s00453-021-00905-9</a>."},"abstract":[{"lang":"eng","text":"We consider the following dynamic load-balancing process: given an underlying graph G with n nodes, in each step t≥ 0, one unit of load is created, and placed at a randomly chosen graph node. In the same step, the chosen node picks a random neighbor, and the two nodes balance their loads by averaging them. We are interested in the expected gap between the minimum and maximum loads at nodes as the process progresses, and its dependence on n and on the graph structure. Variants of the above graphical balanced allocation process have been studied previously by Peres, Talwar, and Wieder [Peres et al., 2015], and by Sauerwald and Sun [Sauerwald and Sun, 2015]. These authors left as open the question of characterizing the gap in the case of cycle graphs in the dynamic case, where weights are created during the algorithm’s execution. 
For this case, the only known upper bound is of 𝒪(n log n), following from a majorization argument due to [Peres et al., 2015], which analyzes a related graphical allocation process. In this paper, we provide an upper bound of 𝒪 (√n log n) on the expected gap of the above process for cycles of length n. We introduce a new potential analysis technique, which enables us to bound the difference in load between k-hop neighbors on the cycle, for any k ≤ n/2. We complement this with a \"gap covering\" argument, which bounds the maximum value of the gap by bounding its value across all possible subsets of a certain structure, and recursively bounding the gaps within each subset. We provide analytical and experimental evidence that our upper bound on the gap is tight up to a logarithmic factor. "}],"author":[{"first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Nadiradze, Giorgi","last_name":"Nadiradze","orcid":"0000-0001-5634-0731","first_name":"Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87"},{"id":"bcc145fd-e77f-11ea-ae8b-80d661dbff67","full_name":"Sabour, Amirmojtaba","last_name":"Sabour","first_name":"Amirmojtaba"}],"arxiv":1,"date_updated":"2024-03-05T07:35:53Z","oa":1,"article_processing_charge":"Yes (via OA deal)","_id":"8286","publication_identifier":{"issn":["0178-4617"],"eissn":["1432-0541"]},"acknowledgement":"The authors sincerely thank Thomas Sauerwald and George Giakkoupis for insightful discussions, and Mohsen Ghaffari, Yuval Peres, and Udi Wieder for feedback on earlier versions of this draft. We also thank the ICALP anonymous reviewers for their very useful comments. Open access funding provided by Institute of Science and Technology (IST Austria). Funding was provided by European Research Council (Grant No. 
PR1042ERC01).","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020","grant_number":"805223"},{"name":"IST Austria Open Access Fund","_id":"B67AFEDC-15C9-11EA-A837-991A96BB2854"}],"quality_controlled":"1","oa_version":"Published Version","month":"12","article_type":"original","date_published":"2021-12-24T00:00:00Z","publisher":"Springer Nature","scopus_import":"1","language":[{"iso":"eng"}],"has_accepted_license":"1","department":[{"_id":"DaAl"}],"conference":{"start_date":"2020-07-08","name":"ICALP: International Colloquium on Automata, Languages, and Programming ","end_date":"2020-07-11","location":"Virtual, Online; Germany"},"date_created":"2020-08-24T06:24:04Z","file":[{"date_updated":"2021-12-27T10:36:40Z","access_level":"open_access","file_name":"2021_Algorithmica_Alistarh.pdf","file_size":525950,"date_created":"2021-12-27T10:36:40Z","checksum":"21169b25b0c8e17b21e12af22bff9870","content_type":"application/pdf","relation":"main_file","creator":"cchlebak","file_id":"10577","success":1}],"day":"24","type":"journal_article","status":"public","publication":"Algorithmica","file_date_updated":"2021-12-27T10:36:40Z"},{"status":"public","intvolume":"        32","type":"journal_article","day":"01","publication":"IEEE Transactions on Parallel and Distributed Systems","issue":"7","language":[{"iso":"eng"}],"scopus_import":"1","publisher":"IEEE","article_type":"original","date_published":"2021-07-01T00:00:00Z","month":"07","date_created":"2020-11-05T15:25:43Z","department":[{"_id":"DaAl"}],"author":[{"first_name":"Shigang","last_name":"Li","full_name":"Li, Shigang"},{"first_name":"Tal","full_name":"Ben-Nun, Tal","last_name":"Ben-Nun"},{"id":"3279A00C-F248-11E8-B48F-1D18A9856A87","first_name":"Giorgi","full_name":"Nadiradze, Giorgi","last_name":"Nadiradze"},{"last_name":"Girolamo","full_name":"Girolamo, Salvatore 
Di","first_name":"Salvatore Di"},{"full_name":"Dryden, Nikoli","last_name":"Dryden","first_name":"Nikoli"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X"},{"last_name":"Hoefler","full_name":"Hoefler, Torsten","first_name":"Torsten"}],"abstract":[{"lang":"eng","text":"Deep learning at scale is dominated by communication time. Distributing samples across nodes usually yields the best performance, but poses scaling challenges due to global information dissemination and load imbalance across uneven sample lengths. State-of-the-art decentralized optimizers mitigate the problem, but require more iterations to achieve the same accuracy as their globally-communicating counterparts. We present Wait-Avoiding Group Model Averaging (WAGMA) SGD, a wait-avoiding stochastic optimizer that reduces global communication via subgroup weight exchange. The key insight is a combination of algorithmic changes to the averaging scheme and the use of a group allreduce operation. We prove the convergence of WAGMA-SGD, and empirically show that it retains convergence rates similar to Allreduce-SGD. For evaluation, we train ResNet-50 on ImageNet; Transformer for machine translation; and deep reinforcement learning for navigation at scale. Compared with state-of-the-art decentralized SGD variants, WAGMA-SGD significantly improves training throughput (e.g., 2.1× on 1,024 GPUs for reinforcement learning), and achieves the fastest time-to-solution (e.g., the highest score using the shortest training time for Transformer)."}],"citation":{"short":"S. Li, T.B.-N. Tal Ben-Nun, G. Nadiradze, S.D. Girolamo, N. Dryden, D.-A. Alistarh, T. Hoefler, IEEE Transactions on Parallel and Distributed Systems 32 (2021).","ista":"Li S, Tal Ben-Nun TB-N, Nadiradze G, Girolamo SD, Dryden N, Alistarh D-A, Hoefler T. 2021. 
Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging. IEEE Transactions on Parallel and Distributed Systems. 32(7), 9271898.","ama":"Li S, Tal Ben-Nun TB-N, Nadiradze G, et al. Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging. <i>IEEE Transactions on Parallel and Distributed Systems</i>. 2021;32(7). doi:<a href=\"https://doi.org/10.1109/TPDS.2020.3040606\">10.1109/TPDS.2020.3040606</a>","mla":"Li, Shigang, et al. “Breaking (Global) Barriers in Parallel Stochastic Optimization with Wait-Avoiding Group Averaging.” <i>IEEE Transactions on Parallel and Distributed Systems</i>, vol. 32, no. 7, 9271898, IEEE, 2021, doi:<a href=\"https://doi.org/10.1109/TPDS.2020.3040606\">10.1109/TPDS.2020.3040606</a>.","chicago":"Li, Shigang, Tal Ben-Nun Tal Ben-Nun, Giorgi Nadiradze, Salvatore Di Girolamo, Nikoli Dryden, Dan-Adrian Alistarh, and Torsten Hoefler. “Breaking (Global) Barriers in Parallel Stochastic Optimization with Wait-Avoiding Group Averaging.” <i>IEEE Transactions on Parallel and Distributed Systems</i>. IEEE, 2021. <a href=\"https://doi.org/10.1109/TPDS.2020.3040606\">https://doi.org/10.1109/TPDS.2020.3040606</a>.","ieee":"S. Li <i>et al.</i>, “Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging,” <i>IEEE Transactions on Parallel and Distributed Systems</i>, vol. 32, no. 7. IEEE, 2021.","apa":"Li, S., Tal Ben-Nun, T. B.-N., Nadiradze, G., Girolamo, S. D., Dryden, N., Alistarh, D.-A., &#38; Hoefler, T. (2021). Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging. <i>IEEE Transactions on Parallel and Distributed Systems</i>. IEEE. 
<a href=\"https://doi.org/10.1109/TPDS.2020.3040606\">https://doi.org/10.1109/TPDS.2020.3040606</a>"},"publication_status":"published","project":[{"grant_number":"805223","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning"}],"oa_version":"Preprint","quality_controlled":"1","acknowledgement":"This project has received funding from the European Research Council (ERC) under the European Union’s Hori-\r\nzon 2020 programme under Grant DAPP, Grant 678880; EPi-GRAM-HS, Grant 801039; and ERC Starting Grant ScaleML, Grant 805223. The work of Tal Ben-Nun is supported by the Swiss National Science Foundation (Ambizione Project No. 185778). The work of Nikoli Dryden is supported by the ETH Postdoctoral Fellowship. The authors would like to thank the Swiss National Supercomputing Center for providing the computing resources and technical support.","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","publication_identifier":{"issn":["10459219"]},"_id":"8723","article_processing_charge":"No","oa":1,"date_updated":"2023-08-04T11:08:52Z","volume":32,"arxiv":1,"title":"Breaking (global) barriers in parallel stochastic optimization with wait-avoiding group averaging","external_id":{"arxiv":["2005.00124"],"isi":["000621405200019"]},"ec_funded":1,"year":"2021","doi":"10.1109/TPDS.2020.3040606","article_number":"9271898","isi":1,"main_file_link":[{"url":"https://arxiv.org/abs/2005.00124","open_access":"1"}]}]
