@inproceedings{13147,
  abstract     = {We investigate fast and communication-efficient algorithms for the classic problem of minimizing a sum of strongly convex and smooth functions that are distributed among n
 different nodes, which can communicate using a limited number of bits. Most previous communication-efficient approaches for this problem are limited to first-order optimization, and therefore have \emph{linear} dependence on the condition number in their communication complexity. We show that this dependence is not inherent: communication-efficient methods can in fact have sublinear dependence on the condition number. For this, we design and analyze the first communication-efficient distributed variants of preconditioned gradient descent for Generalized Linear Models, and for Newton’s method. Our results rely on a new technique for quantizing both the preconditioner and the descent direction at each step of the algorithms, while controlling their convergence rate. We also validate our findings experimentally, showing faster convergence and reduced communication relative to previous methods.},
  author       = {Alimisis, Foivos and Davies, Peter and Alistarh, Dan-Adrian},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning},
  isbn         = {9781713845065},
  issn         = {2640-3498},
  location     = {Virtual},
  pages        = {196--206},
  publisher    = {ML Research Press},
  title        = {{Communication-efficient distributed optimization with quantized preconditioners}},
  volume       = {139},
  year         = {2021},
}

@article{9541,
  abstract     = {The Massively Parallel Computation (MPC) model is an emerging model that distills core aspects of distributed and parallel computation, developed as a tool to solve combinatorial (typically graph) problems in systems of many machines with limited space. Recent work has focused on the regime in which machines have sublinear (in n, the number of nodes in the input graph) space, with randomized algorithms presented for the fundamental problems of Maximal Matching and Maximal Independent Set. However, there have been no prior corresponding deterministic algorithms. A major challenge underlying the sublinear space setting is that the local space of each machine might be too small to store all edges incident to a single node. This poses a considerable obstacle compared to classical models in which each node is assumed to know and have easy access to its incident edges. To overcome this barrier, we introduce a new graph sparsification technique that deterministically computes a low-degree subgraph, with the additional property that solving the problem on this subgraph provides significant progress towards solving the problem for the original input graph. Using this framework to derandomize the well-known algorithm of Luby [SICOMP’86], we obtain O(log Δ + log log n)-round deterministic MPC algorithms for solving the problems of Maximal Matching and Maximal Independent Set with O(nɛ) space on each machine for any constant ɛ > 0. These algorithms also run in O(log Δ) rounds in the closely related model of CONGESTED CLIQUE, improving upon the state-of-the-art bound of O(log 2Δ) rounds by Censor-Hillel et al. [DISC’17].},
  author       = {Czumaj, Artur and Davies, Peter and Parter, Merav},
  issn         = {1549-6333},
  journal      = {ACM Transactions on Algorithms},
  number       = {2},
  publisher    = {Association for Computing Machinery},
  title        = {{Graph sparsification for derandomizing massively parallel computation with low space}},
  doi          = {10.1145/3451992},
  volume       = {17},
  year         = {2021},
}

@inproceedings{9543,
  abstract     = {We consider the problem ofdistributed mean estimation (DME), in which n machines are each given a local d-dimensional vector xv∈Rd, and must cooperate to estimate the mean of their inputs μ=1n∑nv=1xv, while minimizing total communication cost. DME is a fundamental construct in distributed machine learning, and there has been considerable work on variants of this problem, especially in the context of distributed variance reduction for stochastic gradients in parallel SGD. Previous work typically assumes an upper bound on the norm of the input vectors, and achieves an error bound in terms of this norm. However, in many real applications, the input vectors are concentrated around the correct output μ, but μ itself has large norm. In such cases, previous output error bounds perform poorly. In this paper, we show that output error bounds need not depend on input norm. We provide a method of quantization which allows distributed mean estimation to be performed with solution quality dependent only on the distance between inputs, not on input norm, and show an analogous result for distributed variance reduction. The technique is based on a new connection with lattice theory. We also provide lower bounds showing that the communication to error trade-off of our algorithms is asymptotically optimal. As the lattices achieving optimal bounds under l2-norm can be computationally impractical, we also present an extension which leverages easy-to-use cubic lattices, and is loose only up to a logarithmic factor ind. We show experimentally that our method yields practical improvements for common applications, relative to prior approaches.},
  author       = {Davies, Peter and Gurunanthan, Vijaykrishna and Moshrefi, Niusha  and Ashkboos, Saleh and Alistarh, Dan-Adrian},
  booktitle    = {9th International Conference on Learning Representations},
  location     = {Virtual},
  title        = {{New bounds for distributed mean estimation and variance reduction}},
  year         = {2021},
}

@article{9571,
  abstract     = {As the size and complexity of models and datasets grow, so does the need for communication-efficient variants of stochastic gradient descent that can be deployed to perform parallel model training. One popular communication-compression method for data-parallel SGD is QSGD (Alistarh et al., 2017), which quantizes and encodes gradients to reduce communication costs. The baseline variant of QSGD provides strong theoretical guarantees, however, for practical purposes, the authors proposed a heuristic variant which we call QSGDinf, which demonstrated impressive empirical gains for distributed training of large neural networks. In this paper, we build on this work to propose a new gradient quantization scheme, and show that it has both stronger theoretical guarantees than QSGD, and matches and exceeds the empirical performance of the QSGDinf heuristic and of other compression methods.},
  author       = {Ramezani-Kebrya, Ali and Faghri, Fartash and Markov, Ilya and Aksenov, Vitalii and Alistarh, Dan-Adrian and Roy, Daniel M.},
  issn         = {15337928},
  journal      = {Journal of Machine Learning Research},
  number       = {114},
  pages        = {1−43},
  publisher    = {Journal of Machine Learning Research},
  title        = {{NUQSGD: Provably communication-efficient data-parallel SGD via nonuniform quantization}},
  volume       = {22},
  year         = {2021},
}

@inproceedings{9620,
  abstract     = {In this note, we introduce a distributed twist on the classic coupon collector problem: a set of m collectors wish to each obtain a set of n coupons; for this, they can each sample coupons uniformly at random, but can also meet in pairwise interactions, during which they can exchange coupons. By doing so, they hope to reduce the number of coupons that must be sampled by each collector in order to obtain a full set. This extension is natural when considering real-world manifestations of the coupon collector phenomenon, and has been remarked upon and studied empirically (Hayes and Hannigan 2006, Ahmad et al. 2014, Delmarcelle 2019).

We provide the first theoretical analysis for such a scenario. We find that “coupon collecting with friends” can indeed significantly reduce the number of coupons each collector must sample, and raises interesting connections to the more traditional variants of the problem. While our analysis is in most cases asymptotically tight, there are several open questions raised, regarding finer-grained analysis of both “coupon collecting with friends,” and of a long-studied variant of the original problem in which a collector requires multiple full sets of coupons.},
  author       = {Alistarh, Dan-Adrian and Davies, Peter},
  booktitle    = {Structural Information and Communication Complexity},
  isbn         = {9783030795269},
  issn         = {1611-3349},
  location     = {Wrocław, Poland},
  pages        = {3--12},
  publisher    = {Springer Nature},
  title        = {{Collecting coupons is faster with friends}},
  doi          = {10.1007/978-3-030-79527-6_1},
  volume       = {12810},
  year         = {2021},
}

@inproceedings{9678,
  abstract     = {We introduce a new graph problem, the token dropping game, and we show how to solve it efficiently in a distributed setting. We use the token dropping game as a tool to design an efficient distributed algorithm for stable orientations and more generally for locally optimal semi-matchings. The prior work by Czygrinow et al. (DISC 2012) finds a stable orientation in O(Δ^5) rounds in graphs of maximum degree Δ, while we improve it to O(Δ^4) and also prove a lower bound of Ω(Δ). For the more general problem of locally optimal semi-matchings, the prior upper bound is O(S^5) and our new algorithm runs in O(C · S^4) rounds, which is an improvement for C = o(S); here C and S are the maximum degrees of customers and servers, respectively.},
  author       = {Brandt, Sebastian and Keller, Barbara and Rybicki, Joel and Suomela, Jukka and Uitto, Jara},
  booktitle    = {Annual ACM Symposium on Parallelism in Algorithms and Architectures},
  isbn         = {9781450380706},
  location     = { Virtual Event, United States},
  pages        = {129--139},
  title        = {{Efficient load-balancing through distributed token dropping}},
  doi          = {10.1145/3409964.3461785},
  year         = {2021},
}

@inproceedings{10049,
  abstract     = {While messaging systems with strong security guarantees are widely used in practice, designing a protocol that scales efficiently to large groups and enjoys similar security guarantees remains largely open. The two existing proposals to date are ART (Cohn-Gordon et al., CCS18) and TreeKEM (IETF, The Messaging Layer Security Protocol, draft). TreeKEM is the currently considered candidate by the IETF MLS working group, but dynamic group operations (i.e. adding and removing users) can cause efficiency issues. In this paper we formalize and analyze a variant of TreeKEM which we term Tainted TreeKEM (TTKEM for short). The basic idea underlying TTKEM was suggested by Millican (MLS mailing list, February 2018). This version is more efficient than TreeKEM for some natural distributions of group operations, we quantify this through simulations.Our second contribution is two security proofs for TTKEM which establish post compromise and forward secrecy even against adaptive attackers. The security loss (to the underlying PKE) in the Random Oracle Model is a polynomial factor, and a quasipolynomial one in the Standard Model. Our proofs can be adapted to TreeKEM as well. Before our work no security proof for any TreeKEM-like protocol establishing tight security against an adversary who can adaptively choose the sequence of operations was known. We also are the first to prove (or even formalize) active security where the server can arbitrarily deviate from the protocol specification. Proving fully active security – where also the users can arbitrarily deviate – remains open.},
  author       = {Klein, Karen and Pascual Perez, Guillermo and Walter, Michael and Kamath Hosdurg, Chethan and Capretto, Margarita and Cueto Noval, Miguel and Markov, Ilia and Yeo, Michelle X and Alwen, Joel F and Pietrzak, Krzysztof Z},
  booktitle    = {2021 IEEE Symposium on Security and Privacy },
  location     = {San Francisco, CA, United States},
  pages        = {268--284},
  publisher    = {IEEE},
  title        = {{Keep the dirt: tainted TreeKEM, adaptively and actively secure continuous group key agreement}},
  doi          = {10.1109/sp40001.2021.00035},
  year         = {2021},
}

@article{10180,
  abstract     = {The growing energy and performance costs of deep learning have driven the community to reduce the size of neural networks by selectively pruning components. Similarly to their biological counterparts, sparse networks generalize just as well, sometimes even better than, the original dense networks. Sparsity promises to reduce the memory footprint of regular networks to fit mobile devices, as well as shorten training time for ever growing networks. In this paper, we survey prior work on sparsity in deep learning and provide an extensive tutorial of sparsification for both inference and training. We describe approaches to remove and add elements of neural networks, different training strategies to achieve model sparsity, and mechanisms to exploit sparsity in practice. Our work distills ideas from more than 300 research papers and provides guidance to practitioners who wish to utilize sparsity today, as well as to researchers whose goal is to push the frontier forward. We include the necessary background on mathematical methods in sparsification, describe phenomena such as early structure adaptation, the intricate relations between sparsity and the training process, and show techniques for achieving acceleration on real hardware. We also define a metric of pruned parameter efficiency that could serve as a baseline for comparison of different sparse networks. We close by speculating on how sparsity can improve future workloads and outline major open problems in the field.},
  author       = {Hoefler, Torsten and Alistarh, Dan-Adrian and Ben-Nun, Tal and Dryden, Nikoli and Peste, Elena-Alexandra},
  issn         = {1533-7928},
  journal      = {Journal of Machine Learning Research},
  number       = {241},
  pages        = {1--124},
  publisher    = {Journal of Machine Learning Research},
  title        = {{Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks}},
  volume       = {22},
  year         = {2021},
}

@inproceedings{10216,
  abstract     = {This paper reports a new concurrent graph data structure that supports updates of both edges and vertices and queries: Breadth-first search, Single-source shortest-path, and Betweenness centrality. The operations are provably linearizable and non-blocking.},
  author       = {Chatterjee, Bapi and Peri, Sathya and Sa, Muktikanta},
  booktitle    = {35th International Symposium on Distributed Computing},
  isbn         = {9-783-9597-7210-5},
  issn         = {1868-8969},
  location     = {Freiburg, Germany},
  publisher    = {Schloss Dagstuhl - Leibniz Zentrum für Informatik},
  title        = {{Brief announcement: Non-blocking dynamic unbounded graphs with worst-case amortized bounds}},
  doi          = {10.4230/LIPIcs.DISC.2021.52},
  volume       = {209},
  year         = {2021},
}

@inproceedings{10217,
  abstract     = {This paper gives tight logarithmic lower bounds on the solo step complexity of leader election in an asynchronous shared-memory model with single-writer multi-reader (SWMR) registers, for both deterministic and randomized obstruction-free algorithms. The approach extends to lower bounds for deterministic and randomized obstruction-free algorithms using multi-writer registers under bounded write concurrency, showing a trade-off between the solo step complexity of a leader election algorithm, and the worst-case number of stalls incurred by a processor in an execution.},
  author       = {Alistarh, Dan-Adrian and Gelashvili, Rati and Nadiradze, Giorgi},
  booktitle    = {35th International Symposium on Distributed Computing},
  isbn         = {9-783-9597-7210-5},
  issn         = {1868-8969},
  location     = {Freiburg, Germany},
  publisher    = {Schloss Dagstuhl - Leibniz Zentrum für Informatik},
  title        = {{Lower bounds for shared-memory leader election under bounded write contention}},
  doi          = {10.4230/LIPIcs.DISC.2021.4},
  volume       = {209},
  year         = {2021},
}

@inproceedings{10218,
  abstract     = {Let G be a graph on n nodes. In the stochastic population protocol model, a collection of n indistinguishable, resource-limited nodes collectively solve tasks via pairwise interactions. In each interaction, two randomly chosen neighbors first read each other’s states, and then update their local states. A rich line of research has established tight upper and lower bounds on the complexity of fundamental tasks, such as majority and leader election, in this model, when G is a clique. Specifically, in the clique, these tasks can be solved fast, i.e., in n polylog n pairwise interactions, with high probability, using at most polylog n states per node. In this work, we consider the more general setting where G is an arbitrary graph, and present a technique for simulating protocols designed for fully-connected networks in any connected regular graph. Our main result is a simulation that is efficient on many interesting graph families: roughly, the simulation overhead is polylogarithmic in the number of nodes, and quadratic in the conductance of the graph. As an example, this implies that, in any regular graph with conductance φ, both leader election and exact majority can be solved in φ^{-2} ⋅ n polylog n pairwise interactions, with high probability, using at most φ^{-2} ⋅ polylog n states per node. This shows that there are fast and space-efficient population protocols for leader election and exact majority on graphs with good expansion properties.},
  author       = {Alistarh, Dan-Adrian and Gelashvili, Rati and Rybicki, Joel},
  booktitle    = {35th International Symposium on Distributed Computing},
  isbn         = {9-783-9597-7210-5},
  issn         = {1868-8969},
  location     = {Freiburg, Germany},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Brief announcement: Fast graphical population protocols}},
  doi          = {10.4230/LIPIcs.DISC.2021.43},
  volume       = {209},
  year         = {2021},
}

@inproceedings{10219,
  abstract     = {We show that any algorithm that solves the sinkless orientation problem in the supported LOCAL model requires Ω(log n) rounds, and this is tight. The supported LOCAL is at least as strong as the usual LOCAL model, and as a corollary this also gives a new, short and elementary proof that shows that the round complexity of the sinkless orientation problem in the deterministic LOCAL model is Ω(log n).},
  author       = {Korhonen, Janne and Paz, Ami and Rybicki, Joel and Schmid, Stefan and Suomela, Jukka},
  booktitle    = {35th International Symposium on Distributed Computing},
  isbn         = {9-783-9597-7210-5},
  issn         = {1868-8969},
  location     = {Freiburg, Germany},
  publisher    = {Schloss Dagstuhl - Leibniz Zentrum für Informatik},
  title        = {{Brief announcement: Sinkless orientation is hard also in the supported LOCAL model}},
  doi          = {10.4230/LIPIcs.DISC.2021.58},
  volume       = {209},
  year         = {2021},
}

@phdthesis{10429,
  abstract     = {The scalability of concurrent data structures and distributed algorithms strongly depends on
reducing the contention for shared resources and the costs of synchronization and communication. We show how such cost reductions can be attained by relaxing the strict consistency conditions required by sequential implementations. In the first part of the thesis, we consider relaxation in the context of concurrent data structures. Specifically, in data structures 
such as priority queues, imposing strong semantics renders scalability impossible, since a correct implementation of the remove operation should return only the element with highest priority. Intuitively, attempting to invoke remove operations concurrently  creates a race condition. This bottleneck  can be circumvented by relaxing semantics of the affected data structure, thus allowing removal of the elements which are no longer required to have the highest priority. We prove that the randomized implementations of relaxed data structures provide provable guarantees on the priority of the removed elements even under concurrency. Additionally, we show that in some cases the relaxed data structures can be used to scale the classical algorithms which are usually implemented with the exact ones. In the second part, we study parallel variants of the  stochastic gradient descent (SGD) algorithm, which distribute computation  among the multiple processors, thus reducing the running time. Unfortunately, in order for standard parallel SGD to succeed, each processor has to maintain a local copy of the necessary model parameter, which is identical to the local copies of other processors; the overheads from this perfect consistency in terms of communication and synchronization can negate the speedup gained by distributing the computation. We show that the consistency conditions required by SGD can be  relaxed, allowing the algorithm to be more flexible in terms of tolerating quantized communication, asynchrony, or even crash faults, while its convergence remains asymptotically the same.},
  author       = {Nadiradze, Giorgi},
  issn         = {2663-337X},
  pages        = {132},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{On achieving scalability through relaxation}},
  doi          = {10.15479/at:ista:10429},
  year         = {2021},
}

@inproceedings{10432,
  abstract     = {One key element behind the recent progress of machine learning has been the ability to train machine learning models in large-scale distributed shared-memory and message-passing environments. Most of these models are trained employing variants of stochastic gradient descent (SGD) based optimization, but most methods involve some type of consistency relaxation relative to sequential SGD, to mitigate its large communication or synchronization costs at scale. In this paper, we introduce a general consistency condition covering communication-reduced and asynchronous distributed SGD implementations. Our framework, called elastic consistency, decouples the system-specific aspects of the implementation from the SGD convergence requirements, giving a general way to obtain convergence bounds for a wide variety of distributed SGD methods used in practice. Elastic consistency can be used to re-derive or improve several previous convergence bounds in message-passing and shared-memory settings, but also to analyze new models and distribution schemes. As a direct application, we propose and analyze a new synchronization-avoiding scheduling scheme for distributed SGD, and show that it can be used to efficiently train deep convolutional models for image classification.},
  author       = {Nadiradze, Giorgi and Markov, Ilia and Chatterjee, Bapi and Kungurtsev, Vyacheslav  and Alistarh, Dan-Adrian},
  booktitle    = {Proceedings of the AAAI Conference on Artificial Intelligence},
  location     = {Virtual},
  number       = {10},
  pages        = {9037--9045},
  title        = {{Elastic consistency: A practical consistency model for distributed stochastic gradient descent}},
  volume       = {35},
  year         = {2021},
}

@inproceedings{10435,
  abstract     = {Decentralized optimization is emerging as a viable alternative for scalable distributed machine learning, but also introduces new challenges in terms of synchronization costs. To this end, several communication-reduction techniques, such as non-blocking communication, quantization, and local steps, have been explored in the decentralized setting. Due to the complexity of analyzing optimization in such a relaxed setting, this line of work often assumes \emph{global} communication rounds, which require additional synchronization. In this paper, we consider decentralized optimization in the simpler, but harder to analyze, \emph{asynchronous gossip} model, in which communication occurs in discrete, randomly chosen pairings among nodes. Perhaps surprisingly, we show that a variant of SGD called \emph{SwarmSGD} still converges in this setting, even if \emph{non-blocking communication}, \emph{quantization}, and \emph{local steps} are all applied \emph{in conjunction}, and even if the node data distributions and underlying graph topology are both \emph{heterogenous}. Our analysis is based on a new connection with multi-dimensional load-balancing processes. We implement this algorithm and deploy it in a super-computing environment, showing that it can outperform previous decentralized methods in terms of end-to-end training time, and that it can even rival carefully-tuned large-batch SGD for certain tasks.},
  author       = {Nadiradze, Giorgi and Sabour, Amirmojtaba and Davies, Peter and Li, Shigang and Alistarh, Dan-Adrian},
  booktitle    = {35th Conference on Neural Information Processing Systems},
  location     = {Sydney, Australia},
  publisher    = {Neural Information Processing Systems Foundation},
  title        = {{Asynchronous decentralized SGD with quantized and local updates}},
  year         = {2021},
}

@inproceedings{9823,
  abstract     = {Approximate agreement is one of the few variants of consensus that can be solved in a wait-free manner in asynchronous systems where processes communicate by reading and writing to shared memory. In this work, we consider a natural generalisation of approximate agreement on arbitrary undirected connected graphs. Each process is given a vertex of the graph as input and, if non-faulty, must output a vertex such that
all the outputs are within distance 1 of one another, and

each output value lies on a shortest path between two input values.

From prior work, it is known that there is no wait-free algorithm among   𝑛≥3  processes for this problem on any cycle of length   𝑐≥4 , by reduction from 2-set agreement (Castañeda et al. 2018).

In this work, we investigate the solvability and complexity of this task on general graphs. We give a new, direct proof of the impossibility of approximate agreement on cycles of length   𝑐≥4 , via a generalisation of Sperner’s Lemma to convex polygons. We also extend the reduction from 2-set agreement to a larger class of graphs, showing that approximate agreement on these graphs is unsolvable. On the positive side, we present a wait-free algorithm for a class of graphs that properly contains the class of chordal graphs.},
  author       = {Alistarh, Dan-Adrian and Ellen, Faith and Rybicki, Joel},
  booktitle    = {Structural Information and Communication Complexity},
  isbn         = {9783030795269},
  issn         = {16113349},
  location     = {Wrocław, Poland},
  pages        = {87--105},
  publisher    = {Springer Nature},
  title        = {{Wait-free approximate agreement on graphs}},
  doi          = {10.1007/978-3-030-79527-6_6},
  volume       = {12810},
  year         = {2021},
}

@article{9827,
  abstract     = {The Nearest neighbour search (NNS) is a fundamental problem in many application domains dealing with multidimensional data. In a concurrent setting, where dynamic modifications are allowed, a linearizable implementation of the NNS is highly desirable.This paper introduces the LockFree-kD-tree (LFkD-tree ): a lock-free concurrent kD-tree, which implements an abstract data type (ADT) that provides the operations Add, Remove, Contains, and NNS. Our implementation is linearizable. The operations in the LFkD-tree use single-word read and compare-and-swap (Image 1 ) atomic primitives, which are readily supported on available multi-core processors. We experimentally evaluate the LFkD-tree using several benchmarks comprising real-world and synthetic datasets. The experiments show that the presented design is scalable and achieves significant speed-up compared to the implementations of an existing sequential kD-tree and a recently proposed multidimensional indexing structure, PH-tree.},
  author       = {Chatterjee, Bapi and Walulya, Ivan and Tsigas, Philippas},
  issn         = {0304-3975},
  journal      = {Theoretical Computer Science},
  keywords     = {Concurrent data structure, kD-tree, Nearest neighbor search, Similarity search, Lock-free, Linearizability},
  pages        = {27--48},
  publisher    = {Elsevier},
  title        = {{Concurrent linearizable nearest neighbour search in LockFree-kD-tree}},
  doi          = {10.1016/j.tcs.2021.06.041},
  volume       = {886},
  year         = {2021},
}

@inproceedings{9933,
  abstract     = {In this paper, we study the power and limitations of component-stable algorithms in the low-space model of Massively Parallel Computation (MPC). Recently Ghaffari, Kuhn and Uitto (FOCS 2019) introduced the class of component-stable low-space MPC algorithms, which are, informally, defined as algorithms for which the outputs reported by the nodes in different connected components are required to be independent. This very natural notion was introduced to capture most (if not all) of the known efficient MPC algorithms to date, and it was the first general class of MPC algorithms for which one can show non-trivial conditional lower bounds. In this paper we enhance the framework of component-stable algorithms and investigate its effect on the complexity of randomized and deterministic low-space MPC. Our key contributions include: 1) We revise and formalize the lifting approach of Ghaffari, Kuhn and Uitto. This requires a very delicate amendment of the notion of component stability, which allows us to fill in gaps in the earlier arguments. 2) We also extend the framework to obtain conditional lower bounds for deterministic algorithms and fine-grained lower bounds that depend on the maximum degree Δ. 3) We demonstrate a collection of natural graph problems for which non-component-stable algorithms break the conditional lower bound obtained for component-stable algorithms. This implies that, for both deterministic and randomized algorithms, component-stable algorithms are conditionally weaker than the non-component-stable ones.

Altogether our results imply that component-stability might limit the computational power of the low-space MPC model, paving the way for improved upper bounds that escape the conditional lower bound setting of Ghaffari, Kuhn, and Uitto.},
  author       = {Czumaj, Artur and Davies, Peter and Parter, Merav},
  booktitle    = {Proceedings of the 2021 ACM Symposium on Principles of Distributed Computing},
  isbn         = {9781450385480},
  location     = {Virtual, Italy},
  pages        = {481–491},
  publisher    = {Association for Computing Machinery},
  title        = {{Component stability in low-space massively parallel computation}},
  doi          = {10.1145/3465084.3467903},
  year         = {2021},
}

@inproceedings{9935,
  abstract     = {We present a deterministic O(log log log n)-round low-space Massively Parallel Computation (MPC) algorithm for the classical problem of (Δ+1)-coloring on n-vertex graphs. In this model, every machine has sublinear local space of size n^φ for any arbitrary constant φ \in (0,1). Our algorithm works under the relaxed setting where each machine is allowed to perform exponential local computations, while respecting the n^φ space and bandwidth limitations.

Our key technical contribution is a novel derandomization of the ingenious (Δ+1)-coloring local algorithm by Chang-Li-Pettie (STOC 2018, SIAM J. Comput. 2020). The Chang-Li-Pettie algorithm runs in T_local =poly(loglog n) rounds, which sets the state-of-the-art randomized round complexity for the problem in the local model. Our derandomization employs a combination of tools, notably pseudorandom generators (PRG) and bounded-independence hash functions.

The achieved round complexity of O(logloglog n) rounds matches the bound of log(T_local ), which currently serves an upper bound barrier for all known randomized algorithms for locally-checkable problems in this model. Furthermore, no deterministic sublogarithmic low-space MPC algorithms for the (Δ+1)-coloring problem have been known before.},
  author       = {Czumaj, Artur and Davies, Peter and Parter, Merav},
  booktitle    = {Proceedings of the 2021 ACM Symposium on Principles of Distributed Computing},
  isbn         = {978-1-4503-8548-0},
  location     = {Virtual, Italy},
  pages        = {469–479},
  publisher    = {Association for Computing Machinery},
  title        = {{Improved deterministic (Δ+1) coloring in low-space MPC}},
  doi          = {10.1145/3465084.3467937},
  year         = {2021},
}

@inproceedings{9951,
  abstract     = {There has recently been a surge of interest in the computational and complexity properties of the population model, which assumes n anonymous, computationally-bounded nodes, interacting at random, with the goal of jointly computing global predicates. Significant work has gone towards investigating majority or consensus dynamics in this model: that is, assuming that every node is initially in one of two states X or Y, determine which state had higher initial count.

In this paper, we consider a natural generalization of majority/consensus, which we call comparison : in its simplest formulation, we are given two baseline states, X and Y, present in any initial configuration in fixed, but possibly small counts. One of these states has higher count than the other: we will assume |X_0| > C |Y_0| for some constant C > 1. The challenge is to design a protocol by which nodes can quickly and reliably decide on which of the baseline states X_0 and Y_0 has higher initial count. We begin by analyzing a simple and general dynamics solving the above comparison problem, which uses O( log n ) states per node, and converges in O(log n) (parallel) time, with high probability, to a state where the whole population votes on opinions X or Y at rates proportional to the initial concentrations of |X_0| vs. |Y_0|. We then describe how this procedure can be bootstrapped to solve comparison, i.e. have every node in the population reach the "correct'' decision, with probability 1 - o(1), at the cost of O (log log n) additional states. Further, we prove that this dynamics is self-stabilizing, in the sense that it converges to the correct decision from arbitrary initial states, and leak-robust, in the sense that it can withstand spurious faulty reactions, which are known to occur in practical implementations of population protocols. Our analysis is based on a new martingale concentration result relating the discrete-time evolution of a population protocol to its expected (steady-state) analysis, which should be a useful tool when analyzing opinion dynamics and epidemic dissemination in the population model.},
  author       = {Alistarh, Dan-Adrian and Töpfer, Martin and Uznański, Przemysław},
  booktitle    = {Proceedings of the 2021 ACM Symposium on Principles of Distributed Computing},
  isbn         = {9781450385480},
  location     = {Virtual, Italy},
  pages        = {55--65},
  publisher    = {Association for Computing Machinery},
  title        = {{Comparison dynamics in population protocols}},
  doi          = {10.1145/3465084.3467915},
  year         = {2021},
}