@article{19969,
  abstract     = {In the stochastic population protocol model, we are given a connected graph with n nodes, and in every time step, a scheduler samples an edge of the graph uniformly at random and the nodes connected by this edge interact. A fundamental task in this model is stable leader election, in which all nodes start in an identical state and the aim is to reach a configuration in which (1) exactly one node is elected as leader and (2) this node remains as the unique leader no matter what sequence of interactions follows. On cliques, the complexity of this problem has recently been settled: time-optimal protocols stabilize in $\Theta(n \log n)$ expected steps using $\Theta(\log \log n)$ states, whereas protocols that use $O(1)$ states require $\Theta(n^2)$ expected steps. In this work, we investigate the complexity of stable leader election on graphs. We provide the first non-trivial time lower bounds on general graphs, showing that, when moving beyond cliques, the complexity of stable leader election can range from $O(1)$ to $\Theta(n^3)$ expected steps. We describe a protocol that is time-optimal on many graph families, but uses polynomially-many states. In contrast, we give a near-time-optimal protocol that uses only $O(\log^2 n)$ states that is at most a factor $O(\log n)$ slower. Finally, we observe that for many graphs the constant-state protocol of Beauquier et al. [OPODIS 2013] is at most a factor $O(n \log n)$ slower than the fast polynomial-state protocol, and among constant-state protocols, this protocol has near-optimal average case complexity on dense random graphs.},
  author       = {Alistarh, Dan-Adrian and Rybicki, Joel and Voitovych, Sasha},
  issn         = {1432-0452},
  journal      = {Distributed Computing},
  pages        = {207--245},
  publisher    = {Springer Nature},
  title        = {{Near-optimal leader election in population protocols on graphs}},
  doi          = {10.1007/s00446-025-00487-7},
  volume       = {38},
  year         = {2025},
}

@article{12164,
  abstract     = {A shared-memory counter is a widely-used and well-studied concurrent object. It supports two operations: An Inc operation that increases its value by 1 and a Read operation that returns its current value. In Jayanti et al. (SIAM J Comput, 30(2), 2000), Jayanti, Tan and Toueg proved a linear lower bound on the worst-case step complexity of obstruction-free implementations, from read-write registers, of a large class of shared objects that includes counters. The lower bound leaves open the question of finding counter implementations with sub-linear amortized step complexity. In this work, we address this gap. We show that n-process, wait-free and linearizable counters can be implemented from read-write registers with $O(\log^2 n)$ amortized step complexity. This is the first counter algorithm from read-write registers that provides sub-linear amortized step complexity in executions of arbitrary length. Since a logarithmic lower bound on the amortized step complexity of obstruction-free counter implementations exists, our upper bound is within a logarithmic factor of the optimal. The worst-case step complexity of the construction remains linear, which is optimal. This is obtained thanks to a new max register construction with $O(\log n)$ amortized step complexity in executions of arbitrary length in which the value stored in the register does not grow too quickly. We then leverage an existing counter algorithm by Aspnes, Attiya and Censor-Hillel [1] in which we “plug” our max register implementation to show that it remains linearizable while achieving $O(\log^2 n)$ amortized step complexity.},
  author       = {Baig, Mirza Ahad and Hendler, Danny and Milani, Alessia and Travers, Corentin},
  issn         = {1432-0452},
  journal      = {Distributed Computing},
  keywords     = {Computational Theory and Mathematics, Computer Networks and Communications, Hardware and Architecture, Theoretical Computer Science},
  pages        = {29--43},
  publisher    = {Springer Nature},
  title        = {{Long-lived counters with polylogarithmic amortized step complexity}},
  doi          = {10.1007/s00446-022-00439-5},
  volume       = {36},
  year         = {2023},
}

@article{12330,
  abstract     = {The design and implementation of efficient concurrent data structures has seen significant attention. However, most of this work has focused on concurrent data structures providing good worst-case guarantees, although, in real workloads, objects are often accessed at different rates. Efficient distribution-adaptive data structures, such as splay-trees, are known in the sequential case; however, they often are hard to translate efficiently to the concurrent case. We investigate distribution-adaptive concurrent data structures, and propose a new design called the splay-list. At a high level, the splay-list is similar to a standard skip-list, with the key distinction that the height of each element adapts dynamically to its access rate: popular elements “move up,” whereas rarely-accessed elements decrease in height. We show that the splay-list provides order-optimal amortized complexity bounds for a subset of operations, while being amenable to efficient concurrent implementation. Experiments show that the splay-list can leverage distribution-adaptivity for performance, and can outperform the only previously-known distribution-adaptive concurrent design in certain workloads.},
  author       = {Aksenov, Vitalii and Alistarh, Dan-Adrian and Drozdova, Alexandra and Mohtashami, Amirkeivan},
  issn         = {1432-0452},
  journal      = {Distributed Computing},
  pages        = {395--418},
  publisher    = {Springer Nature},
  title        = {{The splay-list: A distribution-adaptive concurrent skip-list}},
  doi          = {10.1007/s00446-022-00441-x},
  volume       = {36},
  year         = {2023},
}

@article{7939,
  abstract     = {We design fast deterministic algorithms for distance computation in the Congested Clique model. Our key contributions include:
    A $(2+\epsilon)$-approximation for all-pairs shortest paths in $O(\log^2 n/\epsilon)$ rounds on unweighted undirected graphs. With a small additional additive factor, this also applies for weighted graphs. This is the first sub-polynomial constant-factor approximation for APSP in this model.
    A $(1+\epsilon)$-approximation for multi-source shortest paths from $O(\sqrt{n})$ sources in $O(\log^2 n/\epsilon)$ rounds on weighted undirected graphs. This is the first sub-polynomial algorithm obtaining this approximation for a set of sources of polynomial size.

Our main techniques are new distance tools that are obtained via improved algorithms for sparse matrix multiplication, which we leverage to construct efficient hopsets and shortest paths. Furthermore, our techniques extend to additional distance problems for which we improve upon the state-of-the-art, including diameter approximation, and an exact single-source shortest paths algorithm for weighted undirected graphs in $\tilde{O}(n^{1/6})$ rounds.},
  author       = {Censor-Hillel, Keren and Dory, Michal and Korhonen, Janne and Leitersdorf, Dean},
  issn         = {1432-0452},
  journal      = {Distributed Computing},
  pages        = {463--487},
  publisher    = {Springer Nature},
  title        = {{Fast approximate shortest paths in the congested clique}},
  doi          = {10.1007/s00446-020-00380-5},
  volume       = {34},
  year         = {2021},
}

@article{7150,
  abstract     = {In this work, we use algebraic methods for studying distance computation and subgraph detection tasks in the congested clique model. Specifically, we adapt parallel matrix multiplication implementations to the congested clique, obtaining an $O(n^{1-2/\omega})$ round matrix multiplication algorithm, where $\omega < 2.3728639$ is the exponent of matrix multiplication. In conjunction with known techniques from centralised algorithmics, this gives significant improvements over previous best upper bounds in the congested clique model. The highlight results include:

1. triangle and 4-cycle counting in $O(n^{0.158})$ rounds, improving upon the $O(n^{1/3})$ algorithm of Dolev et al. [DISC 2012],
2. a $(1+o(1))$-approximation of all-pairs shortest paths in $O(n^{0.158})$ rounds, improving upon the $\tilde{O}(n^{1/2})$-round $(2+o(1))$-approximation algorithm given by Nanongkai [STOC 2014], and
3. computing the girth in $O(n^{0.158})$ rounds, which is the first non-trivial solution in this model.

In addition, we present a novel constant-round combinatorial algorithm for detecting 4-cycles.},
  author       = {Censor-Hillel, Keren and Kaski, Petteri and Korhonen, Janne and Lenzen, Christoph and Paz, Ami and Suomela, Jukka},
  issn         = {0178-2770},
  journal      = {Distributed Computing},
  number       = {6},
  pages        = {461--478},
  publisher    = {Springer Nature},
  title        = {{Algebraic methods in the congested clique}},
  doi          = {10.1007/s00446-016-0270-2},
  volume       = {32},
  year         = {2019},
}

@article{536,
  abstract     = {We consider the problem of consensus in the challenging classic model. In this model, the adversary is adaptive; it can choose which processors crash at any point during the course of the algorithm. Further, communication is via asynchronous message passing: there is no known upper bound on the time to send a message from one processor to another, and all messages and coin flips are seen by the adversary. We describe a new randomized consensus protocol with expected message complexity $O(n^2 \log^2 n)$ when fewer than $n/2$ processes may fail by crashing. This is an almost-linear improvement over the best previously known protocol, and within logarithmic factors of a known $\Omega(n^2)$ message lower bound. The protocol further ensures that no process sends more than $O(n \log^3 n)$ messages in expectation, which is again within logarithmic factors of optimal. We also present a generalization of the algorithm to an arbitrary number of failures t, which uses expected $O(nt + t^2 \log^2 t)$ total messages. Our approach is to build a message-efficient, resilient mechanism for aggregating individual processor votes, implementing the message-passing equivalent of a weak shared coin. Roughly, in our protocol, a processor first announces its votes to small groups, then propagates them to increasingly larger groups as it generates more and more votes. To bound the number of messages that an individual process might have to send or receive, the protocol progressively increases the weight of generated votes. The main technical challenge is bounding the impact of votes that are still “in flight” (generated, but not fully propagated) on the final outcome of the shared coin, especially since such votes might have different weights. We achieve this by leveraging the structure of the algorithm, and a technical argument based on martingale concentration bounds.
Overall, we show that it is possible to build an efficient message-passing implementation of a shared coin, and in the process (almost-optimally) solve the classic consensus problem in the asynchronous message-passing model.},
  author       = {Alistarh, Dan-Adrian and Aspnes, James and King, Valerie and Saia, Jared},
  issn         = {0178-2770},
  journal      = {Distributed Computing},
  number       = {6},
  pages        = {489--501},
  publisher    = {Springer},
  title        = {{Communication-efficient randomized consensus}},
  doi          = {10.1007/s00446-017-0315-1},
  volume       = {31},
  year         = {2018},
}