@article{22102,
  abstract     = {Differential privacy (DP) has established itself as one of the standards for ensuring privacy of individual data. However, reasoning about DP is a challenging and error-prone task, hence methods for formal verification and refutation of DP properties have received significant interest in recent years. In this work, we present a novel method for automated formal refutation of є-DP. Our method refutes є-DP by searching for a pair of inputs together with a non-negative function over outputs whose expected value on these two inputs differs by a significant amount. The two inputs and the non-negative function over outputs are computed simultaneously, by utilizing upper expectation supermartingales and lower expectation submartingales from probabilistic program analysis, which we leverage to introduce a sound and complete proof rule for є-DP refutation. To the best of our knowledge, our method is the first method for є-DP refutation to offer the following four desirable features: (1) it is fully automated, (2) it is applicable to stochastic mechanisms with sampling instructions from both discrete and continuous distributions, (3) it provides soundness guarantees, and (4) it provides semi-completeness guarantees. Our experiments show that our prototype tool SuperDP achieves superior performance compared to the state of the art and manages to refute є-DP for a number of challenging examples collected from the literature, including ones that were out of the reach of prior methods.},
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshadi, Ehsan and Zikelic, Dorde},
  issn         = {2475-1421},
  journal      = {Proceedings of the ACM on Programming Languages},
  keywords     = {Static Program Analysis, Differential Privacy, Probabilistic Programming, Martingales},
  number       = {PLDI},
  publisher    = {Association for Computing Machinery},
  title        = {{SuperDP: Differential privacy refutation via supermartingales}},
  doi          = {10.1145/3808296},
  volume       = {10},
  year         = {2026},
}

@misc{22134,
  abstract     = {This artifact provides the source code, benchmarks, and scripts necessary to build and reproduce the experimental results for `SuperDP` (Accepted at PLDI 2026). It also includes instructions for running the tool on user-provided inputs.},
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshadi, Ehsan and Zikelic, Dorde},
  publisher    = {Zenodo},
  title        = {{SuperDP: Differential Privacy Refutation via Supermartingales}},
  doi          = {10.5281/ZENODO.18930113},
  year         = {2026},
}

@inproceedings{21411,
  abstract     = {To achieve fast recovery from link failures, most modern communication networks feature fully
decentralized fast re-routing mechanisms. These re-routing mechanisms rely on pre-installed static re-routing rules at the nodes (the routers), which depend only on local failure information, namely on the failed links incident to the node. Ideally, a network is perfectly resilient: the re-routing rules ensure that packets are always successfully routed to their destinations as long as the source and the destination are still physically connected in the underlying network after the failures. Unfortunately, there are examples where achieving perfect resilience is not possible. Surprisingly, only very little is known about the algorithmic aspect of when and how perfect resilience can be achieved. We investigate the computational complexity of analyzing such local fast re-routing mechanisms. Our main result is a negative one: we show that even checking whether a given set of static re-routing rules ensures perfect resilience is coNP-complete. Additionally, we investigate other fundamental variations of the problem. In particular, we show that our coNP-completeness proof also applies to scenarios where the re-routing rules have specific patterns (known as skipping in the literature). On the positive side, for scenarios where nodes do not have information about the link from which a packet arrived (the so-called in-port), we present a linear-time algorithm to realize perfect resilience whenever possible (which we show can also be determined in linear time). },
  author       = {Bentert, Matthias and Ceylan, Esra and Hübner, Valentin and Schmid, Stefan and Srba, Jiří},
  booktitle    = {29th International Conference on Principles of Distributed Systems},
  isbn         = {9783959774093},
  issn         = {1868-8969},
  location     = {Iaşi, Romania},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Fast re-routing in networks: On the complexity of perfect resilience}},
  doi          = {10.4230/LIPIcs.OPODIS.2025.31},
  volume       = {361},
  year         = {2026},
}

@article{21661,
  abstract     = {Model checking undiscounted reachability and expected-reward properties on Markov decision processes (MDPs) are key for the verification of systems that act under uncertainty. Popular algorithms are policy iteration and variants of value iteration; in tool competitions, most participants rely on the latter. These algorithms generally need worst-case exponential time. However, the problem can equally be formulated as a linear programme, solvable in polynomial time. In this paper, we give a detailed overview of today’s state-of-the-art algorithms for MDP model checking with a focus on performance and correctness. We highlight their fundamental differences, and describe various optimizations and implementation variants. We experimentally compare floating-point and exact-arithmetic implementations of all algorithms on three benchmark sets using two probabilistic model checkers. Our results show that (optimistic) value iteration is a sensible default, but other algorithms are preferable in specific settings. This paper thereby provides a guide for MDP verification practitioners—tool builders and users alike.},
  author       = {Hartmanns, Arnd and Junges, Sebastian and Quatmann, Tim and Weininger, Maximilian},
  issn         = {1433-2787},
  journal      = {International Journal on Software Tools for Technology Transfer},
  keywords     = {Quantitative model checking, Markov decision process, Linear programming, Value iteration, Policy iteration},
  publisher    = {Springer Nature},
  title        = {{The revised practitioner’s guide to MDP model checking algorithms}},
  doi          = {10.1007/s10009-026-00848-y},
  year         = {2026},
}

@inproceedings{21717,
  abstract     = {Robust Markov Decision Processes (RMDPs) generalize classical MDPs that consider uncertainties in transition probabilities by defining a set of possible transition functions. An objective is a set of runs (or infinite trajectories) of the RMDP, and the value for an objective is the maximal probability that the agent can guarantee against the adversarial environment. We consider (a) reachability objectives, where given a target set of states, the goal is to eventually arrive at one of them; and (b) parity objectives, which are a canonical representation for ω-regular objectives. The qualitative analysis problem asks whether the objective can be ensured with probability 1. In this work, we study the qualitative problem for reachability and parity objectives on RMDPs without making any assumption over the structures of the RMDPs, e.g., unichain or aperiodic. Our contributions are twofold. We first present efficient algorithms with oracle access to uncertainty sets that solve qualitative problems of reachability and parity objectives. We then report experimental results demonstrating the effectiveness of our oracle-based approach on classical RMDP examples from the literature scaling up to thousands of states.},
  author       = {Asadi, Ali and Chatterjee, Krishnendu and Kafshdar Goharshadi, Ehsan and Karrabi, Mehrdad and Shafiee, Ali},
  booktitle    = {Proceedings of the 40th AAAI Conference on Artificial Intelligence},
  issn         = {2374-3468},
  location     = {Singapore, Singapore},
  number       = {43},
  pages        = {36137--36145},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Qualitative analysis of ω-regular objectives on robust MDPs}},
  doi          = {10.1609/aaai.v40i43.40931},
  volume       = {40},
  year         = {2026},
}

@inproceedings{21722,
  abstract     = {Partially observable Markov decision processes (POMDPs) are a central model for uncertainty in sequential decision making. The most basic objective is the reachability objective, where a target set must be eventually visited, and the more general parity objectives can model all omega-regular specifications. For such objectives, the computational analysis problems are the following: (a) qualitative analysis that asks whether the objective can be satisfied with probability 1 (almost-sure winning) or probability arbitrarily close to 1 (limit-sure winning); and (b) quantitative analysis that asks for the approximation of the optimal probability of satisfying the objective. For general POMDPs, almost-sure analysis for reachability objectives is EXPTIME-complete, but limit-sure and quantitative analyses for reachability objectives are undecidable; almost-sure, limit-sure, and quantitative analyses for parity objectives are all undecidable. A special class of POMDPs, called revealing POMDPs, has been studied recently in several works, and for this subclass the almost-sure analysis for parity objectives was shown to be EXPTIME-complete. In this work, we show that for revealing POMDPs the limit-sure analysis for parity objectives is EXPTIME-complete, and even the quantitative analysis for parity objectives can be achieved in EXPTIME.},
  author       = {Asadi, Ali and Chatterjee, Krishnendu and Lurie, David and Saona Urmeneta, Raimundo J},
  booktitle    = {Proceedings of the AAAI Conference on Artificial Intelligence},
  issn         = {2374-3468},
  location     = {Singapore, Singapore},
  number       = {43},
  pages        = {36146--36154},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Revealing POMDPs: Qualitative and quantitative analysis for parity objectives}},
  doi          = {10.1609/aaai.v40i43.40932},
  volume       = {40},
  year         = {2026},
}

@article{22101,
  abstract     = {Evolutionary biology examines how the genetic and phenotypic composition
of populations changes over time. An important goal is to determine the
fixation probability of a single advantageous mutant that arises in a homogeneous
population of N residents. Many real populations experience environmental
gradients that cause mutations to be beneficial in some spatial
regions but harmful in others. Here, we study the fixation probability of a
mutant placed on a simple one-dimensional spatial structure that experiences
such a gradient. The mutant’s fitness varies linearly from1 − s to 1 + s, whereas
the resident fitness is constant and equal to 1. The existing literature suggests
that such heterogeneity in the mutant’s fitness should lead to a decrease in its
fixation probability. However, in this work, we find that small, non-negligible
gradients (s < 1=√N) substantially increase the fixation probability,while larger
gradients (s > (log N)/√N) substantially decrease it.Moreover, we quantify the
strength of this phenomenon analytically and we precisely delimit the range of
the gradients for which it occurs. Our computer simulations closely match
those findings. Altogether, our results indicate that subjecting a simple
population structure to natural environmental conditions can produce strong
counterintuitive effects.},
  author       = {Svoboda, Jakub and Nemati, Hossein and Tkadlec, Josef and Kaveh, Kamran and Chatterjee, Krishnendu},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  publisher    = {Springer Nature},
  title        = {{The effect of the fitness gradient on fixation probability}},
  doi          = {10.1038/s41467-026-71777-2},
  volume       = {17},
  year         = {2026},
}

@article{20857,
  abstract     = {Evolutionary games provide a flexible mathematical framework for many problems in biology and social evolution. Prisoners’ dilemma, and in particular, the important special case of donation games, represents social dilemmas where cooperation is mutually beneficial, yet defection is preferred by selfish agents. In evolutionary games on networks, the agents interact over a population structure. The existence of population structures that promote cooperative behavior is a fascinating and active research topic. Previous research establishes structures promoting cooperation in the limit of weak selection where the benefit-to-cost ratio β exceeds 1.5. The existence of such structures for medium and strong selection for 1 < ß < 2 and for weak selection for 1 < ß < 1.5 has been a long-standing open question. First, we answer the open questions in the affirmative: For every selection strength and every ß > 1, we construct networks promoting cooperation. Second, we present a robustness result with respect to β and selection strength: Our structures promote cooperation for a range of these parameter values rather than specific parameter values. Finally, we supplement our theoretical results with simulation results on small population structures that show the effectiveness of our construction over well-studied population structures.},
  author       = {Svoboda, Jakub and Chatterjee, Krishnendu},
  issn         = {1091-6490},
  journal      = {Proceedings of the National Academy of Sciences},
  number       = {51},
  pages        = {e2524109122},
  publisher    = {National Academy of Sciences},
  title        = {{Promoters of cooperation in evolutionary games}},
  doi          = {10.1073/pnas.2524109122},
  volume       = {122},
  year         = {2025},
}

@inproceedings{21268,
  abstract     = {We consider multiple-environment Markov decision processes (MEMDP), which consist of a finite set of MDPs over the same state space, representing different scenarios of transition structure and probability. The value of a strategy is the probability to satisfy the objective, here a parity objective, in the worst-case scenario, and the value of an MEMDP is the supremum of the values achievable by a strategy.
We show that deciding whether the value is 1 is a PSPACE-complete problem, and even in P when the number of environments is fixed, along with new insights to the almost-sure winning problem, which is to decide if there exists a strategy with value 1. Pure strategies are sufficient for theses problems, whereas randomization is necessary in general when the value is smaller than 1. We present an algorithm to approximate the value, running in double exponential space. Our results are in contrast to the related model of partially-observable MDPs where all these problems are known to be undecidable.},
  author       = {Chatterjee, Krishnendu and Doyen, Laurent and Raskin, Jean-Francois and Sankur, Ocan},
  booktitle    = {52nd International Colloquium on Automata, Languages, and Programming},
  isbn         = {9783959773720},
  location     = {Aarhus, Denmark},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{The value problem for multiple-environment MDPs with parity objective}},
  doi          = {10.4230/LIPIcs.ICALP.2025.150},
  year         = {2025},
}

@inproceedings{21281,
  abstract     = {A strategy profile in a multi-player game is a Nash equilibrium if no player can unilaterally deviate to achieve a strictly better payoff. A profile is an ε-Nash equilibrium if no player can gain more than ε by unilaterally deviating from their strategy. In this work, we use ε-Nash equilibria to approximate the computation of Nash equilibria. Specifically, we focus on turn-based, multiplayer stochastic games played on graphs, where players are restricted to stationary strategies - strategies that use randomness but not memory.
The problem of deciding the constrained existence of stationary Nash equilibria - where each player’s payoff must lie within a given interval - is known to be ∃ℝ-complete in such a setting (Hansen and Sølvsten, 2020). We extend this line of work to stationary ε-Nash equilibria and present an algorithm that solves the following promise problem: given a game with a Nash equilibrium satisfying the constraints, compute an ε-Nash equilibrium that ε-satisfies those same constraints - satisfies the constraints up to an ε additive error. Our algorithm runs in FNP^NP time.
To achieve this, we first show that if a constrained Nash equilibrium exists, then one exists where the non-zero probabilities are at least an inverse of a double-exponential in the input. We further prove that such a strategy can be encoded using floating-point representations, as in the work of Frederiksen and Miltersen (2013), which finally gives us our FNP^NP algorithm. 
We further show that the decision version of the promise problem is NP-hard. Finally, we show a partial tightness result by proving a lower bound for such techniques: if a constrained Nash equilibrium exists, then there must be one where the probabilities in the strategies are double-exponentially small.},
  author       = {Asadi, Ali and Brice, Leonard and Chatterjee, Krishnendu and Thejaswini, K. S.},
  booktitle    = {45th Annual Conference on Foundations of Software Technology and Theoretical Computer Science},
  isbn         = {9783959774062},
  location     = {Pilani, India},
  pages        = {9:1--9:17},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{ε-stationary Nash equilibria in multi-player stochastic graph games}},
  doi          = {10.4230/lipics.fsttcs.2025.9},
  volume       = {360},
  year         = {2025},
}

@inproceedings{21320,
  abstract     = {Prophet inequalities are a central object of study in optimal stopping theory. In the iid model, a gambler sees values in an online fashion, sampled independently from a given distribution. Upon observing each value, the gambler either accepts it as a reward, or irrevocably rejects it and proceeds to observe the next value. The goal of the gambler, who cannot see the future, is to maximise the expected value of the reward while competing against the expectation of a prophet (the offline maximum). In other words, one seeks to maximise the gambler-to-prophet ratio of the expectations. 
This model has been studied with infinite, finite and unknown number of values. When the gambler faces a random number of values, the model is said to have a random horizon. We consider the model in which the gambler is given a priori knowledge of the horizon’s distribution. Alijani et al. (2020) designed a single-threshold algorithm achieving a ratio of 1/2 when the random horizon has an increasing hazard rate and is independent of the values. We prove that with a single threshold, a ratio of 1/2 is actually achievable for several larger classes of horizon distributions, with the largest being known as the 𝒢 class in reliability theory. Moreover, we show that this does not extend to its dual, the  ̅𝒢 class (which includes the decreasing hazard rate class), while it can be extended to low-variance horizons. Finally, we construct the first example of a family of horizons, for which multiple thresholds are necessary to achieve a nonzero ratio. We establish that the Secretary Problem optimal stopping rule provides one such algorithm, paving the way towards the study of the model beyond single-threshold algorithms.},
  author       = {Giambartolomei, Giordano and Mallmann-Trenn, Frederik and Saona Urmeneta, Raimundo J},
  booktitle    = {52nd International Colloquium on Automata, Languages, and Programming},
  isbn         = {9783959773720},
  location     = {Aarhus, Denmark},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{IID prophet inequality with random horizon: Going beyond increasing hazard rates}},
  doi          = {10.4230/LIPIcs.ICALP.2025.87},
  volume       = {334},
  year         = {2025},
}

@inproceedings{21412,
  abstract     = {Payment channel networks (PCNs) are a promising technology that alleviates blockchain scalability by shifting the transaction load from the blockchain to the PCN. Nevertheless, the network topology has to be carefully designed to maximise the transaction throughput in PCNs. Additionally, users in PCNs also have to make optimal decisions on which transactions to forward and which to reject to prolong the lifetime of their channels. In this work, we consider an input sequence of transactions over p parties. Each transaction consists of a transaction size, source, and target, and can be either accepted or rejected (entailing a cost). The goal is to design a PCN topology among the p cooperating parties, along with the channel capacities, and then output a decision for each transaction in the sequence to minimise the cost of creating and augmenting channels, as well as the cost of rejecting transactions. Our main contribution is an 𝒪(p) approximation algorithm for the problem with p parties. We further show that with some assumptions on the distribution of transactions, we can reduce the approximation ratio to 𝒪(√p). We complement our theoretical analysis with an empirical study of our assumptions and approach in the context of the Lightning Network.},
  author       = {Chatterjee, Krishnendu and Křišťan, Jan Matyáš and Schmid, Stefan and Svoboda, Jakub and Yeo, Michelle X},
  booktitle    = {39th International Symposium on Distributed Computing},
  isbn         = {9783959774024},
  issn         = {1868-8969},
  location     = {Berlin, Germany},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Boosting payment channel network liquidity with topology optimization and transaction selection}},
  doi          = {10.4230/LIPIcs.DISC.2025.23},
  volume       = {356},
  year         = {2025},
}

@article{21413,
  abstract     = {We present a general framework for applying learning algorithms and heuristical guidance to the verification of Markov decision processes (MDPs).
The primary goal of our techniques is to improve performance by avoiding an exhaustive exploration of the state space, instead focussing on particularly relevant areas of the system, guided by heuristics. Our work builds on the previous results of Br{á}zdil et al., significantly extending it as well as refining several details and fixing errors.
The presented framework focuses on probabilistic reachability, which is a core problem in verification, and is instantiated in two distinct scenarios.
The first assumes that full knowledge of the MDP is available, in particular precise transition probabilities. It performs a heuristic-driven partial exploration of the model, yielding precise lower and upper bounds on the required probability. The second tackles the case where we may only sample the MDP without knowing the exact transition dynamics. Here, we obtain probabilistic guarantees, again in terms of both the lower and upper bounds, which provides efficient stopping criteria for the approximation. In particular, the latter is an extension of statistical model-checking (SMC) for unbounded properties in MDPs. In contrast to other related approaches, we do not restrict our attention to time-bounded (finite-horizon) or discounted properties, nor assume any particular structural properties of the MDP.},
  author       = {Brázdil, Tomáš and Chatterjee, Krishnendu and Chmelik, Martin and Forejt, Vojtěch and Kretinsky, Jan and Kwiatkowska, Marta and Meggendorfer, Tobias and Parker, David and Ujma, Mateusz},
  issn         = {2751-4838},
  journal      = {TheoretiCS},
  publisher    = {TheoretiCS Foundation},
  title        = {{Learning algorithms for verification of Markov decision processes}},
  doi          = {10.46298/theoretics.25.10},
  volume       = {4},
  year         = {2025},
}

@misc{21668,
  abstract     = {This artifact allows to review and reproduce the experiments from the paper *A Revised Practitioner's Guide to MDP Model Checking Algorithms*.
The package contains all original logfiles and derived data used to generate the plots as in the paper. Furthermore, the artifact contains the model checking tools `Storm` and `mcsta` in the version exercised in the paper, the used Docker container, as well as benchmark instances and execution scripts to reproduce the experiments.

See also the artifact of the conference paper: https://zenodo.org/records/7509474},
  author       = {Hartmanns, Arnd and Junges, Sebastian and Quatmann, Tim and Weininger, Maximilian},
  publisher    = {Zenodo},
  title        = {{Benchmark data for the revised practitioner's guide to MDP model checking algorithms}},
  doi          = {10.5281/ZENODO.14500423},
  year         = {2025},
}

@article{17037,
  abstract     = {Zero-sum stochastic games are parameterized by payoffs, transitions, and possibly a discount rate. In this article, we study how the main solution concepts, the discounted and undiscounted values, vary when these parameters are perturbed. We focus on the marginal values, introduced by Mills in 1956 in the context of matrix games—that is, the directional derivatives of the value along any fixed perturbation. We provide a formula for the marginal values of a discounted stochastic game. Further, under mild assumptions on the perturbation, we provide a formula for their limit as the discount rate vanishes and for the marginal values of an undiscounted stochastic game. We also show, via an example, that the two latter differ in general.},
  author       = {Attia, Luc and Oliu-Barton, Miquel and Saona Urmeneta, Raimundo J},
  issn         = {1526-5471},
  journal      = {Mathematics of Operations Research},
  number       = {1},
  pages        = {482--505},
  publisher    = {Institute for Operations Research and the Management Sciences},
  title        = {{Marginal values of a stochastic game}},
  doi          = {10.1287/moor.2023.0297},
  volume       = {50},
  year         = {2025},
}

@article{18529,
  abstract     = {Temporal networks are obtained from time-dependent interactions among individuals, whereas the interactions can be emails, phone calls, face-to-face meetings, or work collaboration. In this article, a temporal game framework is established, in which interactions among rational individuals are embedded into two-player games in a time-dependent manner. This allows studying the time-dependent complexity and variability of interactions, and the way they affect prosocial behaviors. Based on this simple mathematical model, it is found that the level of cooperation is promoted when the time of collaboration is equally limited for every individual. This observation is confirmed by a series of systematic human experiments on over 1,400 subjects, forming a foundation for comprehensively describing human temporal interactions in collaboration. The research results reveal an important incentive for human cooperation, leading to a better understanding of a fascinating aspect of human nature in society.},
  author       = {Zhang, Yichao and Wang, Jiasheng and Wen, Guanghui and Guan, Jihong and Zhou, Shuigeng and Chen, Guanrong and Chatterjee, Krishnendu and Perc, Matjaz},
  issn         = {2327-4697},
  journal      = {IEEE Transactions on Network Science and Engineering},
  number       = {1},
  pages        = {4--12},
  publisher    = {IEEE},
  title        = {{Limitation of time promotes cooperation in structured collaboration systems}},
  doi          = {10.1109/TNSE.2024.3481434},
  volume       = {12},
  year         = {2025},
}

@article{19074,
  abstract     = {The public goods game is among the most studied metaphors of cooperation in groups. In this game, individuals can use their endowments to make contributions towards a good that benefits everyone. Each individual, however, is tempted to free-ride on the contributions of others. Herein, we study repeated public goods games among asymmetric players. Previous work has explored to which extent asymmetry allows for full cooperation, such that players contribute their full endowment each round. However, by design that work focusses on equilibria where individuals make the same contribution each round. Instead, here we consider players whose contributions along the equilibrium path can change from one round to the next. We do so for three different models – one without any budget constraints, one with endowment constraints, and one in which individuals can save their current endowment to be used in subsequent rounds. In each case, we explore two key quantities: the welfare and the resource efficiency that can be achieved in equilibrium. Welfare corresponds to the sum of all players’ payoffs. Resource efficiency relates this welfare to the total contributions made by the players. Compared to constant contribution sequences, we find that time-dependent contributions can improve resource efficiency across all three models. Moreover, they can improve the players’ welfare in the model with savings.},
  author       = {Hübner, Valentin and Hilbe, Christian and Staab, Manuel and Kleshnina, Maria and Chatterjee, Krishnendu},
  issn         = {2153-0793},
  journal      = {Dynamic Games and Applications},
  pages        = {1617--1645},
  publisher    = {Springer Nature},
  title        = {{Time-dependent strategies in repeated asymmetric public goods games}},
  doi          = {10.1007/s13235-025-00627-5},
  volume       = {15},
  year         = {2025},
}

@inproceedings{19375,
  abstract     = {Despite the advances in probabilistic model checking, the scalability of the verification methods remains limited. In particular, the state space often becomes extremely large when instantiating parameterized Markov decision processes (MDPs) even with moderate values. Synthesizing policies for such huge MDPs is beyond the reach of available tools. We propose a learning-based approach to obtain a reasonable policy for such huge MDPs.

The idea is to generalize optimal policies obtained by model-checking small instances to larger ones using decision-tree learning. Consequently, our method bypasses the need for explicit state-space exploration of large models, providing a practical solution to the state-space explosion problem. We demonstrate the efficacy of our approach by performing extensive experimentation on the relevant models from the quantitative verification benchmark set. The experimental results indicate that our policies perform well, even when the size of the model is orders of magnitude beyond the reach of state-of-the-art analysis tools.},
  author       = {Azeem, Muqsit and Chakraborty, Debraj and Kanav, Sudeep and Kretinsky, Jan and Mohagheghi, Mohammadsadegh and Mohr, Stefanie and Weininger, Maximilian},
  booktitle    = {26th International Conference on Verification, Model Checking, and Abstract Interpretation},
  isbn         = {9783031827020},
  issn         = {1611-3349},
  location     = {Denver, CO, United States},
  pages        = {97--120},
  publisher    = {Springer Nature},
  title        = {{1–2–3–Go! Policy synthesis for parameterized Markov decision processes via decision-tree learning and generalization}},
  doi          = {10.1007/978-3-031-82703-7_5},
  volume       = {15530},
  year         = {2025},
}

@inproceedings{19445,
  abstract     = {In reconfiguration, we are given two solutions to a graph problem, such as Vertex Cover or Dominating Set, with each solution represented by a placement of tokens on vertices of the graph. Our task is to reconfigure one into the other using small steps while ensuring the intermediate configurations of tokens are also valid solutions. The two commonly studied settings are Token Jumping and Token Sliding, which allows moving a single token to an arbitrary or an adjacent vertex, respectively.

We introduce new rules that generalize Token Jumping, parameterized by the number of tokens allowed to move at once and by the maximum distance of each move. Our main contribution is identifying minimal rules that allow reconfiguring any possible given solution into any other for Independent Set, Vertex Cover, and Dominating Set. For each minimal rule, we also provide an efficient algorithm that finds a corresponding reconfiguration sequence.

We further focus on the rule that allows each token to move to an adjacent vertex in a single step. This natural variant turns out to be the minimal rule that guarantees reconfigurability for Vertex Cover. We determine the computational complexity of deciding whether a (shortest) reconfiguration sequence exists under this rule for the three studied problems. While reachability for Vertex Cover is shown to be in P, finding a shortest sequence is shown to be NP-complete. For Independent Set and Dominating Set, even reachability is shown to be PSPACE-complete.},
  author       = {Křišťan, Jan Matyáš and Svoboda, Jakub},
  booktitle    = {19th International Conference and Workshops on Algorithms and Computation},
  isbn         = {9789819628445},
  issn         = {1611-3349},
  location     = {Chengdu, China},
  pages        = {244--265},
  publisher    = {Springer Nature},
  title        = {{Reconfiguration using generalized token jumping}},
  doi          = {10.1007/978-981-96-2845-2_16},
  volume       = {15411},
  year         = {2025},
}

@article{19499,
  abstract     = {Quantum hardware is inherently fragile and noisy. We find that the accuracy of traditional quantum error correction algorithms can be improved depending on the hardware. Given different hardware specifications, we automatically synthesize hardware-optimal algorithms for parity correction, qubit resetting, and GHZ (Greenberger–Horne–Zeilinger) state preparation. Using stochastic techniques from computer science, our method presents a computational tool to compute exact accuracy guarantees and synthesize optimal algorithms that are often different from traditional ones. We also show that improvements can be gained with respect to the Qiskit transpiler as we compute the hardware-optimal qubit mapping for the GHZ state-preparation problem.},
  author       = {Muroya Lei, Stefanie and Chatterjee, Krishnendu and Henzinger, Thomas A},
  issn         = {1091-6490},
  journal      = {Proceedings of the National Academy of Sciences},
  number       = {12},
  publisher    = {National Academy of Sciences},
  title        = {{Hardware-optimal quantum algorithms}},
  doi          = {10.1073/pnas.2419273122},
  volume       = {122},
  year         = {2025},
}