@inproceedings{18974,
  abstract     = {Reinforcement Learning (RL) from temporal logical specifications is a fundamental problem in sequential decision making. One of the basic and core such specification is the reachability specification that requires a target set to be eventually visited. Despite strong empirical results for RL from such specifications, the theoretical guarantees are bleak, including the impossibility of Probably Approximately Correct (PAC) guarantee for reachability specifications. Given the impossibility result, in this work we consider the problem of RL from reachability specifications along with the information of expected conditional distance (ECD). We present (a) lower bound results which establish the necessity of ECD information for PAC guarantees and (b) an algorithm that establishes PAC-guarantees given the ECD information. To the best of our knowledge, this is the first RL from reachability specifications that does not make any assumptions on the underlying environment to learn policies.},
  author       = {Svoboda, Jakub and Bansal, Suguman and Chatterjee, Krishnendu},
  booktitle    = {41st International Conference on Machine Learning},
  location     = {Vienna, Austria},
  pages        = {47331--47344},
  publisher    = {ML Research Press},
  title        = {{Reinforcement learning from reachability specifications: PAC guarantees with expected conditional distance}},
  volume       = {235},
  year         = {2024},
}

@article{17283,
  abstract     = {We consider the problems of statically refuting equivalence and similarity of output distributions defined by a pair of probabilistic programs. Equivalence and similarity are two fundamental relational properties of probabilistic programs that are essential for their correctness both in implementation and in compilation. In this work, we present a new method for static equivalence and similarity refutation. Our method refutes equivalence and similarity by computing a function over program outputs whose expected value with respect to the output distributions of two programs is different. The function is computed simultaneously with an upper expectation supermartingale and a lower expectation submartingale for the two programs, which we show to together provide a formal certificate for refuting equivalence and similarity. To the best of our knowledge, our method is the first approach to relational program analysis to offer the combination of the following desirable features: (1) it is fully automated, (2) it is applicable to infinite-state probabilistic programs, and (3) it provides formal guarantees on the correctness of its results. We implement a prototype of our method and our experiments demonstrate the effectiveness of our method to refute equivalence and similarity for a number of examples collected from the literature.},
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshadi, Ehsan and Novotný, Petr and Zikelic, Dorde},
  issn         = {2475-1421},
  journal      = {Proceedings of the ACM on Programming Languages},
  publisher    = {Association for Computing Machinery},
  title        = {{Equivalence and similarity refutation for probabilistic programs}},
  doi          = {10.1145/3656462},
  volume       = {8},
  year         = {2024},
}

@inproceedings{17328,
  abstract     = {We study selfish mining attacks in longest-chain blockchains like Bitcoin, but where the proof of work is replaced with efficient proof systems - like proofs of stake or proofs of space - and consider the problem of computing an optimal selfish mining attack which maximizes expected relative revenue of the adversary, thus minimizing the chain quality. To this end, we propose a novel selfish mining attack that aims to maximize this objective and formally model the attack as a Markov decision process (MDP). We then present a formal analysis procedure which computes an ϵ-tight lower bound on the optimal expected relative revenue in the MDP and a strategy that achieves this ϵ-tight lower bound, where ϵ > 0 may be any specified precision. Our analysis is fully automated and provides formal guarantees on the correctness. We evaluate our selfish mining attack and observe that it achieves superior expected relative revenue compared to two considered baselines.
In concurrent work [Sarenche FC'24] does an automated analysis on selfish mining in predictable longest-chain blockchains based on efficient proof systems. Predictable means the randomness for the challenges is fixed for many blocks (as used e.g., in Ouroboros), while we consider unpredictable (Bitcoin-like) chains where the challenge is derived from the previous block.},
  author       = {Chatterjee, Krishnendu and Ebrahimzadeh, Amirali and Karrabi, Mehrdad and Pietrzak, Krzysztof Z and Yeo, Michelle X and Zikelic, Dorde},
  booktitle    = { Proceedings of the 43rd Annual ACM Symposium on Principles of Distributed Computing},
  isbn         = {9798400706684},
  location     = {Nantes, France},
  pages        = {268--278},
  publisher    = {Association for Computing Machinery},
  title        = {{Fully automated selfish mining analysis in efficient proof systems blockchains}},
  doi          = {10.1145/3662158.3662769},
  year         = {2024},
}

@inproceedings{17329,
  abstract     = {We initiate the study of game dynamics in the population protocol model: n agents each maintain a current local strategy and interact in pairs uniformly at random. Upon each interaction, the agents play a two-person game and receive a payoff from an underlying utility function, and they can subsequently update their strategies according to a fixed local algorithm. In this setting, we ask how the distribution over agent strategies evolves over a sequence of interactions, and we introduce a new distributional equilibrium concept to quantify the quality of such distributions. As an initial example, we study a class of repeated prisoner's dilemma games, and we consider a family of simple local update algorithms that yield non-trivial dynamics over the distribution of agent strategies. We show that these dynamics are related to a new class of high-dimensional Ehrenfest random walks, and we derive exact characterizations of their stationary distributions, bounds on their mixing times, and prove their convergence to approximate distributional equilibria. Our results highlight trade-offs between the local state space of each agent, and the convergence rate and approximation factor of the underlying dynamics. Our approach opens the door towards the further characterization of equilibrium computation for other classes of games and dynamics in the population setting.},
  author       = {Alistarh, Dan-Adrian and Chatterjee, Krishnendu and Karrabi, Mehrdad and Lazarsfeld, John M},
  booktitle    = {Proceedings of the 43rd Annual ACM Symposium on Principles of Distributed Computing},
  isbn         = {9798400706684},
  location     = {Nantes, France},
  pages        = {40--49},
  publisher    = {Association for Computing Machinery},
  title        = {{Game dynamics and equilibrium computation in the population protocol model}},
  doi          = {10.1145/3662158.3662768},
  year         = {2024},
}

@inproceedings{17402,
  abstract     = {We present version 2.0 of the Partial Exploration Tool (PET), a tool for verification of probabilistic systems. We extend the previous version by adding support for stochastic games, based on a recent unified framework for sound value iteration algorithms. Thereby, PET2 is the first tool implementing a sound and efficient approach for solving stochastic games with objectives of the type reachability/safety and mean payoff. We complement this approach by developing and implementing a partial-exploration based variant for all three objectives. Our experimental evaluation shows that PET2 offers the most efficient partial-exploration based algorithm and is the most viable tool on SGs, even outperforming unsound tools.},
  author       = {Meggendorfer, Tobias and Weininger, Maximilian},
  booktitle    = {36th International Conference on Computer Aided Verification},
  isbn         = {9783031656323},
  issn         = {1611-3349},
  location     = {Montreal, Canada},
  pages        = {359--372},
  publisher    = {Springer Nature},
  title        = {{Playing games with your PET: Extending the Partial Exploration Tool to stochastic games}},
  doi          = {10.1007/978-3-031-65633-0_16},
  volume       = {14683},
  year         = {2024},
}

@article{17474,
  abstract     = {Entropic risk (ERisk) is an established risk measure in finance, quantifying risk by an exponential re-weighting of rewards. We study ERisk for the first time in the context of turn-based stochastic games with the total reward objective. This gives rise to an objective function that demands the control of systems in a risk-averse manner. We show that the resulting games are determined and, in particular, admit optimal memoryless deterministic strategies. This contrasts risk measures that previously have been considered in the special case of Markov decision processes and that require randomization and/or memory. We provide several results on the decidability and the computational complexity of the threshold problem, i.e. whether the optimal value of ERisk exceeds a given threshold. Furthermore, an approximation algorithm for the optimal value of ERisk is provided.},
  author       = {Baier, Christel and Chatterjee, Krishnendu and Meggendorfer, Tobias and Piribauer, Jakob},
  issn         = {1090-2651},
  journal      = {Information and Computation},
  publisher    = {Elsevier},
  title        = {{Entropic risk for turn-based stochastic games}},
  doi          = {10.1016/j.ic.2024.105214},
  volume       = {301},
  year         = {2024},
}

@inproceedings{18155,
  abstract     = {We study the classical problem of verifying programs with respect to formal specifications given in the linear temporal logic (LTL). We first present novel sound and complete witnesses for LTL verification over imperative programs. Our witnesses are applicable to both verification (proving) and refutation (finding bugs) settings. We then consider LTL formulas in which atomic propositions can be polynomial constraints and turn our focus to polynomial arithmetic programs, i.e. programs in which every assignment and guard consists only of polynomial expressions. For this setting, we provide an efficient algorithm to automatically synthesize such LTL witnesses. Our synthesis procedure is both sound and semi-complete. Finally, we present experimental results demonstrating the effectiveness of our approach and that it can handle programs which were beyond the reach of previous state-of-the-art tools.},
  author       = {Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Goharshady, Ehsan and Karrabi, Mehrdad and Zikelic, Dorde},
  booktitle    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  isbn         = {9783031711619},
  issn         = {1611-3349},
  location     = {Milan, Italy},
  pages        = {600--619},
  publisher    = {Springer Nature},
  title        = {{Sound and complete witnesses for template-based verification of LTL properties on polynomial programs}},
  doi          = {10.1007/978-3-031-71162-6_31},
  volume       = {14933},
  year         = {2024},
}

@inproceedings{18159,
  abstract     = {Markov Decision Processes (MDPs) are a classical model for decision making in the presence of uncertainty. Often they are viewed as state transformers with planning objectives defned with respect to paths over MDP states. An increasingly
popular alternative is to view them as distribution transformers, giving rise to a sequence of probability distributions over MDP states. For instance, reachability and safety properties in modeling robot swarms or chemical reaction networks are naturally defned in terms of probability distributions over states. Verifying such distributional properties is known to be hard and often beyond the reach of classical state-based verifcation techniques. In this work, we consider the problems of certifed policy (i.e. controller) verifcation and synthesis in MDPs under distributional reach-avoidance specifcations. By certifed we mean that, along with a policy, we also aim to synthesize a (checkable) certifcate ensuring that the MDP indeed satisfes the property. Thus, given the target set of distributions and an unsafe set of distributions over MDP states, our goal is to either synthesize a certifcate for a given policy or synthesize a policy along with a certifcate, proving that the target distribution can be reached while avoiding unsafe distributions. To solve this problem, we introduce the novel notion of distributional reach-avoid certifcates and present automated procedures for (1) synthesizing a certifcate for a given policy, and (2) synthesizing a policy together with the certifcate, both providing formal guarantees on certifcate correctness. Our experimental evaluation demonstrates the ability of our method to solve several non-trivial examples, including a multi-agent robot-swarm model, to synthesize certifed policies and to certify existing policies. },
  author       = {Akshay, S and Chatterjee, Krishnendu and Meggendorfer, Tobias and Zikelic, Dorde},
  booktitle    = {Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence},
  isbn         = {9781956792041},
  issn         = {1045-0823},
  location     = {Jeju, Korea},
  pages        = {3--12},
  publisher    = {International Joint Conferences on Artificial Intelligence},
  title        = {{Certified policy verification and synthesis for MDPs under distributional reach-avoidance properties}},
  doi          = {10.24963/ijcai.2024/1},
  year         = {2024},
}

@inproceedings{18160,
  abstract     = {Markov decision processes (MDPs) provide a standard framework for sequential decision making under uncertainty. However, MDPs do not take uncertainty in transition probabilities into account. Robust Markov decision processes (RMDPs) address this shortcoming of MDPs by assigning to each transition an uncertainty set rather than a single probability value. In this work, we consider polytopic RMDPs in which all uncertainty sets are polytopes and study the problem of solving long-run average reward polytopic RMDPs. We present a novel perspective on this problem and show that it can be reduced to solving long-run average reward turn-based stochastic games with finite state and action spaces. This reduction allows us to derive several important consequences that were hitherto not known to hold for polytopic RMDPs. First, we derive new computational complexity bounds for solving long-run average reward polytopic RMDPs, showing for the first time that the threshold decision problem for them is in NP∩CONP and that they admit a randomized algorithm with sub-exponential expected runtime. Second, we present Robust Polytopic Policy Iteration (RPPI), a novel policy iteration algorithm for solving long-run average reward polytopic RMDPs. Our experimental evaluation shows that RPPI is much more efficient in solving long-run average reward polytopic RMDPs compared to state-of-the-art methods based on value iteration. },
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshadi, Ehsan and Karrabi, Mehrdad and Novotný, Petr and Zikelic, Dorde},
  booktitle    = {33rd International Joint Conference on Artificial Intelligence},
  isbn         = {9781956792041},
  issn         = {1045-0823},
  location     = {Jeju, South Korea},
  pages        = {6707--6715},
  publisher    = {International Joint Conferences on Artificial Intelligence},
  title        = {{Solving long-run average reward robust MDPs via stochastic games}},
  doi          = {10.24963/ijcai.2024/741},
  year         = {2024},
}

@article{18266,
  abstract     = {Matrix games are the most basic model in game theory, and yet robustness with respect to small perturbations of the matrix entries is not fully understood. In this paper, we introduce value positivity and uniform value positivity, two properties that refine the notion of optimality in the context of polynomially perturbed matrix games. The first concept captures how the value depends on the perturbation parameter, and the second consists of the existence of a fixed strategy that guarantees the value of the unperturbed matrix game for every sufficiently small positive parameter. We provide polynomial-time algorithms to check whether a polynomially perturbed matrix game satisfies these properties. We further provide the functional form for a parameterized optimal strategy and the value function. Finally, we translate our results to linear programming and stochastic games, where value positivity is related to the existence of robust solutions.},
  author       = {Chatterjee, Krishnendu and Oliu-Barton, Miquel and Saona Urmeneta, Raimundo J},
  issn         = {1526-5471},
  journal      = {Mathematics of Operations Research},
  number       = {4},
  pages        = {2433--3282},
  publisher    = {Institute for Operations Research and the Management Sciences},
  title        = {{Value-positivity for matrix games}},
  doi          = {10.1287/moor.2022.0332},
  volume       = {50},
  year         = {2024},
}

@inproceedings{12676,
  abstract     = {Turn-based stochastic games (aka simple stochastic games) are two-player zero-sum games played on directed graphs with probabilistic transitions. The goal of player-max is to maximize the probability to reach a target state against the adversarial player-min. These games lie in NP ∩ coNP and are among the rare combinatorial problems that belong to this complexity class for which the existence of polynomial-time algorithm is a major open question. While randomized sub-exponential time algorithm exists, all known deterministic algorithms require exponential time in the worst-case. An important open question has been whether faster algorithms can be obtained parametrized by the treewidth of the game graph. Even deterministic sub-exponential time algorithm for constant treewidth turn-based stochastic games has remain elusive. In this work our main result is a deterministic algorithm to solve turn-based stochastic games that, given a game with n states, treewidth at most t, and the bit-complexity of the probabilistic transition function log D, has running time O ((tn2 log D)t log n). In particular, our algorithm is quasi-polynomial time for games with constant or poly-logarithmic treewidth.},
  author       = {Chatterjee, Krishnendu and Meggendorfer, Tobias and Saona Urmeneta, Raimundo J and Svoboda, Jakub},
  booktitle    = {Proceedings of the 2023 Annual ACM-SIAM Symposium on Discrete Algorithms},
  isbn         = {9781611977554},
  location     = {Florence, Italy},
  pages        = {4590--4605},
  publisher    = {Society for Industrial and Applied Mathematics},
  title        = {{Faster algorithm for turn-based stochastic games with bounded treewidth}},
  doi          = {10.1137/1.9781611977554.ch173},
  year         = {2023},
}

@article{12706,
  abstract     = {Allometric settings of population dynamics models are appealing due to their parsimonious nature and broad utility when studying system level effects. Here, we parameterise the size-scaled Rosenzweig-MacArthur differential equations to eliminate prey-mass dependency, facilitating an in depth analytic study of the equations which incorporates scaling parameters’ contributions to coexistence. We define the functional response term to match empirical findings, and examine situations where metabolic theory derivations and observation diverge. The dynamical properties of the Rosenzweig-MacArthur system, encompassing the distribution of size-abundance equilibria, the scaling of period and amplitude of population cycling, and relationships between predator and prey abundances, are consistent with empirical observation. Our parameterisation is an accurate minimal model across 15+ orders of mass magnitude.},
  author       = {Mckerral, Jody C. and Kleshnina, Maria and Ejov, Vladimir and Bartle, Louise and Mitchell, James G. and Filar, Jerzy A.},
  issn         = {1932-6203},
  journal      = {PLoS One},
  number       = {2},
  pages        = {e0279838},
  publisher    = {Public Library of Science},
  title        = {{Empirical parameterisation and dynamical analysis of the allometric Rosenzweig-MacArthur equations}},
  doi          = {10.1371/journal.pone.0279838},
  volume       = {18},
  year         = {2023},
}

@article{12787,
  abstract     = {Populations evolve in spatially heterogeneous environments. While a certain trait might bring a fitness advantage in some patch of the environment, a different trait might be advantageous in another patch. Here, we study the Moran birth–death process with two types of individuals in a population stretched across two patches of size N, each patch favouring one of the two types. We show that the long-term fate of such populations crucially depends on the migration rate μ
 between the patches. To classify the possible fates, we use the distinction between polynomial (short) and exponential (long) timescales. We show that when μ is high then one of the two types fixates on the whole population after a number of steps that is only polynomial in N. By contrast, when μ is low then each type holds majority in the patch where it is favoured for a number of steps that is at least exponential in N. Moreover, we precisely identify the threshold migration rate μ⋆ that separates those two scenarios, thereby exactly delineating the situations that support long-term coexistence of the two types. We also discuss the case of various cycle graphs and we present computer simulations that perfectly match our analytical results.},
  author       = {Svoboda, Jakub and Tkadlec, Josef and Kaveh, Kamran and Chatterjee, Krishnendu},
  issn         = {1471-2946},
  journal      = {Proceedings of the Royal Society A: Mathematical, Physical and Engineering Sciences},
  number       = {2271},
  publisher    = {The Royal Society},
  title        = {{Coexistence times in the Moran process with environmental heterogeneity}},
  doi          = {10.1098/rspa.2022.0685},
  volume       = {479},
  year         = {2023},
}

@article{12833,
  abstract     = {The input to the token swapping problem is a graph with vertices v1, v2, . . . , vn, and n tokens with labels 1,2, . . . , n, one on each vertex. The goal is to get token i to vertex vi for all i= 1, . . . , n using a minimum number of swaps, where a swap exchanges the tokens on the endpoints of an edge.Token swapping on a tree, also known as “sorting with a transposition tree,” is not known to be in P nor NP-complete. We present some partial results: 1. An optimum swap sequence may need to perform a swap on a leaf vertex that has the correct token (a “happy leaf”), disproving a conjecture of Vaughan. 2. Any algorithm that fixes happy leaves—as all known approximation algorithms for the problem do—has approximation factor at least 4/3. Furthermore, the two best-known 2-approximation algorithms have approximation factor exactly 2. 3. A generalized problem—weighted coloured token swapping—is NP-complete on trees, but solvable in polynomial time on paths and stars. In this version, tokens and vertices have colours, and colours have weights. The goal is to get every token to a vertex of the same colour, and the cost of a swap is the sum of the weights of the two tokens involved.},
  author       = {Biniaz, Ahmad and Jain, Kshitij and Lubiw, Anna and Masárová, Zuzana and Miltzow, Tillmann and Mondal, Debajyoti and Naredla, Anurag Murty and Tkadlec, Josef and Turcotte, Alexi},
  issn         = {1365-8050},
  journal      = {Discrete Mathematics and Theoretical Computer Science},
  number       = {2},
  publisher    = {EPI Sciences},
  title        = {{Token swapping on trees}},
  doi          = {10.46298/DMTCS.8383},
  volume       = {24},
  year         = {2023},
}

@article{12861,
  abstract     = {The field of indirect reciprocity investigates how social norms can foster cooperation when individuals continuously monitor and assess each other’s social interactions. By adhering to certain social norms, cooperating individuals can improve their reputation and, in turn, receive benefits from others. Eight social norms, known as the “leading eight," have been shown to effectively promote the evolution of cooperation as long as information is public and reliable. These norms categorize group members as either ’good’ or ’bad’. In this study, we examine a scenario where individuals instead assign nuanced reputation scores to each other, and only cooperate with those whose reputation exceeds a certain threshold. We find both analytically and through simulations that such quantitative assessments are error-correcting, thus facilitating cooperation in situations where information is private and unreliable. Moreover, our results identify four specific norms that are robust to such conditions, and may be relevant for helping to sustain cooperation in natural populations.},
  author       = {Schmid, Laura and Ekbatani, Farbod and Hilbe, Christian and Chatterjee, Krishnendu},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  publisher    = {Springer Nature},
  title        = {{Quantitative assessment can stabilize indirect reciprocity under imperfect information}},
  doi          = {10.1038/s41467-023-37817-x},
  volume       = {14},
  year         = {2023},
}

@inproceedings{13139,
  abstract     = {A classical problem for Markov chains is determining their stationary (or steady-state) distribution. This problem has an equally classical solution based on eigenvectors and linear equation systems. However, this approach does not scale to large instances, and iterative solutions are desirable. It turns out that a naive approach, as used by current model checkers, may yield completely wrong results. We present a new approach, which utilizes recent advances in partial exploration and mean payoff computation to obtain a correct, converging approximation.},
  author       = {Meggendorfer, Tobias},
  booktitle    = {TACAS 2023: Tools and Algorithms for the Construction and Analysis of Systems},
  isbn         = {9783031308222},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {489--507},
  publisher    = {Springer Nature},
  title        = {{Correct approximation of stationary distributions}},
  doi          = {10.1007/978-3-031-30823-9_25},
  volume       = {13993},
  year         = {2023},
}

@inproceedings{13142,
  abstract     = {Reinforcement learning has received much attention for learning controllers of deterministic systems. We consider a learner-verifier framework for stochastic control systems and survey recent methods that formally guarantee a conjunction of reachability and safety properties. Given a property and a lower bound on the probability of the property being satisfied, our framework jointly learns a control policy and a formal certificate to ensure the satisfaction of the property with a desired probability threshold. Both the control policy and the formal certificate are continuous functions from states to reals, which are learned as parameterized neural networks. While in the deterministic case, the certificates are invariant and barrier functions for safety, or Lyapunov and ranking functions for liveness, in the stochastic case the certificates are supermartingales. For certificate verification, we use interval arithmetic abstract interpretation to bound the expected values of neural network functions.},
  author       = {Chatterjee, Krishnendu and Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde},
  booktitle    = {Tools and Algorithms for the Construction and Analysis of Systems },
  isbn         = {9783031308222},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {3--25},
  publisher    = {Springer Nature},
  title        = {{A learner-verifier framework for neural network controllers and certificates of stochastic systems}},
  doi          = {10.1007/978-3-031-30823-9_1},
  volume       = {13993},
  year         = {2023},
}

@article{13258,
  abstract     = {Many human interactions feature the characteristics of social dilemmas where individual actions have consequences for the group and the environment. The feedback between behavior and environment can be studied with the framework of stochastic games. In stochastic games, the state of the environment can change, depending on the choices made by group members. Past work suggests that such feedback can reinforce cooperative behaviors. In particular, cooperation can evolve in stochastic games even if it is infeasible in each separate repeated game. In stochastic games, participants have an interest in conditioning their strategies on the state of the environment. Yet in many applications, precise information about the state could be scarce. Here, we study how the availability of information (or lack thereof) shapes evolution of cooperation. Already for simple examples of two state games we find surprising effects. In some cases, cooperation is only possible if there is precise information about the state of the environment. In other cases, cooperation is most abundant when there is no information about the state of the environment. We systematically analyze all stochastic games of a given complexity class, to determine when receiving information about the environment is better, neutral, or worse for evolution of cooperation.},
  author       = {Kleshnina, Maria and Hilbe, Christian and Simsa, Stepan and Chatterjee, Krishnendu and Nowak, Martin A.},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  publisher    = {Springer Nature},
  title        = {{The effect of environmental information on evolution of cooperation in stochastic games}},
  doi          = {10.1038/s41467-023-39625-9},
  volume       = {14},
  year         = {2023},
}

@misc{13336,
  author       = {Kleshnina, Maria},
  publisher    = {Zenodo},
  title        = {{kleshnina/stochgames_info: The effect of environmental information on evolution of cooperation in stochastic games}},
  doi          = {10.5281/ZENODO.8059564},
  year         = {2023},
}

@inproceedings{13967,
  abstract     = {A classic solution technique for Markov decision processes (MDP) and stochastic games (SG) is value iteration (VI). Due to its good practical performance, this approximative approach is typically preferred over exact techniques, even though no practical bounds on the imprecision of the result could be given until recently. As a consequence, even the most used model checkers could return arbitrarily wrong results. Over the past decade, different works derived stopping criteria, indicating when the precision reaches the desired level, for various settings, in particular MDP with reachability, total reward, and mean payoff, and SG with reachability.In this paper, we provide the first stopping criteria for VI on SG with total reward and mean payoff, yielding the first anytime algorithms in these settings. To this end, we provide the solution in two flavours: First through a reduction to the MDP case and second directly on SG. The former is simpler and automatically utilizes any advances on MDP. The latter allows for more local computations, heading towards better practical efficiency.Our solution unifies the previously mentioned approaches for MDP and SG and their underlying ideas. To achieve this, we isolate objective-specific subroutines as well as identify objective-independent concepts. These structural concepts, while surprisingly simple, form the very essence of the unified solution.},
  author       = {Kretinsky, Jan and Meggendorfer, Tobias and Weininger, Maximilian},
  booktitle    = {38th Annual ACM/IEEE Symposium on Logic in Computer Science},
  isbn         = {9798350335873},
  issn         = {1043-6871},
  location     = {Boston, MA, United States},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Stopping criteria for value iteration on stochastic games with quantitative objectives}},
  doi          = {10.1109/LICS56636.2023.10175771},
  volume       = {2023},
  year         = {2023},
}

