@inproceedings{21412,
  abstract     = {Payment channel networks (PCNs) are a promising technology that alleviates blockchain scalability by shifting the transaction load from the blockchain to the PCN. Nevertheless, the network topology has to be carefully designed to maximise the transaction throughput in PCNs. Additionally, users in PCNs also have to make optimal decisions on which transactions to forward and which to reject to prolong the lifetime of their channels. In this work, we consider an input sequence of transactions over p parties. Each transaction consists of a transaction size, source, and target, and can be either accepted or rejected (entailing a cost). The goal is to design a PCN topology among the p cooperating parties, along with the channel capacities, and then output a decision for each transaction in the sequence to minimise the cost of creating and augmenting channels, as well as the cost of rejecting transactions. Our main contribution is an 𝒪(p) approximation algorithm for the problem with p parties. We further show that with some assumptions on the distribution of transactions, we can reduce the approximation ratio to 𝒪(√p). We complement our theoretical analysis with an empirical study of our assumptions and approach in the context of the Lightning Network.},
  author       = {Chatterjee, Krishnendu and Křišťan, Jan Matyáš and Schmid, Stefan and Svoboda, Jakub and Yeo, Michelle X},
  booktitle    = {39th International Symposium on Distributed Computing},
  isbn         = {9783959774024},
  issn         = {1868-8969},
  location     = {Berlin, Germany},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Boosting payment channel network liquidity with topology optimization and transaction selection}},
  doi          = {10.4230/LIPIcs.DISC.2025.23},
  volume       = {356},
  year         = {2025},
}

@article{21413,
  abstract     = {We present a general framework for applying learning algorithms and heuristical guidance to the verification of Markov decision processes (MDPs).
The primary goal of our techniques is to improve performance by avoiding an exhaustive exploration of the state space, instead focussing on particularly relevant areas of the system, guided by heuristics. Our work builds on the previous results of Br{á}zdil et al., significantly extending it as well as refining several details and fixing errors.
The presented framework focuses on probabilistic reachability, which is a core problem in verification, and is instantiated in two distinct scenarios.
The first assumes that full knowledge of the MDP is available, in particular precise transition probabilities. It performs a heuristic-driven partial exploration of the model, yielding precise lower and upper bounds on the required probability. The second tackles the case where we may only sample the MDP without knowing the exact transition dynamics. Here, we obtain probabilistic guarantees, again in terms of both the lower and upper bounds, which provides efficient stopping criteria for the approximation. In particular, the latter is an extension of statistical model-checking (SMC) for unbounded properties in MDPs. In contrast to other related approaches, we do not restrict our attention to time-bounded (finite-horizon) or discounted properties, nor assume any particular structural properties of the MDP.},
  author       = {Brázdil, Tomáš and Chatterjee, Krishnendu and Chmelik, Martin and Forejt, Vojtěch and Kretinsky, Jan and Kwiatkowska, Marta and Meggendorfer, Tobias and Parker, David and Ujma, Mateusz},
  issn         = {2751-4838},
  journal      = {TheoretiCS},
  publisher    = {TheoretiCS Foundation},
  title        = {{Learning algorithms for verification of Markov decision processes}},
  doi          = {10.46298/theoretics.25.10},
  volume       = {4},
  year         = {2025},
}

@article{17037,
  abstract     = {Zero-sum stochastic games are parameterized by payoffs, transitions, and possibly a discount rate. In this article, we study how the main solution concepts, the discounted and undiscounted values, vary when these parameters are perturbed. We focus on the marginal values, introduced by Mills in 1956 in the context of matrix games—that is, the directional derivatives of the value along any fixed perturbation. We provide a formula for the marginal values of a discounted stochastic game. Further, under mild assumptions on the perturbation, we provide a formula for their limit as the discount rate vanishes and for the marginal values of an undiscounted stochastic game. We also show, via an example, that the two latter differ in general.},
  author       = {Attia, Luc and Oliu-Barton, Miquel and Saona Urmeneta, Raimundo J},
  issn         = {1526-5471},
  journal      = {Mathematics of Operations Research},
  number       = {1},
  pages        = {482--505},
  publisher    = {Institute for Operations Research and the Management Sciences},
  title        = {{Marginal values of a stochastic game}},
  doi          = {10.1287/moor.2023.0297},
  volume       = {50},
  year         = {2025},
}

@inproceedings{18155,
  abstract     = {We study the classical problem of verifying programs with respect to formal specifications given in the linear temporal logic (LTL). We first present novel sound and complete witnesses for LTL verification over imperative programs. Our witnesses are applicable to both verification (proving) and refutation (finding bugs) settings. We then consider LTL formulas in which atomic propositions can be polynomial constraints and turn our focus to polynomial arithmetic programs, i.e. programs in which every assignment and guard consists only of polynomial expressions. For this setting, we provide an efficient algorithm to automatically synthesize such LTL witnesses. Our synthesis procedure is both sound and semi-complete. Finally, we present experimental results demonstrating the effectiveness of our approach and that it can handle programs which were beyond the reach of previous state-of-the-art tools.},
  author       = {Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Goharshady, Ehsan and Karrabi, Mehrdad and Zikelic, Dorde},
  booktitle    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  isbn         = {9783031711619},
  issn         = {1611-3349},
  location     = {Milan, Italy},
  pages        = {600--619},
  publisher    = {Springer Nature},
  title        = {{Sound and complete witnesses for template-based verification of LTL properties on polynomial programs}},
  doi          = {10.1007/978-3-031-71162-6_31},
  volume       = {14933},
  year         = {2024},
}

@inproceedings{18159,
  abstract     = {Markov Decision Processes (MDPs) are a classical model for decision making in the presence of uncertainty. Often they are viewed as state transformers with planning objectives defned with respect to paths over MDP states. An increasingly
popular alternative is to view them as distribution transformers, giving rise to a sequence of probability distributions over MDP states. For instance, reachability and safety properties in modeling robot swarms or chemical reaction networks are naturally defned in terms of probability distributions over states. Verifying such distributional properties is known to be hard and often beyond the reach of classical state-based verifcation techniques. In this work, we consider the problems of certifed policy (i.e. controller) verifcation and synthesis in MDPs under distributional reach-avoidance specifcations. By certifed we mean that, along with a policy, we also aim to synthesize a (checkable) certifcate ensuring that the MDP indeed satisfes the property. Thus, given the target set of distributions and an unsafe set of distributions over MDP states, our goal is to either synthesize a certifcate for a given policy or synthesize a policy along with a certifcate, proving that the target distribution can be reached while avoiding unsafe distributions. To solve this problem, we introduce the novel notion of distributional reach-avoid certifcates and present automated procedures for (1) synthesizing a certifcate for a given policy, and (2) synthesizing a policy together with the certifcate, both providing formal guarantees on certifcate correctness. Our experimental evaluation demonstrates the ability of our method to solve several non-trivial examples, including a multi-agent robot-swarm model, to synthesize certifed policies and to certify existing policies. },
  author       = {Akshay, S and Chatterjee, Krishnendu and Meggendorfer, Tobias and Zikelic, Dorde},
  booktitle    = {Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence},
  isbn         = {9781956792041},
  issn         = {1045-0823},
  location     = {Jeju, Korea},
  pages        = {3--12},
  publisher    = {International Joint Conferences on Artificial Intelligence},
  title        = {{Certified policy verification and synthesis for MDPs under distributional reach-avoidance properties}},
  doi          = {10.24963/ijcai.2024/1},
  year         = {2024},
}

@inproceedings{18160,
  abstract     = {Markov decision processes (MDPs) provide a standard framework for sequential decision making under uncertainty. However, MDPs do not take uncertainty in transition probabilities into account. Robust Markov decision processes (RMDPs) address this shortcoming of MDPs by assigning to each transition an uncertainty set rather than a single probability value. In this work, we consider polytopic RMDPs in which all uncertainty sets are polytopes and study the problem of solving long-run average reward polytopic RMDPs. We present a novel perspective on this problem and show that it can be reduced to solving long-run average reward turn-based stochastic games with finite state and action spaces. This reduction allows us to derive several important consequences that were hitherto not known to hold for polytopic RMDPs. First, we derive new computational complexity bounds for solving long-run average reward polytopic RMDPs, showing for the first time that the threshold decision problem for them is in NP∩CONP and that they admit a randomized algorithm with sub-exponential expected runtime. Second, we present Robust Polytopic Policy Iteration (RPPI), a novel policy iteration algorithm for solving long-run average reward polytopic RMDPs. Our experimental evaluation shows that RPPI is much more efficient in solving long-run average reward polytopic RMDPs compared to state-of-the-art methods based on value iteration. },
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshadi, Ehsan and Karrabi, Mehrdad and Novotný, Petr and Zikelic, Dorde},
  booktitle    = {33rd International Joint Conference on Artificial Intelligence},
  isbn         = {9781956792041},
  issn         = {1045-0823},
  location     = {Jeju, South Korea},
  pages        = {6707--6715},
  publisher    = {International Joint Conferences on Artificial Intelligence},
  title        = {{Solving long-run average reward robust MDPs via stochastic games}},
  doi          = {10.24963/ijcai.2024/741},
  year         = {2024},
}

@article{18266,
  abstract     = {Matrix games are the most basic model in game theory, and yet robustness with respect to small perturbations of the matrix entries is not fully understood. In this paper, we introduce value positivity and uniform value positivity, two properties that refine the notion of optimality in the context of polynomially perturbed matrix games. The first concept captures how the value depends on the perturbation parameter, and the second consists of the existence of a fixed strategy that guarantees the value of the unperturbed matrix game for every sufficiently small positive parameter. We provide polynomial-time algorithms to check whether a polynomially perturbed matrix game satisfies these properties. We further provide the functional form for a parameterized optimal strategy and the value function. Finally, we translate our results to linear programming and stochastic games, where value positivity is related to the existence of robust solutions.},
  author       = {Chatterjee, Krishnendu and Oliu-Barton, Miquel and Saona Urmeneta, Raimundo J},
  issn         = {1526-5471},
  journal      = {Mathematics of Operations Research},
  number       = {4},
  pages        = {2433--3282},
  publisher    = {Institute for Operations Research and the Management Sciences},
  title        = {{Value-positivity for matrix games}},
  doi          = {10.1287/moor.2022.0332},
  volume       = {50},
  year         = {2024},
}

@inproceedings{18600,
  abstract     = {The analysis of formal models that include quantitative aspects such as timing or probabilistic choices is performed by quantitative verification tools. Broad and mature tool support is available for computing basic properties such as expected rewards on basic models such as Markov chains. Previous editions of QComp, the comparison of tools for the analysis of quantitative formal models, focused on this setting. Many application scenarios, however, require more advanced property types such as LTL and parameter synthesis queries as well as advanced models like stochastic games and partially observable MDPs. For these, tool support is in its infancy today. This paper presents the outcomes of QComp 2023: a survey of the state of the art in quantitative verification tool support for advanced property types and models. With tools ranging from first research prototypes to well-supported integrations into established toolsets, this report highlights today’s active areas and tomorrow’s challenges in tool-focused research for quantitative verification.},
  author       = {Andriushchenko, Roman and Bork, Alexander and Budde, Carlos E. and Češka, Milan and Grover, Kush and Hahn, Ernst Moritz and Hartmanns, Arnd and Israelsen, Bryant and Jansen, Nils and Jeppson, Joshua and Junges, Sebastian and Köhl, Maximilian A. and Könighofer, Bettina and Kretinsky, Jan and Meggendorfer, Tobias and Parker, David and Pranger, Stefan and Quatmann, Tim and Ruijters, Enno and Taylor, Landon and Volk, Matthias and Weininger, Maximilian and Zhang, Zhen},
  booktitle    = {TOOLympics Challenge 2023},
  isbn         = {9783031676949},
  issn         = {1611-3349},
  pages        = {90--146},
  publisher    = {Springer Nature},
  title        = {{Tools at the Frontiers of Quantitative Verification: QComp 2023 Competition Report}},
  doi          = {10.1007/978-3-031-67695-6_4},
  volume       = {14550},
  year         = {2024},
}

@article{18630,
  abstract     = {Markov chains are the de facto finite-state model for stochastic dynamical systems, and Markov decision processes (MDPs) extend Markov chains by incorporating non-deterministic behaviors. Given an MDP and rewards on states, a classical optimization criterion is the maximal expected total reward where the MDP stops after T steps, which can be computed by a simple dynamic programming algorithm. We consider a natural generalization of the problem where the stopping times can be chosen according to a probability distribution, such that the expected stopping time is T, to optimize the expected total reward. Quite surprisingly we establish inter-reducibility of the expected stopping-time problem for Markov chains with the Positivity problem (which is related to the well-known Skolem problem), for which establishing either decidability or undecidability would be a major breakthrough. Given the hardness of the exact problem, we consider the approximate version of the problem: we show that it can be solved in exponential time for Markov chains and in exponential space for MDPs.},
  author       = {Chatterjee, Krishnendu and Doyen, Laurent},
  issn         = {1860-5974},
  journal      = {Logical Methods in Computer Science},
  number       = {4},
  pages        = {11:1--11:34},
  publisher    = {EPI Sciences},
  title        = {{Stochastic processes with expected stopping time}},
  doi          = {10.46298/lmcs-20(4:11)2024},
  volume       = {20},
  year         = {2024},
}

@article{18703,
  abstract     = {Spatial games provide a simple and elegant mathematical model to study the evolution of cooperation in networks. In spatial games, individuals reside in vertices, adopt simple strategies, and interact with neighbors to receive a payoff. Depending on their own and neighbors’ payoffs, individuals can change their strategy. The payoff is determined by the Prisoners’ Dilemma, a classical matrix game, where players cooperate or defect. While cooperation is the desired behavior, defection provides a higher payoff for a selfish individual. There are many theoretical and empirical studies related to the role of the network in the evolution of cooperation. However, the fundamental question of whether there exist networks that for low initial cooperation rate ensure a high chance of fixation, i.e., cooperation spreads across the whole population, has remained elusive for spatial games with strong selection. In this work, we answer this fundamental question in the affirmative by presenting network structures that ensure high fixation probability for cooperators in the strong selection regime. Besides, our structures have many desirable properties: (a) they ensure the spread of cooperation even for a low initial density of cooperation and high temptation of defection, (b) they have constant degrees, and (c) the number of steps, until cooperation spreads, is at most quadratic in the size of the network.},
  author       = {Svoboda, Jakub and Chatterjee, Krishnendu},
  issn         = {1091-6490},
  journal      = {Proceedings of the National Academy of Sciences of the United States of America},
  number       = {50},
  publisher    = {National Academy of Sciences},
  title        = {{Density amplifiers of cooperation for spatial games}},
  doi          = {10.1073/pnas.2405605121},
  volume       = {121},
  year         = {2024},
}

@inproceedings{18925,
  abstract     = {Given the increasingly stringent requirements on the performance and efficiency of communication networks, over the last years, great efforts have been made to render networks more flexible and programmable. In particular, modern networks support a flexible rerouting of flows, e.g., depending on the dynamically changing traffic or network conditions. However, the underlying algorithmic problems are still not well-understood today.In this paper, we revisit the k-Network Flow Update problem that asks for a schedule to reroute k unsplittable flows from their current paths to the given new paths, in a congestion-free manner in a capacitated network. We show that the problem is already NP-hard for three acyclic flows on simple directed graphs. Our main contribution is an efficient algorithm for sparse networks; specifically the algorithm is fixed parameter tractable in the number of flows and the treewidth of a graph that is the union of all flows. Our results also settle the open complexity question in the literature.},
  author       = {Ceylan, Esra and Chatterjee, Krishnendu and Schmid, Stefan and Svoboda, Jakub},
  booktitle    = {NOMS 2024-2024 IEEE Network Operations and Management Symposium},
  isbn         = {9798350327946},
  issn         = {2374-9709},
  location     = {Seoul, Republic of Korea},
  publisher    = {IEEE},
  title        = {{Congestion-free rerouting of network flows: Hardness and an FPT algorithm}},
  doi          = {10.1109/noms59830.2024.10575579},
  year         = {2024},
}

@inproceedings{18974,
  abstract     = {Reinforcement Learning (RL) from temporal logical specifications is a fundamental problem in sequential decision making. One of the basic and core such specification is the reachability specification that requires a target set to be eventually visited. Despite strong empirical results for RL from such specifications, the theoretical guarantees are bleak, including the impossibility of Probably Approximately Correct (PAC) guarantee for reachability specifications. Given the impossibility result, in this work we consider the problem of RL from reachability specifications along with the information of expected conditional distance (ECD). We present (a) lower bound results which establish the necessity of ECD information for PAC guarantees and (b) an algorithm that establishes PAC-guarantees given the ECD information. To the best of our knowledge, this is the first RL from reachability specifications that does not make any assumptions on the underlying environment to learn policies.},
  author       = {Svoboda, Jakub and Bansal, Suguman and Chatterjee, Krishnendu},
  booktitle    = {41st International Conference on Machine Learning},
  location     = {Vienna, Austria},
  pages        = {47331--47344},
  publisher    = {ML Research Press},
  title        = {{Reinforcement learning from reachability specifications: PAC guarantees with expected conditional distance}},
  volume       = {235},
  year         = {2024},
}

@article{14820,
  abstract     = {We consider a natural problem dealing with weighted packet selection across a rechargeable link, which e.g., finds applications in cryptocurrency networks. The capacity of a link (u, v) is determined by how many nodes u and v allocate for this link. Specifically, the input is a finite ordered sequence of packets that arrive in both directions along a link. Given (u, v) and a packet of weight x going from u to v, node u can either accept or reject the packet. If u accepts the packet, the capacity on link (u, v) decreases by x. Correspondingly, v's capacity on 
 increases by x. If a node rejects the packet, this will entail a cost affinely linear in the weight of the packet. A link is “rechargeable” in the sense that the total capacity of the link has to remain constant, but the allocation of capacity at the ends of the link can depend arbitrarily on the nodes' decisions. The goal is to minimise the sum of the capacity injected into the link and the cost of rejecting packets. We show that the problem is NP-hard, but can be approximated efficiently with a ratio of (1+E) . (1+3)  for some arbitrary E>0.},
  author       = {Schmid, Stefan and Svoboda, Jakub and Yeo, Michelle X},
  issn         = {0304-3975},
  journal      = {Theoretical Computer Science},
  keywords     = {General Computer Science, Theoretical Computer Science},
  publisher    = {Elsevier},
  title        = {{Weighted packet selection for rechargeable links in cryptocurrency networks: Complexity and approximation}},
  doi          = {10.1016/j.tcs.2023.114353},
  volume       = {989},
  year         = {2024},
}

@inproceedings{15006,
  abstract     = {Graphical games are a useful framework for modeling the interactions of (selfish) agents who are connected via an underlying topology and whose behaviors influence each other. They have wide applications ranging from computer science to economics and biology. Yet, even though an agent’s payoff only depends on the actions of their direct neighbors in graphical games, computing the Nash equilibria and making statements about the convergence time of "natural" local dynamics in particular can be highly challenging. In this work, we present a novel approach for classifying complexity of Nash equilibria in graphical games by establishing a connection to local graph algorithms, a subfield of distributed computing. In particular, we make the observation that the equilibria of graphical games are equivalent to locally verifiable labelings (LVL) in graphs; vertex labelings which are verifiable with constant-round local algorithms. This connection allows us to derive novel lower bounds on the convergence time to equilibrium of best-response dynamics in graphical games. Since we establish that distributed convergence can sometimes be provably slow, we also introduce and give bounds on an intuitive notion of "time-constrained" inefficiency of best responses. We exemplify how our results can be used in the implementation of mechanisms that ensure convergence of best responses to a Nash equilibrium. Our results thus also give insight into the convergence of strategy-proof algorithms for graphical games, which is still not well understood.},
  author       = {Hirvonen, Juho and Schmid, Laura and Chatterjee, Krishnendu and Schmid, Stefan},
  booktitle    = {27th International Conference on Principles of Distributed Systems},
  isbn         = {9783959773089},
  issn         = {1868-8969},
  location     = {Tokyo, Japan},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{On the convergence time in graphical games: A locality-sensitive approach}},
  doi          = {10.4230/LIPIcs.OPODIS.2023.11},
  volume       = {286},
  year         = {2024},
}

@article{15083,
  abstract     = {Direct reciprocity is a powerful mechanism for cooperation in social dilemmas. The very logic of reciprocity, however, seems to require that individuals are symmetric, and that everyone has the same means to influence each others’ payoffs. Yet in many applications, individuals are asymmetric. Herein, we study the effect of asymmetry in linear public good games. Individuals may differ in their endowments (their ability to contribute to a public good) and in their productivities (how effective their contributions are). Given the individuals’ productivities, we ask which allocation of endowments is optimal for cooperation. To this end, we consider two notions of optimality. The first notion focuses on the resilience of cooperation. The respective endowment distribution ensures that full cooperation is feasible even under the most adverse conditions. The second notion focuses on efficiency. The corresponding endowment distribution maximizes group welfare. Using analytical methods, we fully characterize these two endowment distributions. This analysis reveals that both optimality notions favor some endowment inequality: More productive players ought to get higher endowments. Yet the two notions disagree on how unequal endowments are supposed to be. A focus on resilience results in less inequality. With additional simulations, we show that the optimal endowment allocation needs to account for both the resilience and the efficiency of cooperation.},
  author       = {Hübner, Valentin and Staab, Manuel and Hilbe, Christian and Chatterjee, Krishnendu and Kleshnina, Maria},
  issn         = {1091-6490},
  journal      = {Proceedings of the National Academy of Sciences of the United States of America},
  number       = {10},
  publisher    = {National Academy of Sciences},
  title        = {{Efficiency and resilience of cooperation in asymmetric social dilemmas}},
  doi          = {10.1073/pnas.2315558121},
  volume       = {121},
  year         = {2024},
}

@misc{15108,
  abstract     = {in the research article "Efficiency and resilience of cooperation in asymmetric social dilemmas" (by Valentin Hübner, Manuel Staab, Christian Hilbe, Krishnendu Chatterjee, and Maria Kleshnina).

We used different implementations for the case of two and three players, both described below.},
  author       = {Hübner, Valentin and Kleshnina, Maria},
  publisher    = {Zenodo},
  title        = {{Computer code for "Efficiency and resilience of cooperation in asymmetric social dilemmas"}},
  doi          = {10.5281/ZENODO.10639167},
  year         = {2024},
}

@article{15297,
  abstract     = {Populations evolve by accumulating advantageous mutations. Every population has some spatial structure that can be modeled by an underlying network. The network then influences the probability that new advantageous mutations fixate. Amplifiers of selection are networks that increase the fixation probability of advantageous mutants, as compared to the unstructured fully-connected network. Whether or not a network is an amplifier depends on the choice of the random process that governs the evolutionary dynamics. Two popular choices are Moran process with Birth-death updating and Moran process with death-Birth updating. Interestingly, while some networks are amplifiers under Birth-death updating and other networks are amplifiers under death-Birth updating, so far no spatial structures have been found that function as an amplifier under both types of updating simultaneously. In this work, we identify networks that act as amplifiers of selection under both versions of the Moran process. The amplifiers are robust, modular, and increase fixation probability for any mutant fitness advantage in a range r ∈ (1, 1.2). To complement this positive result, we also prove that for certain quantities closely related to fixation probability, it is impossible to improve them simultaneously for both versions of the Moran process. Together, our results highlight how the two versions of the Moran process differ and what they have in common.},
  author       = {Svoboda, Jakub and Joshi, Soham Shrikant and Tkadlec, Josef and Chatterjee, Krishnendu},
  issn         = {1553-7358},
  journal      = {PLoS Computational Biology},
  number       = {3},
  publisher    = {Public Library of Science},
  title        = {{Amplifiers of selection for the Moran process with both Birth-death and death-Birth updating}},
  doi          = {10.1371/journal.pcbi.1012008},
  volume       = {20},
  year         = {2024},
}

@article{12738,
  abstract     = {We study turn-based stochastic zero-sum games with lexicographic preferences over objectives. Stochastic games are standard models in control, verification, and synthesis of stochastic reactive systems that exhibit both randomness as well as controllable and adversarial non-determinism. Lexicographic order allows one to consider multiple objectives with a strict preference order. To the best of our knowledge, stochastic games with lexicographic objectives have not been studied before. For a mixture of reachability and safety objectives, we show that deterministic lexicographically optimal strategies exist and memory is only required to remember the already satisfied and violated objectives. For a constant number of objectives, we show that the relevant decision problem is in NP∩coNP, matching the current known bound for single objectives; and in general the decision problem is PSPACE-hard and can be solved in NEXPTIME∩coNEXPTIME. We present an algorithm that computes the lexicographically optimal strategies via a reduction to the computation of optimal strategies in a sequence of single-objectives games. For omega-regular objectives, we restrict our analysis to one-player games, also known as Markov decision processes. We show that lexicographically optimal strategies exist and need either randomization or finite memory. We present an algorithm that solves the relevant decision problem in polynomial time. We have implemented our algorithms and report experimental results on various case studies.},
  author       = {Chatterjee, Krishnendu and Katoen, Joost P and Mohr, Stefanie and Weininger, Maximilian and Winkler, Tobias},
  issn         = {1572-8102},
  journal      = {Formal Methods in System Design},
  pages        = {40--80},
  publisher    = {Springer Nature},
  title        = {{Stochastic games with lexicographic objectives}},
  doi          = {10.1007/s10703-023-00411-4},
  volume       = {63},
  year         = {2024},
}

@inproceedings{17098,
  abstract     = {Turn-based discounted-sum games are two-player zero-sum games played on finite directed graphs. The vertices of the graph are partitioned between player 1 and player 2. Plays are infinite walks on the graph where the next vertex is decided by a player that owns the current vertex. Each edge is assigned an integer weight and the payoff of a play is the discounted-sum of the weights of the play. The goal of player 1 is to maximize the discounted-sum payoff against the adversarial player 2. These games lie in NP ∩ coNP and are among the rare combinatorial problems that belong to this complexity class and the existence of a polynomial-time algorithm is a major open question. Since breaking the general exponential barrier has been a challenging problem, faster parameterized algorithms have been considered. If the discount factor is expressed in unary, then discounted-sum games can be solved in polynomial time. However, if the discount factor is arbitrary (or expressed in binary), but the weights are in unary, none of the existing approaches yield a sub-exponential bound. Our main result is a new analysis technique for a classical algorithm (namely, the strategy iteration algorithm) that present a new runtime bound which is [EQUATION] for game graphs with n vertices and absolute weights of at most W. In particular, our result yields a deterministic sub-exponential bound for games with weights that are constant or represented in unary.},
  author       = {Asadi, Ali and Chatterjee, Krishnendu and Svoboda, Jakub and Saona Urmeneta, Raimundo J},
  booktitle    = {39th Annual ACM/IEEE Symposium on Logic in Computer Science},
  isbn         = {9798400706608},
  issn         = {1043-6871},
  location     = {Tallinn, Estonia},
  publisher    = {Association for Computing Machinery},
  title        = {{Deterministic sub-exponential algorithm for discounted-sum games with unary weights}},
  doi          = {10.1145/3661814.3662080},
  year         = {2024},
}

@inproceedings{17099,
  abstract     = {We study two-player zero-sum concurrent stochastic games with finite state and action space played for an infinite number of steps. In every step, the two players simultaneously and independently choose an action. Given the current state and the chosen actions, the next state is obtained according to a stochastic transition function. An objective is a measurable function on plays (or infinite trajectories) of the game, and the value for an objective is the maximal expectation that the player can guarantee against the adversarial player. We consider: (a) stateful-discounted objectives, which are similar to the classical discounted-sum objectives, but states are associated with different discount factors rather than a single discount factor; and (b) parity objectives, which are a canonical representation for ω-regular objectives. For stateful-discounted objectives, given an ordering of the discount factors, the limit value is the limit of the value of the stateful-discounted objectives, as the discount factors approach zero according to the given order.
The computational problem we consider is the approximation of the value within an arbitrary
additive error. The above problem is known to be in EXPSPACE for the limit value of statefuldiscounted objectives and in PSPACE for parity objectives. The best-known algorithms for both the above problems are at least exponential time, with an exponential dependence on the number of states and actions. Our main results for the value approximation problem for the limit value of stateful-discounted objectives and parity objectives are as follows: (a) we establish TFNP[NP] complexity; and (b) we present algorithms that improve the dependency on the number of actions in the exponent from linear to logarithmic. In particular, if the number of states is constant, our algorithms run in polynomial time.},
  author       = {Asadi, Ali and Chatterjee, Krishnendu and Saona Urmeneta, Raimundo J and Svoboda, Jakub},
  booktitle    = {44th IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science},
  isbn         = {9783959773553},
  issn         = {1868-8969},
  location     = {Gujarat, India},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Concurrent stochastic games with stateful-discounted and parity objectives: Complexity and algorithms}},
  doi          = {10.4230/LIPIcs.FSTTCS.2024.5},
  volume       = {323},
  year         = {2024},
}

