@inproceedings{20225,
  abstract     = {We present the first supermartingale certificate for quantitative ω-regular properties of discrete-time infinite-state stochastic systems. Our certificate is defined on the product of the stochastic system and a limit-deterministic Büchi automaton that specifies the property of interest; hence we call it a limit-deterministic Büchi supermartingale (LDBSM). Previously known supermartingale certificates applied only to quantitative reachability, safety, or reach-avoid properties, and to qualitative (i.e., probability 1) ω-regular properties. We also present fully automated algorithms for the template-based synthesis of LDBSMs, for the case when the stochastic system dynamics and the controller can be represented in terms of polynomial inequalities. Our experiments demonstrate the ability of our method to solve verification and control tasks for stochastic systems that were beyond the reach of previous supermartingale-based approaches.},
  author       = {Henzinger, Thomas A and Mallik, Kaushik and Sadeghi, Pouya and Zikelic, Dorde},
  booktitle    = {37th International Conference on Computer Aided Verification},
  isbn         = {9783031986789},
  issn         = {1611-3349},
  location     = {Zagreb, Croatia},
  pages        = {29--55},
  publisher    = {Springer Nature},
  title        = {{Supermartingale certificates for quantitative omega-regular verification and control}},
  doi          = {10.1007/978-3-031-98679-6_2},
  volume       = {15932},
  year         = {2025},
}

@inproceedings{20256,
  abstract     = {We study the problem of predictive runtime monitoring of black-box dynamical systems with quantitative safety properties. The black-box setting stipulates that the exact semantics of the dynamical system and the controller are unknown, and that we are only able to observe the state of the controlled (aka, closed-loop) system at finitely many time points. We present a novel framework for predicting future states of the system based on the states observed in the past. The numbers of past states and of predicted future states are parameters provided by the user. Our method is based on a combination of Taylor’s expansion and the backward difference operator for numerical differentiation. We also derive an upper bound on the prediction error under the assumption that the system dynamics and the controller are smooth. The predicted states are then used to predict safety violations ahead in time. Our experiments demonstrate practical applicability of our method for complex black-box systems, showing that it is computationally lightweight and yet significantly more accurate than the state-of-the-art predictive safety monitoring techniques.},
  author       = {Henzinger, Thomas A and Kresse, Fabian and Mallik, Kaushik and Yu, Zhengqi and Zikelic, Dorde},
  booktitle    = {7th Annual Learning for Dynamics \& Control Conference},
  issn         = {2640-3498},
  location     = {Ann Arbor, MI, United States},
  pages        = {804--816},
  publisher    = {ML Research Press},
  title        = {{Predictive monitoring of black-box dynamical systems}},
  volume       = {283},
  year         = {2025},
}

@inproceedings{20648,
  abstract     = {Polynomial quantified entailments with existentially and universally quantified variables arise in many problems of verification and program analysis. We present PolyQEnt, a tool for solving polynomial quantified entailments in which the variables on both sides of the implication are real-valued or unbounded integers. Our tool provides a unified framework for the polynomial quantified entailment problems that arise in several papers in the literature. Our experimental evaluation over a wide range of benchmarks shows the applicability of the tool, as well as its benefits compared to simply using existing SMT solvers on such constraints.},
  author       = {Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Kafshdar Goharshady, Ehsan and Karrabi, Mehrdad and Saadat, Milad and Seeliger, Maximilian and Zikelic, Dorde},
  booktitle    = {23rd International Symposium on Automated Technology for Verification and Analysis},
  isbn         = {9783032087065},
  issn         = {1611-3349},
  location     = {Bengaluru, India},
  pages        = {411--424},
  publisher    = {Springer Nature},
  title        = {{PolyQEnt: A polynomial quantified entailment solver}},
  doi          = {10.1007/978-3-032-08707-2_19},
  volume       = {16145},
  year         = {2025},
}

@inproceedings{19667,
  abstract     = {The problem of checking satisfiability of linear real arithmetic (LRA) and non-linear real arithmetic (NRA) formulas has broad applications; in particular, such formulas are at the heart of logic-related applications in artificial intelligence, program analysis, etc. While there has been much work on checking satisfiability of unquantified LRA and NRA formulas, the problem of checking satisfiability of quantified LRA and NRA formulas remains a significant challenge. The main bottleneck in existing methods is a computationally expensive quantifier elimination step. In this work, we propose a novel method for efficient quantifier elimination in quantified LRA and NRA formulas. We propose a template-based Skolemization approach, where we automatically synthesize linear/polynomial Skolem functions in order to eliminate quantifiers in the formula. The key technical ingredients in our approach are Positivstellensatz theorems from algebraic geometry, which allow for efficient manipulation of polynomial inequalities. Our method offers a range of appealing theoretical properties combined with strong practical performance. On the theory side, our method is sound, semi-complete, and runs in subexponential time and polynomial space, as opposed to existing sound and complete quantifier elimination methods that run in doubly-exponential time and at least exponential space. On the practical side, our experiments show superior performance compared to state-of-the-art SMT solvers in terms of the number of solved instances and runtime, both on LRA and on NRA benchmarks.},
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshady, Ehsan and Karrabi, Mehrdad and Motwani, Harshit J. and Seeliger, Maximilian and Zikelic, Dorde},
  booktitle    = {Proceedings of the 39th AAAI Conference on Artificial Intelligence},
  issn         = {2374-3468},
  location     = {Philadelphia, PA, United States},
  number       = {11},
  pages        = {11158--11166},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Quantified linear and polynomial arithmetic satisfiability via template-based skolemization}},
  doi          = {10.1609/aaai.v39i11.33213},
  volume       = {39},
  year         = {2025},
}

@inproceedings{19668,
  abstract     = {Learning-based methods provide a promising approach to solving highly non-linear control tasks that are often challenging for classical control methods. To ensure the satisfaction of a safety property, learning-based methods jointly learn a control policy together with a certificate function for the property. Popular examples include barrier functions for safety and Lyapunov functions for asymptotic stability. While there has been significant progress on learning-based control with certificate functions in the white-box setting, where the correctness of the certificate function can be formally verified, there has been little work on ensuring their reliability in the black-box setting, where the system dynamics are unknown. In this work, we consider the problems of certifying and repairing neural network control policies and certificate functions in the black-box setting. We propose a novel framework that utilizes runtime monitoring to detect system behaviors that violate the property of interest under some initially trained neural network policy and certificate. These violating behaviors are used to extract new training data, which is used to re-train the neural network policy and the certificate function, and ultimately to repair them. We demonstrate the effectiveness of our approach empirically by using it to repair and to boost the safety rate of neural network policies learned by a state-of-the-art method for learning-based control on two autonomous system control tasks.},
  author       = {Yu, Zhengqi and Zikelic, Dorde and Henzinger, Thomas A},
  booktitle    = {Proceedings of the 39th AAAI Conference on Artificial Intelligence},
  issn         = {2374-3468},
  location     = {Philadelphia, PA, United States},
  number       = {25},
  pages        = {26409--26417},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Neural control and certificate repair via runtime monitoring}},
  doi          = {10.1609/aaai.v39i25.34840},
  volume       = {39},
  year         = {2025},
}

@inproceedings{19744,
  abstract     = {We consider the problem of refuting equivalence of probabilistic programs, i.e., the problem of proving that two probabilistic programs induce different output distributions. We study this problem in the context of programs with conditioning (i.e., with observe and score statements), where the output distribution is conditioned by the event that all the observe statements along a run evaluate to true, and where the probability densities of different runs may be updated via the score statements. Building on recent work on programs without conditioning, we present a new equivalence refutation method for programs with conditioning. Our method is based on weighted restarting, a novel transformation of probabilistic programs with conditioning into output-equivalent probabilistic programs without conditioning, which we introduce in this work. Our method is the first to be both (a) fully automated and (b) provably correct. We demonstrate the applicability of our method on a set of programs from the probabilistic inference literature.},
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshady, Ehsan and Novotný, Petr and Zikelic, Dorde},
  booktitle    = {31st International Conference on Tools and Algorithms for the Construction and Analysis of Systems},
  isbn         = {9783031906527},
  issn         = {1611-3349},
  location     = {Hamilton, ON, Canada},
  pages        = {279--300},
  publisher    = {Springer Nature},
  title        = {{Refuting equivalence in probabilistic programs with conditioning}},
  doi          = {10.1007/978-3-031-90653-4_14},
  volume       = {15697},
  year         = {2025},
}

@inproceedings{18155,
  abstract     = {We study the classical problem of verifying programs with respect to formal specifications given in linear temporal logic (LTL). We first present novel sound and complete witnesses for LTL verification over imperative programs. Our witnesses are applicable to both verification (proving) and refutation (finding bugs) settings. We then consider LTL formulas in which atomic propositions can be polynomial constraints and turn our focus to polynomial arithmetic programs, i.e., programs in which every assignment and guard consists only of polynomial expressions. For this setting, we provide an efficient algorithm to automatically synthesize such LTL witnesses. Our synthesis procedure is both sound and semi-complete. Finally, we present experimental results demonstrating the effectiveness of our approach and that it can handle programs which were beyond the reach of previous state-of-the-art tools.},
  author       = {Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Goharshady, Ehsan and Karrabi, Mehrdad and Zikelic, Dorde},
  booktitle    = {26th International Symposium on Formal Methods},
  isbn         = {9783031711619},
  issn         = {1611-3349},
  location     = {Milan, Italy},
  pages        = {600--619},
  publisher    = {Springer Nature},
  title        = {{Sound and complete witnesses for template-based verification of LTL properties on polynomial programs}},
  doi          = {10.1007/978-3-031-71162-6_31},
  volume       = {14933},
  year         = {2024},
}

@inproceedings{18159,
  abstract     = {Markov decision processes (MDPs) are a classical model for decision making in the presence of uncertainty. Often they are viewed as state transformers with planning objectives defined with respect to paths over MDP states. An increasingly popular alternative is to view them as distribution transformers, giving rise to a sequence of probability distributions over MDP states. For instance, reachability and safety properties in modeling robot swarms or chemical reaction networks are naturally defined in terms of probability distributions over states. Verifying such distributional properties is known to be hard and often beyond the reach of classical state-based verification techniques. In this work, we consider the problems of certified policy (i.e., controller) verification and synthesis in MDPs under distributional reach-avoidance specifications. By certified we mean that, along with a policy, we also aim to synthesize a (checkable) certificate ensuring that the MDP indeed satisfies the property. Thus, given the target set of distributions and an unsafe set of distributions over MDP states, our goal is to either synthesize a certificate for a given policy or synthesize a policy along with a certificate, proving that the target distribution can be reached while avoiding unsafe distributions. To solve this problem, we introduce the novel notion of distributional reach-avoid certificates and present automated procedures for (1) synthesizing a certificate for a given policy, and (2) synthesizing a policy together with the certificate, both providing formal guarantees on certificate correctness. Our experimental evaluation demonstrates the ability of our method to solve several non-trivial examples, including a multi-agent robot-swarm model, to synthesize certified policies and to certify existing policies.},
  author       = {Akshay, S and Chatterjee, Krishnendu and Meggendorfer, Tobias and Zikelic, Dorde},
  booktitle    = {33rd International Joint Conference on Artificial Intelligence},
  isbn         = {9781956792041},
  issn         = {1045-0823},
  location     = {Jeju, South Korea},
  pages        = {3--12},
  publisher    = {International Joint Conferences on Artificial Intelligence},
  title        = {{Certified policy verification and synthesis for MDPs under distributional reach-avoidance properties}},
  doi          = {10.24963/ijcai.2024/1},
  year         = {2024},
}

@inproceedings{18160,
  abstract     = {Markov decision processes (MDPs) provide a standard framework for sequential decision making under uncertainty. However, MDPs do not take uncertainty in transition probabilities into account. Robust Markov decision processes (RMDPs) address this shortcoming of MDPs by assigning to each transition an uncertainty set rather than a single probability value. In this work, we consider polytopic RMDPs in which all uncertainty sets are polytopes and study the problem of solving long-run average reward polytopic RMDPs. We present a novel perspective on this problem and show that it can be reduced to solving long-run average reward turn-based stochastic games with finite state and action spaces. This reduction allows us to derive several important consequences that were hitherto not known to hold for polytopic RMDPs. First, we derive new computational complexity bounds for solving long-run average reward polytopic RMDPs, showing for the first time that the threshold decision problem for them is in NP ∩ coNP and that they admit a randomized algorithm with sub-exponential expected runtime. Second, we present Robust Polytopic Policy Iteration (RPPI), a novel policy iteration algorithm for solving long-run average reward polytopic RMDPs. Our experimental evaluation shows that RPPI is much more efficient in solving long-run average reward polytopic RMDPs compared to state-of-the-art methods based on value iteration.},
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshady, Ehsan and Karrabi, Mehrdad and Novotný, Petr and Zikelic, Dorde},
  booktitle    = {33rd International Joint Conference on Artificial Intelligence},
  isbn         = {9781956792041},
  issn         = {1045-0823},
  location     = {Jeju, South Korea},
  pages        = {6707--6715},
  publisher    = {International Joint Conferences on Artificial Intelligence},
  title        = {{Solving long-run average reward robust MDPs via stochastic games}},
  doi          = {10.24963/ijcai.2024/741},
  year         = {2024},
}

@article{17162,
  abstract     = {Cost analysis, also known as resource usage analysis, is the task of finding bounds on the total cost of a program and is a well-studied problem in static analysis. In this work, we consider two classical quantitative problems in cost analysis for probabilistic programs. The first problem is to find a bound on the expected total cost of the program. This is a natural measure for the resource usage of the program and can also be directly applied to average-case runtime analysis. The second problem asks for a tail bound, i.e., given a threshold t, the goal is to find a probability bound p such that ℙ[total cost ≥ t] ≤ p. Intuitively, given a threshold t on the resource, the problem is to find the likelihood that the total cost exceeds this threshold.
First, for expectation bounds, a major obstacle in previous works on cost analysis is that they can handle only non-negative costs or bounded variable updates. In contrast, we provide a new variant of the standard notion of cost martingales that allows us to find expectation bounds for a class of programs with general positive or negative costs and no restriction on the variable updates. More specifically, our approach is applicable as long as there is a lower bound on the total cost incurred along every path.
Second, for tail bounds, all previous methods are limited to programs in which the expected total cost is finite. In contrast, we present a novel approach, based on a combination of our martingale-based method for expectation bounds with a quantitative safety analysis, to obtain a solution to the tail bound problem that is applicable even to programs with infinite expected cost. Specifically, this allows us to obtain runtime tail bounds for programs that do not terminate almost-surely.
In summary, we provide a novel combination of martingale-based cost analysis and quantitative safety analysis that is able to find expectation and tail cost bounds for probabilistic programs, without the restrictions of non-negative costs, bounded updates, or finiteness of the expected total cost. Finally, we provide experimental results showcasing that our approach can solve instances that were beyond the reach of previous methods.},
  author       = {Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Meggendorfer, Tobias and Zikelic, Dorde},
  issn         = {2475-1421},
  journal      = {Proceedings of the ACM on Programming Languages},
  number       = {OOPSLA1},
  publisher    = {Association for Computing Machinery},
  title        = {{Quantitative bounds on resource usage of probabilistic programs}},
  doi          = {10.1145/3649824},
  volume       = {8},
  year         = {2024},
}

@article{17283,
  abstract     = {We consider the problems of statically refuting equivalence and similarity of output distributions defined by a pair of probabilistic programs. Equivalence and similarity are two fundamental relational properties of probabilistic programs that are essential for their correctness, both in implementation and in compilation. In this work, we present a new method for static equivalence and similarity refutation. Our method refutes equivalence and similarity by computing a function over program outputs whose expected value with respect to the output distributions of the two programs is different. The function is computed simultaneously with an upper expectation supermartingale and a lower expectation submartingale for the two programs, which, as we show, together provide a formal certificate for refuting equivalence and similarity. To the best of our knowledge, our method is the first approach to relational program analysis to offer the combination of the following desirable features: (1) it is fully automated, (2) it is applicable to infinite-state probabilistic programs, and (3) it provides formal guarantees on the correctness of its results. We implement a prototype of our method, and our experiments demonstrate its effectiveness in refuting equivalence and similarity on a number of examples collected from the literature.},
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshady, Ehsan and Novotný, Petr and Zikelic, Dorde},
  issn         = {2475-1421},
  journal      = {Proceedings of the ACM on Programming Languages},
  number       = {PLDI},
  publisher    = {Association for Computing Machinery},
  title        = {{Equivalence and similarity refutation for probabilistic programs}},
  doi          = {10.1145/3656462},
  volume       = {8},
  year         = {2024},
}

@inproceedings{17328,
  abstract     = {We study selfish mining attacks in longest-chain blockchains like Bitcoin, but where the proof of work is replaced with efficient proof systems - like proofs of stake or proofs of space - and consider the problem of computing an optimal selfish mining attack which maximizes the expected relative revenue of the adversary, thus minimizing the chain quality. To this end, we propose a novel selfish mining attack that aims to maximize this objective and formally model the attack as a Markov decision process (MDP). We then present a formal analysis procedure which computes an ϵ-tight lower bound on the optimal expected relative revenue in the MDP and a strategy that achieves this ϵ-tight lower bound, where ϵ > 0 may be any specified precision. Our analysis is fully automated and provides formal guarantees on the correctness of its results. We evaluate our selfish mining attack and observe that it achieves superior expected relative revenue compared to two considered baselines.
In concurrent work, [Sarenche FC'24] performs an automated analysis of selfish mining in predictable longest-chain blockchains based on efficient proof systems. Predictable means that the randomness for the challenges is fixed for many blocks (as used, e.g., in Ouroboros), while we consider unpredictable (Bitcoin-like) chains, where the challenge is derived from the previous block.},
  author       = {Chatterjee, Krishnendu and Ebrahimzadeh, Amirali and Karrabi, Mehrdad and Pietrzak, Krzysztof Z and Yeo, Michelle X and Zikelic, Dorde},
  booktitle    = {Proceedings of the 43rd Annual ACM Symposium on Principles of Distributed Computing},
  isbn         = {9798400706684},
  location     = {Nantes, France},
  pages        = {268--278},
  publisher    = {Association for Computing Machinery},
  title        = {{Fully automated selfish mining analysis in efficient proof systems blockchains}},
  doi          = {10.1145/3662158.3662769},
  year         = {2024},
}

@inproceedings{14242,
  abstract     = {We study the problem of training and certifying adversarially robust quantized neural networks (QNNs). Quantization is a technique for making neural networks more efficient by running them using low-bit integer arithmetic and is therefore commonly adopted in industry. Recent work has shown that floating-point neural networks that have been verified to be robust can become vulnerable to adversarial attacks after quantization, and certification of the quantized representation is necessary to guarantee robustness. In this work, we present quantization-aware interval bound propagation (QA-IBP), a novel method for training robust QNNs. Inspired by advances in robust learning of non-quantized networks, our training algorithm computes the gradient of an abstract representation of the actual network. Unlike existing approaches, our method can handle the discrete semantics of QNNs. Based on QA-IBP, we also develop a complete verification procedure for verifying the adversarial robustness of QNNs, which is guaranteed to terminate and produce a correct answer. Compared to existing approaches, the key advantage of our verification procedure is that it runs entirely on GPU or other accelerator devices. We demonstrate experimentally that our approach significantly outperforms existing methods and establish the new state-of-the-art for training and certifying the robustness of QNNs.},
  author       = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A and Rus, Daniela},
  booktitle    = {Proceedings of the 37th AAAI Conference on Artificial Intelligence},
  isbn         = {9781577358800},
  location     = {Washington, DC, United States},
  number       = {12},
  pages        = {14964--14973},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Quantization-aware interval bound propagation for training certifiably robust quantized neural networks}},
  doi          = {10.1609/aaai.v37i12.26747},
  volume       = {37},
  year         = {2023},
}

@inproceedings{14243,
  abstract     = {Two-player zero-sum "graph games" are central in logic, verification, and multi-agent systems. The game proceeds by placing a token on a vertex of a graph, and allowing the players to move it to produce an infinite path, which determines the winner or payoff of the game. Traditionally, the players alternate turns in moving the token. In "bidding games", however, the players have budgets and in each turn, an auction (bidding) determines which player moves the token. So far, bidding games have only been studied as full-information games. In this work we initiate the study of partial-information bidding games: we study bidding games in which a player's initial budget is drawn from a known probability distribution. We show that while for some bidding mechanisms and objectives, it is straightforward to adapt the results from the full-information setting to the partial-information setting, for others, the analysis is significantly more challenging, requires new techniques, and gives rise to interesting results. Specifically, we study games with "mean-payoff" objectives in combination with "poorman" bidding. We construct optimal strategies for a partially-informed player who plays against a fully-informed adversary. We show that, somewhat surprisingly, the "value" under pure strategies does not necessarily exist in such games.},
  author       = {Avni, Guy and Jecker, Ismael R and Zikelic, Dorde},
  booktitle    = {Proceedings of the 37th AAAI Conference on Artificial Intelligence},
  isbn         = {9781577358800},
  location     = {Washington, DC, United States},
  number       = {5},
  pages        = {5464--5471},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Bidding graph games with partially-observable budgets}},
  doi          = {10.1609/aaai.v37i5.25679},
  volume       = {37},
  year         = {2023},
}

@inproceedings{14317,
  abstract     = {Markov decision processes can be viewed as transformers of probability distributions. While this view is useful from a practical standpoint to reason about trajectories of distributions, basic reachability and safety problems are known to be computationally intractable (i.e., Skolem-hard) to solve in such models. Further, we show that even for simple examples of MDPs, strategies for safety objectives over distributions can require infinite memory and randomization.
In light of this, we present a novel overapproximation approach to synthesize strategies in an MDP, such that a safety objective over the distributions is met. More precisely, we develop a new framework for template-based synthesis of certificates as affine distributional and inductive invariants for safety objectives in MDPs. We provide two algorithms within this framework. One can only synthesize memoryless strategies, but has relative completeness guarantees, while the other can synthesize general strategies. The runtime complexity of both algorithms is in PSPACE. We implement these algorithms and show that they can solve several non-trivial examples.},
  author       = {Akshay, S. and Chatterjee, Krishnendu and Meggendorfer, Tobias and Zikelic, Dorde},
  booktitle    = {35th International Conference on Computer Aided Verification},
  isbn         = {9783031377082},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {86--112},
  publisher    = {Springer Nature},
  title        = {{MDPs as distribution transformers: Affine invariant synthesis for safety objectives}},
  doi          = {10.1007/978-3-031-37709-9_5},
  volume       = {13966},
  year         = {2023},
}

@inproceedings{14518,
  abstract     = {We consider bidding games, a class of two-player zero-sum graph games. The game proceeds as follows. Both players have bounded budgets. A token is placed on a vertex of a graph, in each turn the players simultaneously submit bids, and the higher bidder moves the token, where we break bidding ties in favor of Player 1. Player 1 wins the game iff the token visits a designated target vertex. We consider, for the first time, poorman discrete-bidding in which the granularity of the bids is restricted and the higher bid is paid to the bank. Previous work either did not impose granularity restrictions or considered Richman bidding (bids are paid to the opponent). While the latter mechanisms are technically more accessible, the former is more appealing from a practical standpoint. Our study focuses on threshold budgets, which is the necessary and sufficient initial budget required for Player 1 to ensure winning against a given Player 2 budget. We first show existence of thresholds. In DAGs, we show that threshold budgets can be approximated with error bounds by thresholds under continuous-bidding and that they exhibit a periodic behavior. We identify closed-form solutions in special cases. We implement and experiment with an algorithm to find threshold budgets.},
  author       = {Avni, Guy and Meggendorfer, Tobias and Sadhukhan, Suman and Tkadlec, Josef and Zikelic, Dorde},
  booktitle    = {26th European Conference on Artificial Intelligence},
  isbn         = {9781643684369},
  issn         = {0922-6389},
  location     = {Krakow, Poland},
  pages        = {141--148},
  publisher    = {IOS Press},
  title        = {{Reachability poorman discrete-bidding games}},
  doi          = {10.3233/FAIA230264},
  volume       = {372},
  year         = {2023},
}

@inproceedings{14559,
  abstract     = {We consider the problem of learning control policies in discrete-time stochastic systems which guarantee that the system stabilizes within some specified stabilization region with probability 1. Our approach is based on the novel notion of stabilizing ranking supermartingales (sRSMs) that we introduce in this work. Our sRSMs overcome the limitation of methods proposed in previous works whose applicability is restricted to systems in which the stabilizing region cannot be left once entered under any control policy. We present a learning procedure that learns a control policy together with an sRSM that formally certifies probability 1 stability, both learned as neural networks. We show that this procedure can also be adapted to formally verifying that, under a given Lipschitz continuous control policy, the stochastic system stabilizes within some stabilizing region with probability 1. Our experimental evaluation shows that our learning procedure can successfully learn provably stabilizing policies in practice.},
  author       = {Ansaripour, Matin and Chatterjee, Krishnendu and Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde},
  booktitle    = {21st International Symposium on Automated Technology for Verification and Analysis},
  isbn         = {9783031453281},
  issn         = {1611-3349},
  location     = {Singapore, Singapore},
  pages        = {357--379},
  publisher    = {Springer Nature},
  title        = {{Learning provably stabilizing neural controllers for discrete-time stochastic systems}},
  doi          = {10.1007/978-3-031-45329-8_17},
  volume       = {14215},
  year         = {2023},
}

@inproceedings{13142,
  abstract     = {Reinforcement learning has received much attention for learning controllers of deterministic systems. We consider a learner-verifier framework for stochastic control systems and survey recent methods that formally guarantee a conjunction of reachability and safety properties. Given a property and a lower bound on the probability of the property being satisfied, our framework jointly learns a control policy and a formal certificate to ensure the satisfaction of the property with a desired probability threshold. Both the control policy and the formal certificate are continuous functions from states to reals, which are learned as parameterized neural networks. While in the deterministic case, the certificates are invariant and barrier functions for safety, or Lyapunov and ranking functions for liveness, in the stochastic case the certificates are supermartingales. For certificate verification, we use interval arithmetic abstract interpretation to bound the expected values of neural network functions.},
  author       = {Chatterjee, Krishnendu and Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde},
  booktitle    = {29th International Conference on Tools and Algorithms for the Construction and Analysis of Systems},
  isbn         = {9783031308222},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {3--25},
  publisher    = {Springer Nature},
  title        = {{A learner-verifier framework for neural network controllers and certificates of stochastic systems}},
  doi          = {10.1007/978-3-031-30823-9_1},
  volume       = {13993},
  year         = {2023},
}

@article{14778,
  abstract     = {We consider the almost-sure (a.s.) termination problem for probabilistic programs, which are a stochastic extension of classical imperative programs. Lexicographic ranking functions provide a sound and practical approach for proving termination of non-probabilistic programs, and their extension to probabilistic programs is achieved via lexicographic ranking supermartingales (LexRSMs). However, LexRSMs introduced in previous work have a limitation that impedes their automation: all of their components have to be non-negative in all reachable states. This might result in a LexRSM not existing even for simple terminating programs. Our contributions are twofold. First, we introduce a generalization of LexRSMs that allows some components to be negative. This standard feature of non-probabilistic termination proofs was hitherto not known to be sound in the probabilistic setting, as the soundness proof requires a careful analysis of the underlying stochastic process. Second, we present polynomial-time algorithms using our generalized LexRSMs for proving a.s. termination in broad classes of linear-arithmetic programs.},
  author       = {Chatterjee, Krishnendu and Kafshdar Goharshady, Ehsan and Novotný, Petr and Zárevúcky, Jiří and Zikelic, Dorde},
  issn         = {1433-299X},
  journal      = {Formal Aspects of Computing},
  number       = {2},
  publisher    = {Association for Computing Machinery},
  title        = {{On lexicographic proof rules for probabilistic termination}},
  doi          = {10.1145/3585391},
  volume       = {35},
  year         = {2023},
}

@inproceedings{14830,
  abstract     = {We study the problem of learning controllers for discrete-time non-linear stochastic dynamical systems with formal reach-avoid guarantees. This work presents the first method for providing formal reach-avoid guarantees, which combine and generalize stability and safety guarantees, with a tolerable probability threshold p in [0,1] over the infinite time horizon. Our method leverages advances in the machine learning literature and represents formal certificates as neural networks. In particular, we learn a certificate in the form of a reach-avoid supermartingale (RASM), a novel notion that we introduce in this work. Our RASMs provide reachability and avoidance guarantees by imposing constraints on what can be viewed as a stochastic extension of level sets of Lyapunov functions for deterministic systems. Our approach solves several important problems: it can be used to learn a control policy from scratch, to verify a reach-avoid specification for a fixed control policy, or to fine-tune a pre-trained policy if it does not satisfy the reach-avoid specification. We validate our approach on three stochastic non-linear reinforcement learning tasks.},
  author       = {Zikelic, Dorde and Lechner, Mathias and Henzinger, Thomas A and Chatterjee, Krishnendu},
  booktitle    = {Proceedings of the 37th AAAI Conference on Artificial Intelligence},
  issn         = {2374-3468},
  location     = {Washington, DC, United States},
  number       = {10},
  pages        = {11926--11935},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Learning control policies for stochastic systems with reach-avoid guarantees}},
  doi          = {10.1609/aaai.v37i10.26407},
  volume       = {37},
  year         = {2023},
}

