[{"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2405.04015"}],"article_processing_charge":"No","oa_version":"Preprint","doi":"10.24963/ijcai.2024/1","date_created":"2024-09-29T22:01:38Z","title":"Certified policy verification and synthesis for MDPs under distributional reach-avoidance properties","department":[{"_id":"KrCh"}],"corr_author":"1","acknowledgement":"This work was supported in part by the ERC-2020-CoG 863818 (FoRM-SMArt), the Singapore Ministry of Education (MOE) Academic Research Fund (AcRF) Tier 1 grant, Google Research Award 2023 and the SBI Foundation Hub for Data and Analytics.","publication_identifier":{"issn":["1045-0823"],"isbn":["9781956792041"]},"project":[{"name":"Formal Methods for Stochastic Models: Algorithms and Applications","grant_number":"863818","_id":"0599E47C-7A3F-11EA-A408-12923DDC885E","call_identifier":"H2020"}],"external_id":{"arxiv":["2405.04015"]},"abstract":[{"lang":"eng","text":"Markov Decision Processes (MDPs) are a classical model for decision making in the presence of uncertainty. Often they are viewed as state transformers with planning objectives defned with respect to paths over MDP states. An increasingly\r\npopular alternative is to view them as distribution transformers, giving rise to a sequence of probability distributions over MDP states. For instance, reachability and safety properties in modeling robot swarms or chemical reaction networks are naturally defned in terms of probability distributions over states. Verifying such distributional properties is known to be hard and often beyond the reach of classical state-based verifcation techniques. In this work, we consider the problems of certifed policy (i.e. controller) verifcation and synthesis in MDPs under distributional reach-avoidance specifcations. By certifed we mean that, along with a policy, we also aim to synthesize a (checkable) certifcate ensuring that the MDP indeed satisfes the property. 
Thus, given the target set of distributions and an unsafe set of distributions over MDP states, our goal is to either synthesize a certificate for a given policy or synthesize a policy along with a certificate, proving that the target distribution can be reached while avoiding unsafe distributions. To solve this problem, we introduce the novel notion of distributional reach-avoid certificates and present automated procedures for (1) synthesizing a certificate for a given policy, and (2) synthesizing a policy together with the certificate, both providing formal guarantees on certificate correctness. Our experimental evaluation demonstrates the ability of our method to solve several non-trivial examples, including a multi-agent robot-swarm model, to synthesize certified policies and to certify existing policies. "}],"month":"09","type":"conference","arxiv":1,"date_published":"2024-09-01T00:00:00Z","_id":"18159","publication_status":"published","language":[{"iso":"eng"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"01","oa":1,"quality_controlled":"1","date_updated":"2025-04-14T07:52:46Z","year":"2024","author":[{"full_name":"Akshay, S","last_name":"Akshay","first_name":"S"},{"orcid":"0000-0002-4561-241X","id":"2E5DCA20-F248-11E8-B48F-1D18A9856A87","full_name":"Chatterjee, Krishnendu","last_name":"Chatterjee","first_name":"Krishnendu"},{"orcid":"0000-0002-1712-2165","id":"b21b0c15-30a2-11eb-80dc-f13ca25802e1","last_name":"Meggendorfer","first_name":"Tobias","full_name":"Meggendorfer, Tobias"},{"last_name":"Zikelic","first_name":"Dorde","full_name":"Zikelic, Dorde","orcid":"0000-0002-4681-1699","id":"294AA7A6-F248-11E8-B48F-1D18A9856A87"}],"page":"3-12","citation":{"ama":"Akshay S, Chatterjee K, Meggendorfer T, Zikelic D. Certified policy verification and synthesis for MDPs under distributional reach-avoidance properties. In: <i>Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence</i>. 
International Joint Conferences on Artificial Intelligence; 2024:3-12. doi:<a href=\"https://doi.org/10.24963/ijcai.2024/1\">10.24963/ijcai.2024/1</a>","mla":"Akshay, S., et al. “Certified Policy Verification and Synthesis for MDPs under Distributional Reach-Avoidance Properties.” <i>Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence</i>, International Joint Conferences on Artificial Intelligence, 2024, pp. 3–12, doi:<a href=\"https://doi.org/10.24963/ijcai.2024/1\">10.24963/ijcai.2024/1</a>.","ieee":"S. Akshay, K. Chatterjee, T. Meggendorfer, and D. Zikelic, “Certified policy verification and synthesis for MDPs under distributional reach-avoidance properties,” in <i>Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence</i>, Jeju, Korea, 2024, pp. 3–12.","apa":"Akshay, S., Chatterjee, K., Meggendorfer, T., &#38; Zikelic, D. (2024). Certified policy verification and synthesis for MDPs under distributional reach-avoidance properties. In <i>Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence</i> (pp. 3–12). Jeju, Korea: International Joint Conferences on Artificial Intelligence. <a href=\"https://doi.org/10.24963/ijcai.2024/1\">https://doi.org/10.24963/ijcai.2024/1</a>","chicago":"Akshay, S, Krishnendu Chatterjee, Tobias Meggendorfer, and Dorde Zikelic. “Certified Policy Verification and Synthesis for MDPs under Distributional Reach-Avoidance Properties.” In <i>Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence</i>, 3–12. International Joint Conferences on Artificial Intelligence, 2024. <a href=\"https://doi.org/10.24963/ijcai.2024/1\">https://doi.org/10.24963/ijcai.2024/1</a>.","ista":"Akshay S, Chatterjee K, Meggendorfer T, Zikelic D. 2024. Certified policy verification and synthesis for MDPs under distributional reach-avoidance properties. 
Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. IJCAI: International Joint Conference on Artificial Intelligence, 3–12.","short":"S. Akshay, K. Chatterjee, T. Meggendorfer, D. Zikelic, in:, Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, International Joint Conferences on Artificial Intelligence, 2024, pp. 3–12."},"publication":"Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence","status":"public","ec_funded":1,"scopus_import":"1","conference":{"location":"Jeju, Korea","name":"IJCAI: International Joint Conference on Artificial Intelligence","end_date":"2024-08-09","start_date":"2024-08-03"},"publisher":"International Joint Conferences on Artificial Intelligence"},{"oa":1,"quality_controlled":"1","OA_place":"repository","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"01","page":"6707-6715","OA_type":"green","citation":{"ieee":"K. Chatterjee, E. Goharshady, M. Karrabi, P. Novotný, and D. Zikelic, “Solving long-run average reward robust MDPs via stochastic games,” in <i>33rd International Joint Conference on Artificial Intelligence</i>, Jeju, South Korea, 2024, pp. 6707–6715.","apa":"Chatterjee, K., Goharshady, E., Karrabi, M., Novotný, P., &#38; Zikelic, D. (2024). Solving long-run average reward robust MDPs via stochastic games. In <i>33rd International Joint Conference on Artificial Intelligence</i> (pp. 6707–6715). Jeju, South Korea: International Joint Conferences on Artificial Intelligence. <a href=\"https://doi.org/10.24963/ijcai.2024/741\">https://doi.org/10.24963/ijcai.2024/741</a>","ama":"Chatterjee K, Goharshady E, Karrabi M, Novotný P, Zikelic D. Solving long-run average reward robust MDPs via stochastic games. In: <i>33rd International Joint Conference on Artificial Intelligence</i>. International Joint Conferences on Artificial Intelligence; 2024:6707-6715. 
doi:<a href=\"https://doi.org/10.24963/ijcai.2024/741\">10.24963/ijcai.2024/741</a>","mla":"Chatterjee, Krishnendu, et al. “Solving Long-Run Average Reward Robust MDPs via Stochastic Games.” <i>33rd International Joint Conference on Artificial Intelligence</i>, International Joint Conferences on Artificial Intelligence, 2024, pp. 6707–15, doi:<a href=\"https://doi.org/10.24963/ijcai.2024/741\">10.24963/ijcai.2024/741</a>.","chicago":"Chatterjee, Krishnendu, Ehsan Goharshady, Mehrdad Karrabi, Petr Novotný, and Dorde Zikelic. “Solving Long-Run Average Reward Robust MDPs via Stochastic Games.” In <i>33rd International Joint Conference on Artificial Intelligence</i>, 6707–15. International Joint Conferences on Artificial Intelligence, 2024. <a href=\"https://doi.org/10.24963/ijcai.2024/741\">https://doi.org/10.24963/ijcai.2024/741</a>.","ista":"Chatterjee K, Goharshady E, Karrabi M, Novotný P, Zikelic D. 2024. Solving long-run average reward robust MDPs via stochastic games. 33rd International Joint Conference on Artificial Intelligence. IJCAI: International Joint Conference on Artificial Intelligence, 6707–6715.","short":"K. Chatterjee, E. Goharshady, M. Karrabi, P. Novotný, D. Zikelic, in:, 33rd International Joint Conference on Artificial Intelligence, International Joint Conferences on Artificial Intelligence, 2024, pp. 
6707–6715."},"status":"public","publication":"33rd International Joint Conference on Artificial Intelligence","publisher":"International Joint Conferences on Artificial Intelligence","conference":{"location":"Jeju, South Korea","start_date":"2024-08-03","end_date":"2024-08-09","name":"IJCAI: International Joint Conference on Artificial Intelligence"},"ec_funded":1,"scopus_import":"1","date_updated":"2025-04-14T07:52:46Z","author":[{"full_name":"Chatterjee, Krishnendu","last_name":"Chatterjee","first_name":"Krishnendu","orcid":"0000-0002-4561-241X","id":"2E5DCA20-F248-11E8-B48F-1D18A9856A87"},{"id":"103b4fa0-896a-11ed-bdf8-87b697bef40d","orcid":"0000-0002-8595-0587","last_name":"Kafshdar Goharshadi","first_name":"Ehsan","full_name":"Kafshdar Goharshadi, Ehsan"},{"first_name":"Mehrdad","last_name":"Karrabi","full_name":"Karrabi, Mehrdad","id":"67638922-f394-11eb-9cf6-f20423e08757"},{"id":"3CC3B868-F248-11E8-B48F-1D18A9856A87","full_name":"Novotný, Petr","last_name":"Novotný","first_name":"Petr"},{"orcid":"0000-0002-4681-1699","id":"294AA7A6-F248-11E8-B48F-1D18A9856A87","last_name":"Zikelic","first_name":"Dorde","full_name":"Zikelic, Dorde"}],"year":"2024","acknowledgement":"This work was supported in part by the ERC-2020-CoG 863818 (FoRM-SMArt) and the Czech Science Foundation\r\ngrant no. GA23-06963S.","corr_author":"1","publication_identifier":{"isbn":["9781956792041"],"issn":["1045-0823"]},"external_id":{"arxiv":["2312.13912"]},"project":[{"call_identifier":"H2020","_id":"0599E47C-7A3F-11EA-A408-12923DDC885E","grant_number":"863818","name":"Formal Methods for Stochastic Models: Algorithms and Applications"}],"type":"conference","month":"09","arxiv":1,"abstract":[{"lang":"eng","text":"Markov decision processes (MDPs) provide a standard framework for sequential decision making under uncertainty. However, MDPs do not take uncertainty in transition probabilities into account. 
Robust Markov decision processes (RMDPs) address this shortcoming of MDPs by assigning to each transition an uncertainty set rather than a single probability value. In this work, we consider polytopic RMDPs in which all uncertainty sets are polytopes and study the problem of solving long-run average reward polytopic RMDPs. We present a novel perspective on this problem and show that it can be reduced to solving long-run average reward turn-based stochastic games with finite state and action spaces. This reduction allows us to derive several important consequences that were hitherto not known to hold for polytopic RMDPs. First, we derive new computational complexity bounds for solving long-run average reward polytopic RMDPs, showing for the first time that the threshold decision problem for them is in NP∩CONP and that they admit a randomized algorithm with sub-exponential expected runtime. Second, we present Robust Polytopic Policy Iteration (RPPI), a novel policy iteration algorithm for solving long-run average reward polytopic RMDPs. Our experimental evaluation shows that RPPI is much more efficient in solving long-run average reward polytopic RMDPs compared to state-of-the-art methods based on value iteration. "}],"date_created":"2024-09-29T22:01:39Z","oa_version":"Preprint","doi":"10.24963/ijcai.2024/741","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2312.13912"}],"article_processing_charge":"No","title":"Solving long-run average reward robust MDPs via stochastic games","department":[{"_id":"KrCh"}],"publication_status":"published","language":[{"iso":"eng"}],"date_published":"2024-09-01T00:00:00Z","_id":"18160"}]
