[{"abstract":[{"lang":"eng","text":"<jats:p>Markov decision processes (MDPs) are the de facto framework for sequential decision making in the presence of stochastic uncertainty. A classical optimization criterion for MDPs is to maximize the expected discounted-sum payoff, which ignores low probability catastrophic events with highly negative impact on the system. On the other hand, risk-averse policies require the probability of undesirable events to be below a given threshold, but they do not account for optimization of the expected payoff. We consider MDPs with discounted-sum payoff with failure states which represent catastrophic outcomes. The objective of risk-constrained planning is to maximize the expected discounted-sum payoff among risk-averse policies that ensure the probability to encounter a failure state is below a desired threshold. Our main contribution is an efficient risk-constrained planning algorithm that combines UCT-like search with a predictor learned through interaction with the MDP (in the style of AlphaZero) and with a risk-constrained action selection via linear programming. We demonstrate the effectiveness of our approach with experiments on classical MDPs from the literature, including benchmarks with an order of 10^6 states.</jats:p>"}],"intvolume":"        34","publication_status":"published","publication_identifier":{"issn":["2374-3468"]},"title":"Reinforcement learning of risk-constrained policies in Markov decision processes","oa_version":"Preprint","author":[{"last_name":"Brázdil","full_name":"Brázdil, Tomáš","first_name":"Tomáš"},{"orcid":"0000-0002-4561-241X","first_name":"Krishnendu","last_name":"Chatterjee","full_name":"Chatterjee, Krishnendu","id":"2E5DCA20-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Novotný","full_name":"Novotný, Petr","first_name":"Petr"},{"first_name":"Jiří","last_name":"Vahala","full_name":"Vahala, Jiří"}],"day":"03","article_type":"original","date_created":"2024-03-04T08:07:22Z","volume":34,"language":[{"iso":"eng"}],"oa":1,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","issue":"06","citation":{"ama":"Brázdil T, Chatterjee K, Novotný P, Vahala J. Reinforcement learning of risk-constrained policies in Markov decision processes. <i>Proceedings of the 34th AAAI Conference on Artificial Intelligence</i>. 2020;34(06):9794-9801. doi:<a href=\"https://doi.org/10.1609/aaai.v34i06.6531\">10.1609/aaai.v34i06.6531</a>","short":"T. Brázdil, K. Chatterjee, P. Novotný, J. Vahala, Proceedings of the 34th AAAI Conference on Artificial Intelligence 34 (2020) 9794–9801.","ieee":"T. Brázdil, K. Chatterjee, P. Novotný, and J. Vahala, “Reinforcement learning of risk-constrained policies in Markov decision processes,” <i>Proceedings of the 34th AAAI Conference on Artificial Intelligence</i>, vol. 34, no. 06. Association for the Advancement of Artificial Intelligence, pp. 9794–9801, 2020.","ista":"Brázdil T, Chatterjee K, Novotný P, Vahala J. 2020. Reinforcement learning of risk-constrained policies in Markov decision processes. Proceedings of the 34th AAAI Conference on Artificial Intelligence. 34(06), 9794–9801.","chicago":"Brázdil, Tomáš, Krishnendu Chatterjee, Petr Novotný, and Jiří Vahala. “Reinforcement Learning of Risk-Constrained Policies in Markov Decision Processes.” <i>Proceedings of the 34th AAAI Conference on Artificial Intelligence</i>. Association for the Advancement of Artificial Intelligence, 2020. <a href=\"https://doi.org/10.1609/aaai.v34i06.6531\">https://doi.org/10.1609/aaai.v34i06.6531</a>.","mla":"Brázdil, Tomáš, et al. “Reinforcement Learning of Risk-Constrained Policies in Markov Decision Processes.” <i>Proceedings of the 34th AAAI Conference on Artificial Intelligence</i>, vol. 34, no. 06, Association for the Advancement of Artificial Intelligence, 2020, pp. 9794–801, doi:<a href=\"https://doi.org/10.1609/aaai.v34i06.6531\">10.1609/aaai.v34i06.6531</a>.","apa":"Brázdil, T., Chatterjee, K., Novotný, P., &#38; Vahala, J. (2020). Reinforcement learning of risk-constrained policies in Markov decision processes. <i>Proceedings of the 34th AAAI Conference on Artificial Intelligence</i>. New York, NY, United States: Association for the Advancement of Artificial Intelligence. <a href=\"https://doi.org/10.1609/aaai.v34i06.6531\">https://doi.org/10.1609/aaai.v34i06.6531</a>"},"arxiv":1,"month":"04","department":[{"_id":"KrCh"}],"page":"9794-9801","quality_controlled":"1","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2002.12086"}],"publisher":"Association for the Advancement of Artificial Intelligence","doi":"10.1609/aaai.v34i06.6531","article_processing_charge":"No","type":"journal_article","date_updated":"2024-03-04T08:30:16Z","_id":"15055","project":[{"_id":"25863FF4-B435-11E9-9278-68D0E5697425","name":"Game Theory","grant_number":"S11407","call_identifier":"FWF"}],"status":"public","publication":"Proceedings of the 34th AAAI Conference on Artificial Intelligence","conference":{"location":"New York, NY, United States","name":"AAAI: Conference on Artificial Intelligence","start_date":"2020-02-07","end_date":"2020-02-12"},"acknowledgement":"Krishnendu Chatterjee is supported by the Austrian Science Fund (FWF) NFN Grant No. S11407-N23 (RiSE/SHiNE), and COST Action GAMENET. Tomas Brazdil is supported by the Grant Agency of Masaryk University grant no. MUNI/G/0739/2017 and by the Czech Science Foundation grant No. 18-11193S. Petr Novotny and Jiří Vahala are supported by the Czech Science Foundation grant No. GJ19-15134Y.","date_published":"2020-04-03T00:00:00Z","external_id":{"arxiv":["2002.12086"]},"year":"2020","keyword":["General Medicine"]}]
