@inproceedings{15006,
  abstract     = {Graphical games are a useful framework for modeling the interactions of (selfish) agents who are connected via an underlying topology and whose behaviors influence each other. They have wide applications ranging from computer science to economics and biology. Yet, even though an agent’s payoff only depends on the actions of their direct neighbors in graphical games, computing the Nash equilibria and making statements about the convergence time of "natural" local dynamics in particular can be highly challenging. In this work, we present a novel approach for classifying complexity of Nash equilibria in graphical games by establishing a connection to local graph algorithms, a subfield of distributed computing. In particular, we make the observation that the equilibria of graphical games are equivalent to locally verifiable labelings (LVL) in graphs; vertex labelings which are verifiable with constant-round local algorithms. This connection allows us to derive novel lower bounds on the convergence time to equilibrium of best-response dynamics in graphical games. Since we establish that distributed convergence can sometimes be provably slow, we also introduce and give bounds on an intuitive notion of "time-constrained" inefficiency of best responses. We exemplify how our results can be used in the implementation of mechanisms that ensure convergence of best responses to a Nash equilibrium. Our results thus also give insight into the convergence of strategy-proof algorithms for graphical games, which is still not well understood.},
  author       = {Hirvonen, Juho and Schmid, Laura and Chatterjee, Krishnendu and Schmid, Stefan},
  booktitle    = {27th International Conference on Principles of Distributed Systems},
  isbn         = {9783959773089},
  issn         = {1868-8969},
  location     = {Tokyo, Japan},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  series       = {Leibniz International Proceedings in Informatics (LIPIcs)},
  title        = {{On the convergence time in graphical games: A locality-sensitive approach}},
  doi          = {10.4230/LIPIcs.OPODIS.2023.11},
  volume       = {286},
  year         = {2024},
}

@inproceedings{12676,
  abstract     = {Turn-based stochastic games (aka simple stochastic games) are two-player zero-sum games played on directed graphs with probabilistic transitions. The goal of player-max is to maximize the probability to reach a target state against the adversarial player-min. These games lie in NP ∩ coNP and are among the rare combinatorial problems that belong to this complexity class for which the existence of polynomial-time algorithm is a major open question. While randomized sub-exponential time algorithm exists, all known deterministic algorithms require exponential time in the worst-case. An important open question has been whether faster algorithms can be obtained parametrized by the treewidth of the game graph. Even deterministic sub-exponential time algorithm for constant treewidth turn-based stochastic games has remain elusive. In this work our main result is a deterministic algorithm to solve turn-based stochastic games that, given a game with $n$ states, treewidth at most $t$, and the bit-complexity of the probabilistic transition function $\log D$, has running time $O((tn^2 \log D)^{t \log n})$. In particular, our algorithm is quasi-polynomial time for games with constant or poly-logarithmic treewidth.},
  author       = {Chatterjee, Krishnendu and Meggendorfer, Tobias and Saona Urmeneta, Raimundo J and Svoboda, Jakub},
  booktitle    = {Proceedings of the 2023 Annual ACM-SIAM Symposium on Discrete Algorithms},
  isbn         = {9781611977554},
  location     = {Florence, Italy},
  pages        = {4590--4605},
  publisher    = {Society for Industrial and Applied Mathematics},
  title        = {{Faster algorithm for turn-based stochastic games with bounded treewidth}},
  doi          = {10.1137/1.9781611977554.ch173},
  year         = {2023},
}

@article{12706,
  abstract     = {Allometric settings of population dynamics models are appealing due to their parsimonious nature and broad utility when studying system level effects. Here, we parameterise the size-scaled Rosenzweig-MacArthur differential equations to eliminate prey-mass dependency, facilitating an in depth analytic study of the equations which incorporates scaling parameters’ contributions to coexistence. We define the functional response term to match empirical findings, and examine situations where metabolic theory derivations and observation diverge. The dynamical properties of the Rosenzweig-MacArthur system, encompassing the distribution of size-abundance equilibria, the scaling of period and amplitude of population cycling, and relationships between predator and prey abundances, are consistent with empirical observation. Our parameterisation is an accurate minimal model across 15+ orders of mass magnitude.},
  author       = {McKerral, Jody C. and Kleshnina, Maria and Ejov, Vladimir and Bartle, Louise and Mitchell, James G. and Filar, Jerzy A.},
  issn         = {1932-6203},
  journal      = {PLoS One},
  number       = {2},
  pages        = {e0279838},
  publisher    = {Public Library of Science},
  title        = {{Empirical parameterisation and dynamical analysis of the allometric Rosenzweig-MacArthur equations}},
  doi          = {10.1371/journal.pone.0279838},
  volume       = {18},
  year         = {2023},
}

@article{12738,
  author       = {Chatterjee, Krishnendu and Katoen, Joost P and Mohr, Stefanie and Weininger, Maximilian and Winkler, Tobias},
  title        = {{Stochastic games with lexicographic objectives}},
  journal      = {Formal Methods in System Design},
  year         = {2023},
  publisher    = {Springer Nature},
  issn         = {1572-8102},
  doi          = {10.1007/s10703-023-00411-4},
  abstract     = {We study turn-based stochastic zero-sum games with lexicographic preferences over objectives. Stochastic games are standard models in control, verification, and synthesis of stochastic reactive systems that exhibit both randomness as well as controllable and adversarial non-determinism. Lexicographic order allows one to consider multiple objectives with a strict preference order. To the best of our knowledge, stochastic games with lexicographic objectives have not been studied before. For a mixture of reachability and safety objectives, we show that deterministic lexicographically optimal strategies exist and memory is only required to remember the already satisfied and violated objectives. For a constant number of objectives, we show that the relevant decision problem is in NP∩coNP, matching the current known bound for single objectives; and in general the decision problem is PSPACE-hard and can be solved in NEXPTIME∩coNEXPTIME. We present an algorithm that computes the lexicographically optimal strategies via a reduction to the computation of optimal strategies in a sequence of single-objectives games. For omega-regular objectives, we restrict our analysis to one-player games, also known as Markov decision processes. We show that lexicographically optimal strategies exist and need either randomization or finite memory. We present an algorithm that solves the relevant decision problem in polynomial time. We have implemented our algorithms and report experimental results on various case studies.},
}

@article{12787,
  abstract     = {Populations evolve in spatially heterogeneous environments. While a certain trait might bring a fitness advantage in some patch of the environment, a different trait might be advantageous in another patch. Here, we study the Moran birth–death process with two types of individuals in a population stretched across two patches of size N, each patch favouring one of the two types. We show that the long-term fate of such populations crucially depends on the migration rate μ between the patches. To classify the possible fates, we use the distinction between polynomial (short) and exponential (long) timescales. We show that when μ is high then one of the two types fixates on the whole population after a number of steps that is only polynomial in N. By contrast, when μ is low then each type holds majority in the patch where it is favoured for a number of steps that is at least exponential in N. Moreover, we precisely identify the threshold migration rate μ⋆ that separates those two scenarios, thereby exactly delineating the situations that support long-term coexistence of the two types. We also discuss the case of various cycle graphs and we present computer simulations that perfectly match our analytical results.},
  author       = {Svoboda, Jakub and Tkadlec, Josef and Kaveh, Kamran and Chatterjee, Krishnendu},
  issn         = {1471-2946},
  journal      = {Proceedings of the Royal Society A: Mathematical, Physical and Engineering Sciences},
  number       = {2271},
  publisher    = {The Royal Society},
  title        = {{Coexistence times in the Moran process with environmental heterogeneity}},
  doi          = {10.1098/rspa.2022.0685},
  volume       = {479},
  year         = {2023},
}

@article{12833,
  abstract     = {The input to the token swapping problem is a graph with vertices $v_1, v_2, \ldots, v_n$, and $n$ tokens with labels $1, 2, \ldots, n$, one on each vertex. The goal is to get token $i$ to vertex $v_i$ for all $i = 1, \ldots, n$ using a minimum number of swaps, where a swap exchanges the tokens on the endpoints of an edge. Token swapping on a tree, also known as “sorting with a transposition tree,” is not known to be in P nor NP-complete. We present some partial results: 1. An optimum swap sequence may need to perform a swap on a leaf vertex that has the correct token (a “happy leaf”), disproving a conjecture of Vaughan. 2. Any algorithm that fixes happy leaves—as all known approximation algorithms for the problem do—has approximation factor at least 4/3. Furthermore, the two best-known 2-approximation algorithms have approximation factor exactly 2. 3. A generalized problem—weighted coloured token swapping—is NP-complete on trees, but solvable in polynomial time on paths and stars. In this version, tokens and vertices have colours, and colours have weights. The goal is to get every token to a vertex of the same colour, and the cost of a swap is the sum of the weights of the two tokens involved.},
  author       = {Biniaz, Ahmad and Jain, Kshitij and Lubiw, Anna and Masárová, Zuzana and Miltzow, Tillmann and Mondal, Debajyoti and Naredla, Anurag Murty and Tkadlec, Josef and Turcotte, Alexi},
  issn         = {1365-8050},
  journal      = {Discrete Mathematics and Theoretical Computer Science},
  number       = {2},
  publisher    = {EPI Sciences},
  title        = {{Token swapping on trees}},
  doi          = {10.46298/DMTCS.8383},
  volume       = {24},
  year         = {2023},
}

@article{12861,
  abstract     = {The field of indirect reciprocity investigates how social norms can foster cooperation when individuals continuously monitor and assess each other’s social interactions. By adhering to certain social norms, cooperating individuals can improve their reputation and, in turn, receive benefits from others. Eight social norms, known as the “leading eight,” have been shown to effectively promote the evolution of cooperation as long as information is public and reliable. These norms categorize group members as either ‘good’ or ‘bad’. In this study, we examine a scenario where individuals instead assign nuanced reputation scores to each other, and only cooperate with those whose reputation exceeds a certain threshold. We find both analytically and through simulations that such quantitative assessments are error-correcting, thus facilitating cooperation in situations where information is private and unreliable. Moreover, our results identify four specific norms that are robust to such conditions, and may be relevant for helping to sustain cooperation in natural populations.},
  author       = {Schmid, Laura and Ekbatani, Farbod and Hilbe, Christian and Chatterjee, Krishnendu},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  publisher    = {Springer Nature},
  title        = {{Quantitative assessment can stabilize indirect reciprocity under imperfect information}},
  doi          = {10.1038/s41467-023-37817-x},
  volume       = {14},
  year         = {2023},
}

@inproceedings{13139,
  abstract     = {A classical problem for Markov chains is determining their stationary (or steady-state) distribution. This problem has an equally classical solution based on eigenvectors and linear equation systems. However, this approach does not scale to large instances, and iterative solutions are desirable. It turns out that a naive approach, as used by current model checkers, may yield completely wrong results. We present a new approach, which utilizes recent advances in partial exploration and mean payoff computation to obtain a correct, converging approximation.},
  author       = {Meggendorfer, Tobias},
  booktitle    = {TACAS 2023: Tools and Algorithms for the Construction and Analysis of Systems},
  isbn         = {9783031308222},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {489--507},
  publisher    = {Springer Nature},
  series       = {Lecture Notes in Computer Science},
  title        = {{Correct approximation of stationary distributions}},
  doi          = {10.1007/978-3-031-30823-9_25},
  volume       = {13993},
  year         = {2023},
}

@inproceedings{13142,
  abstract     = {Reinforcement learning has received much attention for learning controllers of deterministic systems. We consider a learner-verifier framework for stochastic control systems and survey recent methods that formally guarantee a conjunction of reachability and safety properties. Given a property and a lower bound on the probability of the property being satisfied, our framework jointly learns a control policy and a formal certificate to ensure the satisfaction of the property with a desired probability threshold. Both the control policy and the formal certificate are continuous functions from states to reals, which are learned as parameterized neural networks. While in the deterministic case, the certificates are invariant and barrier functions for safety, or Lyapunov and ranking functions for liveness, in the stochastic case the certificates are supermartingales. For certificate verification, we use interval arithmetic abstract interpretation to bound the expected values of neural network functions.},
  author       = {Chatterjee, Krishnendu and Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde},
  booktitle    = {TACAS 2023: Tools and Algorithms for the Construction and Analysis of Systems},
  isbn         = {9783031308222},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {3--25},
  publisher    = {Springer Nature},
  series       = {Lecture Notes in Computer Science},
  title        = {{A learner-verifier framework for neural network controllers and certificates of stochastic systems}},
  doi          = {10.1007/978-3-031-30823-9_1},
  volume       = {13993},
  year         = {2023},
}

@inproceedings{13238,
  abstract     = {We consider a natural problem dealing with weighted packet selection across a rechargeable link, which e.g., finds applications in cryptocurrency networks. The capacity of a link (u, v) is determined by how much nodes u and v allocate for this link. Specifically, the input is a finite ordered sequence of packets that arrive in both directions along a link. Given (u, v) and a packet of weight x going from u to v, node u can either accept or reject the packet. If u accepts the packet, the capacity on link (u, v) decreases by x. Correspondingly, v’s capacity on (u, v) increases by x. If a node rejects the packet, this will entail a cost affinely linear in the weight of the packet. A link is “rechargeable” in the sense that the total capacity of the link has to remain constant, but the allocation of capacity at the ends of the link can depend arbitrarily on the nodes’ decisions. The goal is to minimise the sum of the capacity injected into the link and the cost of rejecting packets. We show that the problem is NP-hard, but can be approximated efficiently with a ratio of $(1+\varepsilon)\cdot(1+\sqrt{3})$ for some arbitrary $\varepsilon > 0$.},
  author       = {Schmid, Stefan and Svoboda, Jakub and Yeo, Michelle X},
  booktitle    = {SIROCCO 2023: Structural Information and Communication Complexity},
  isbn         = {9783031327322},
  issn         = {1611-3349},
  location     = {Alcala de Henares, Spain},
  pages        = {576--594},
  publisher    = {Springer Nature},
  series       = {Lecture Notes in Computer Science},
  title        = {{Weighted packet selection for rechargeable links in cryptocurrency networks: Complexity and approximation}},
  doi          = {10.1007/978-3-031-32733-9_26},
  volume       = {13892},
  year         = {2023},
}

@article{13258,
  author       = {Kleshnina, Maria and Hilbe, Christian and Simsa, Stepan and Chatterjee, Krishnendu and Nowak, Martin A.},
  title        = {{The effect of environmental information on evolution of cooperation in stochastic games}},
  journal      = {Nature Communications},
  volume       = {14},
  year         = {2023},
  publisher    = {Springer Nature},
  issn         = {2041-1723},
  doi          = {10.1038/s41467-023-39625-9},
  abstract     = {Many human interactions feature the characteristics of social dilemmas where individual actions have consequences for the group and the environment. The feedback between behavior and environment can be studied with the framework of stochastic games. In stochastic games, the state of the environment can change, depending on the choices made by group members. Past work suggests that such feedback can reinforce cooperative behaviors. In particular, cooperation can evolve in stochastic games even if it is infeasible in each separate repeated game. In stochastic games, participants have an interest in conditioning their strategies on the state of the environment. Yet in many applications, precise information about the state could be scarce. Here, we study how the availability of information (or lack thereof) shapes evolution of cooperation. Already for simple examples of two state games we find surprising effects. In some cases, cooperation is only possible if there is precise information about the state of the environment. In other cases, cooperation is most abundant when there is no information about the state of the environment. We systematically analyze all stochastic games of a given complexity class, to determine when receiving information about the environment is better, neutral, or worse for evolution of cooperation.},
}

@misc{13336,
  title        = {{kleshnina/stochgames_info: The effect of environmental information on evolution of cooperation in stochastic games}},
  author       = {Kleshnina, Maria},
  year         = {2023},
  publisher    = {Zenodo},
  doi          = {10.5281/ZENODO.8059564},
}

@inproceedings{14830,
  abstract     = {We study the problem of learning controllers for discrete-time non-linear stochastic dynamical systems with formal reach-avoid guarantees. This work presents the first method for providing formal reach-avoid guarantees, which combine and generalize stability and safety guarantees, with a tolerable probability threshold p in [0,1] over the infinite time horizon. Our method leverages advances in machine learning literature and it represents formal certificates as neural networks. In particular, we learn a certificate in the form of a reach-avoid supermartingale (RASM), a novel notion that we introduce in this work. Our RASMs provide reachability and avoidance guarantees by imposing constraints on what can be viewed as a stochastic extension of level sets of Lyapunov functions for deterministic systems. Our approach solves several important problems -- it can be used to learn a control policy from scratch, to verify a reach-avoid specification for a fixed control policy, or to fine-tune a pre-trained policy if it does not satisfy the reach-avoid specification. We validate our approach on 3 stochastic non-linear reinforcement learning tasks.},
  author       = {Zikelic, Dorde and Lechner, Mathias and Henzinger, Thomas A and Chatterjee, Krishnendu},
  booktitle    = {Proceedings of the 37th AAAI Conference on Artificial Intelligence},
  isbn         = {9781577358800},
  issn         = {2374-3468},
  location     = {Washington, DC, United States},
  number       = {10},
  pages        = {11926--11935},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Learning control policies for stochastic systems with reach-avoid guarantees}},
  doi          = {10.1609/aaai.v37i10.26407},
  volume       = {37},
  year         = {2023},
}

@misc{14990,
  title        = {{Artefact for: Correct Approximation of Stationary Distributions}},
  author       = {Meggendorfer, Tobias},
  year         = {2023},
  publisher    = {Zenodo},
  doi          = {10.5281/ZENODO.7548214},
  abstract     = {The software artefact to evaluate the approximation of stationary distributions implementation.},
}

@inproceedings{15023,
  abstract     = {Reinforcement learning has shown promising results in learning neural network policies for complicated control tasks. However, the lack of formal guarantees about the behavior of such policies remains an impediment to their deployment. We propose a novel method for learning a composition of neural network policies in stochastic environments, along with a formal certificate which guarantees that a specification over the policy's behavior is satisfied with the desired probability. Unlike prior work on verifiable RL, our approach leverages the compositional nature of logical specifications provided in SpectRL, to learn over graphs of probabilistic reach-avoid specifications. The formal guarantees are provided by learning neural network policies together with reach-avoid supermartingales (RASM) for the graph’s sub-tasks and then composing them into a global policy. We also derive a tighter lower bound compared to previous work on the probability of reach-avoidance implied by a RASM, which is required to find a compositional policy with an acceptable probabilistic threshold for complex tasks with multiple edge policies. We implement a prototype of our approach and evaluate it on a Stochastic Nine Rooms environment.},
  author       = {Zikelic, Dorde and Lechner, Mathias and Verma, Abhinav and Chatterjee, Krishnendu and Henzinger, Thomas A},
  booktitle    = {37th Conference on Neural Information Processing Systems},
  location     = {New Orleans, LA, United States},
  title        = {{Compositional policy learning in stochastic control systems with formal guarantees}},
  year         = {2023},
}

@inproceedings{13967,
  abstract     = {A classic solution technique for Markov decision processes (MDP) and stochastic games (SG) is value iteration (VI). Due to its good practical performance, this approximative approach is typically preferred over exact techniques, even though no practical bounds on the imprecision of the result could be given until recently. As a consequence, even the most used model checkers could return arbitrarily wrong results. Over the past decade, different works derived stopping criteria, indicating when the precision reaches the desired level, for various settings, in particular MDP with reachability, total reward, and mean payoff, and SG with reachability. In this paper, we provide the first stopping criteria for VI on SG with total reward and mean payoff, yielding the first anytime algorithms in these settings. To this end, we provide the solution in two flavours: First through a reduction to the MDP case and second directly on SG. The former is simpler and automatically utilizes any advances on MDP. The latter allows for more local computations, heading towards better practical efficiency. Our solution unifies the previously mentioned approaches for MDP and SG and their underlying ideas. To achieve this, we isolate objective-specific subroutines as well as identify objective-independent concepts. These structural concepts, while surprisingly simple, form the very essence of the unified solution.},
  author       = {Kretinsky, Jan and Meggendorfer, Tobias and Weininger, Maximilian},
  booktitle    = {38th Annual ACM/IEEE Symposium on Logic in Computer Science},
  isbn         = {9798350335873},
  issn         = {1043-6871},
  location     = {Boston, MA, United States},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Stopping criteria for value iteration on stochastic games with quantitative objectives}},
  doi          = {10.1109/LICS56636.2023.10175771},
  volume       = {2023},
  year         = {2023},
}

@inproceedings{14242,
  author       = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A and Rus, Daniela},
  title        = {{Quantization-aware interval bound propagation for training certifiably robust quantized neural networks}},
  booktitle    = {Proceedings of the 37th AAAI Conference on Artificial Intelligence},
  volume       = {37},
  number       = {12},
  pages        = {14964--14973},
  year         = {2023},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  location     = {Washington, DC, United States},
  isbn         = {9781577358800},
  doi          = {10.1609/aaai.v37i12.26747},
  abstract     = {We study the problem of training and certifying adversarially robust quantized neural networks (QNNs). Quantization is a technique for making neural networks more efficient by running them using low-bit integer arithmetic and is therefore commonly adopted in industry. Recent work has shown that floating-point neural networks that have been verified to be robust can become vulnerable to adversarial attacks after quantization, and certification of the quantized representation is necessary to guarantee robustness. In this work, we present quantization-aware interval bound propagation (QA-IBP), a novel method for training robust QNNs. Inspired by advances in robust learning of non-quantized networks, our training algorithm computes the gradient of an abstract representation of the actual network. Unlike existing approaches, our method can handle the discrete semantics of QNNs. Based on QA-IBP, we also develop a complete verification procedure for verifying the adversarial robustness of QNNs, which is guaranteed to terminate and produce a correct answer. Compared to existing approaches, the key advantage of our verification procedure is that it runs entirely on GPU or other accelerator devices. We demonstrate experimentally that our approach significantly outperforms existing methods and establish the new state-of-the-art for training and certifying the robustness of QNNs.},
}

@inproceedings{14243,
  abstract     = {Two-player zero-sum "graph games" are central in logic, verification, and multi-agent systems. The game proceeds by placing a token on a vertex of a graph, and allowing the players to move it to produce an infinite path, which determines the winner or payoff of the game. Traditionally, the players alternate turns in moving the token. In "bidding games", however, the players have budgets and in each turn, an auction (bidding) determines which player moves the token. So far, bidding games have only been studied as full-information games. In this work we initiate the study of partial-information bidding games: we study bidding games in which a player's initial budget is drawn from a known probability distribution. We show that while for some bidding mechanisms and objectives, it is straightforward to adapt the results from the full-information setting to the partial-information setting, for others, the analysis is significantly more challenging, requires new techniques, and gives rise to interesting results. Specifically, we study games with "mean-payoff" objectives in combination with "poorman" bidding. We construct optimal strategies for a partially-informed player who plays against a fully-informed adversary. We show that, somewhat surprisingly, the "value" under pure strategies does not necessarily exist in such games.},
  author       = {Avni, Guy and Jecker, Ismael R and Zikelic, Dorde},
  booktitle    = {Proceedings of the 37th AAAI Conference on Artificial Intelligence},
  isbn         = {9781577358800},
  location     = {Washington, DC, United States},
  number       = {5},
  pages        = {5464--5471},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Bidding graph games with partially-observable budgets}},
  doi          = {10.1609/aaai.v37i5.25679},
  volume       = {37},
  year         = {2023},
}

@inproceedings{14259,
  abstract     = {We provide a learning-based technique for guessing a winning strategy in a parity game originating from an LTL synthesis problem. A cheaply obtained guess can be useful in several applications. Not only can the guessed strategy be applied as best-effort in cases where the game’s huge size prohibits rigorous approaches, but it can also increase the scalability of rigorous LTL synthesis in several ways. Firstly, checking whether a guessed strategy is winning is easier than constructing one. Secondly, even if the guess is wrong in some places, it can be fixed by strategy iteration faster than constructing one from scratch. Thirdly, the guess can be used in on-the-fly approaches to prioritize exploration in the most fruitful directions.
In contrast to previous works, we (i) reflect the highly structured logical information in game’s states, the so-called semantic labelling, coming from the recent LTL-to-automata translations, and (ii) learn to reflect it properly by learning from previously solved games, bringing the solving process closer to human-like reasoning.},
  author       = {Kretinsky, Jan and Meggendorfer, Tobias and Prokop, Maximilian and Rieder, Sabine},
  booktitle    = {35th International Conference on Computer Aided Verification},
  isbn         = {9783031377051},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {390--414},
  publisher    = {Springer Nature},
  series       = {Lecture Notes in Computer Science},
  title        = {{Guessing winning policies in LTL synthesis by semantic learning}},
  doi          = {10.1007/978-3-031-37706-8_20},
  volume       = {13964},
  year         = {2023},
}

@inproceedings{14317,
  abstract     = {Markov decision processes can be viewed as transformers of probability distributions. While this view is useful from a practical standpoint to reason about trajectories of distributions, basic reachability and safety problems are known to be computationally intractable (i.e., Skolem-hard) to solve in such models. Further, we show that even for simple examples of MDPs, strategies for safety objectives over distributions can require infinite memory and randomization.
In light of this, we present a novel overapproximation approach to synthesize strategies in an MDP, such that a safety objective over the distributions is met. More precisely, we develop a new framework for template-based synthesis of certificates as affine distributional and inductive invariants for safety objectives in MDPs. We provide two algorithms within this framework. One can only synthesize memoryless strategies, but has relative completeness guarantees, while the other can synthesize general strategies. The runtime complexity of both algorithms is in PSPACE. We implement these algorithms and show that they can solve several non-trivial examples.},
  author       = {Akshay, S. and Chatterjee, Krishnendu and Meggendorfer, Tobias and Zikelic, Dorde},
  booktitle    = {35th International Conference on Computer Aided Verification},
  isbn         = {9783031377082},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {86--112},
  publisher    = {Springer Nature},
  series       = {Lecture Notes in Computer Science},
  title        = {{MDPs as distribution transformers: Affine invariant synthesis for safety objectives}},
  doi          = {10.1007/978-3-031-37709-9_5},
  volume       = {13966},
  year         = {2023},
}