@article{716,
  abstract     = {Two-player games on graphs are central in many problems in formal verification and program analysis, such as synthesis and verification of open systems. In this work, we consider solving recursive game graphs (or pushdown game graphs) that model the control flow of sequential programs with recursion.While pushdown games have been studied before with qualitative objectives-such as reachability and ?-regular objectives- in this work, we study for the first time such games with the most well-studied quantitative objective, the mean-payoff objective. In pushdown games, two types of strategies are relevant: (1) global strategies, which depend on the entire global history; and (2) modular strategies, which have only local memory and thus do not depend on the context of invocation but rather only on the history of the current invocation of the module. Our main results are as follows: (1) One-player pushdown games with mean-payoff objectives under global strategies are decidable in polynomial time. (2) Two-player pushdown games with mean-payoff objectives under global strategies are undecidable. (3) One-player pushdown games with mean-payoff objectives under modular strategies are NP-hard. (4) Two-player pushdown games with mean-payoff objectives under modular strategies can be solved in NP (i.e., both one-player and two-player pushdown games with mean-payoff objectives under modular strategies are NP-complete). We also establish the optimal strategy complexity by showing that global strategies for mean-payoff objectives require infinite memory even in one-player pushdown games and memoryless modular strategies are sufficient in two-player pushdown games. Finally, we also show that all the problems have the same complexity if the stack boundedness condition is added, where along with the mean-payoff objective the player must also ensure that the stack height is bounded.},
  author       = {Chatterjee, Krishnendu and Velner, Yaron},
  issn         = {00045411},
  journal      = {Journal of the ACM},
  number       = {5},
  pages        = {34},
  publisher    = {ACM},
  title        = {{The complexity of mean-payoff pushdown games}},
  doi          = {10.1145/3121408},
  volume       = {64},
  year         = {2017},
}

@article{717,
  abstract     = {We consider finite-state and recursive game graphs with multidimensional mean-payoff objectives. In recursive games two types of strategies are relevant: global strategies and modular strategies. Our contributions are: (1) We show that finite-state multidimensional mean-payoff games can be solved in polynomial time if the number of dimensions and the maximal absolute value of weights are fixed; whereas for arbitrary dimensions the problem is coNP-complete. (2) We show that one-player recursive games with multidimensional mean-payoff objectives can be solved in polynomial time. Both above algorithms are based on hyperplane separation technique. (3) For recursive games we show that under modular strategies the multidimensional problem is undecidable. We show that if the number of modules, exits, and the maximal absolute value of the weights are fixed, then one-dimensional recursive mean-payoff games under modular strategies can be solved in polynomial time, whereas for unbounded number of exits or modules the problem is NP-hard.},
  author       = {Chatterjee, Krishnendu and Velner, Yaron},
  journal      = {Journal of Computer and System Sciences},
  pages        = {236 -- 259},
  publisher    = {Academic Press},
  title        = {{Hyperplane separation technique for multidimensional mean-payoff games}},
  doi          = {10.1016/j.jcss.2017.04.005},
  volume       = {88},
  year         = {2017},
}

@article{719,
  abstract     = {The ubiquity of computation in modern machines and devices imposes a need to assert the correctness of their behavior. Especially in the case of safety-critical systems, their designers need to take measures that enforce their safe operation. Formal methods has emerged as a research field that addresses this challenge: by rigorously proving that all system executions adhere to their specifications, the correctness of an implementation under concern can be assured. To achieve this goal, a plethora of techniques are nowadays available, all of which are optimized for different system types and application domains.},
  author       = {Chatterjee, Krishnendu and Ehlers, Rüdiger},
  issn         = {00015903},
  journal      = {Acta Informatica},
  number       = {6},
  pages        = {543 -- 544},
  publisher    = {Springer},
  title        = {{Special issue: Synthesis and SYNT 2014}},
  doi          = {10.1007/s00236-017-0299-0},
  volume       = {54},
  year         = {2017},
}

@article{744,
  abstract     = {In evolutionary game theory interactions between individuals are often assumed obligatory. However, in many real-life situations, individuals can decide to opt out of an interaction depending on the information they have about the opponent. We consider a simple evolutionary game theoretic model to study such a scenario, where at each encounter between two individuals the type of the opponent (cooperator/defector) is known with some probability, and where each individual either accepts or opts out of the interaction. If the type of the opponent is unknown, a trustful individual accepts the interaction, whereas a suspicious individual opts out of the interaction. If either of the two individuals opt out both individuals remain without an interaction. We show that in the prisoners dilemma optional interactions along with suspicious behaviour facilitates the emergence of trustful cooperation.},
  author       = {Priklopil, Tadeas and Chatterjee, Krishnendu and Nowak, Martin},
  issn         = {00225193},
  journal      = { Journal of Theoretical Biology},
  pages        = {64 -- 72},
  publisher    = {Elsevier},
  title        = {{Optional interactions and suspicious behaviour facilitates trustful cooperation in prisoners dilemma}},
  doi          = {10.1016/j.jtbi.2017.08.025},
  volume       = {433},
  year         = {2017},
}

@article{464,
  abstract     = {The computation of the winning set for parity objectives and for Streett objectives in graphs as well as in game graphs are central problems in computer-aided verification, with application to the verification of closed systems with strong fairness conditions, the verification of open systems, checking interface compatibility, well-formedness of specifications, and the synthesis of reactive systems. We show how to compute the winning set on n vertices for (1) parity-3 (aka one-pair Streett) objectives in game graphs in time O(n5/2) and for (2) k-pair Streett objectives in graphs in time O(n2+nklogn). For both problems this gives faster algorithms for dense graphs and represents the first improvement in asymptotic running time in 15 years.},
  author       = {Chatterjee, Krishnendu and Henzinger, Monika H and Loitzenbauer, Veronika},
  issn         = {1860-5974},
  journal      = {Logical Methods in Computer Science},
  number       = {3},
  publisher    = {International Federation of Computational Logic},
  title        = {{Improved algorithms for parity and Streett objectives}},
  doi          = {10.23638/LMCS-13(3:26)2017},
  volume       = {13},
  year         = {2017},
}

@article{465,
  abstract     = {The edit distance between two words w 1 , w 2 is the minimal number of word operations (letter insertions, deletions, and substitutions) necessary to transform w 1 to w 2 . The edit distance generalizes to languages L 1 , L 2 , where the edit distance from L 1 to L 2 is the minimal number k such that for every word from L 1 there exists a word in L 2 with edit distance at most k . We study the edit distance computation problem between pushdown automata and their subclasses. The problem of computing edit distance to a pushdown automaton is undecidable, and in practice, the interesting question is to compute the edit distance from a pushdown automaton (the implementation, a standard model for programs with recursion) to a regular language (the specification). In this work, we present a complete picture of decidability and complexity for the following problems: (1) deciding whether, for a given threshold k , the edit distance from a pushdown automaton to a finite automaton is at most k , and (2) deciding whether the edit distance from a pushdown automaton to a finite automaton is finite. },
  author       = {Chatterjee, Krishnendu and Henzinger, Thomas A and Ibsen-Jensen, Rasmus and Otop, Jan},
  issn         = {18605974},
  journal      = {Logical Methods in Computer Science},
  number       = {3},
  publisher    = {International Federation of Computational Logic},
  title        = {{Edit distance for pushdown automata}},
  doi          = {10.23638/LMCS-13(3:23)2017},
  volume       = {13},
  year         = {2017},
}

@article{466,
  abstract     = {We consider Markov decision processes (MDPs) with multiple limit-average (or mean-payoff) objectives. There exist two different views: (i) the expectation semantics, where the goal is to optimize the expected mean-payoff objective, and (ii) the satisfaction semantics, where the goal is to maximize the probability of runs such that the mean-payoff value stays above a given vector. We consider optimization with respect to both objectives at once, thus unifying the existing semantics. Precisely, the goal is to optimize the expectation while ensuring the satisfaction constraint. Our problem captures the notion of optimization with respect to strategies that are risk-averse (i.e., ensure certain probabilistic guarantee). Our main results are as follows: First, we present algorithms for the decision problems which are always polynomial in the size of the MDP. We also show that an approximation of the Pareto-curve can be computed in time polynomial in the size of the MDP, and the approximation factor, but exponential in the number of dimensions. Second, we present a complete characterization of the strategy complexity (in terms of memory bounds and randomization) required to solve our problem. },
  author       = {Chatterjee, Krishnendu and Křetínská, Zuzana and Kretinsky, Jan},
  issn         = {18605974},
  journal      = {Logical Methods in Computer Science},
  number       = {2},
  publisher    = {International Federation of Computational Logic},
  title        = {{Unifying two views on multiple mean-payoff objectives in Markov decision processes}},
  doi          = {10.23638/LMCS-13(2:15)2017},
  volume       = {13},
  year         = {2017},
}

@article{467,
  abstract     = {Recently there has been a significant effort to handle quantitative properties in formal verification and synthesis. While weighted automata over finite and infinite words provide a natural and flexible framework to express quantitative properties, perhaps surprisingly, some basic system properties such as average response time cannot be expressed using weighted automata or in any other known decidable formalism. In this work, we introduce nested weighted automata as a natural extension of weighted automata, which makes it possible to express important quantitative properties such as average response time. In nested weighted automata, a master automaton spins off and collects results from weighted slave automata, each of which computes a quantity along a finite portion of an infinite word. Nested weighted automata can be viewed as the quantitative analogue of monitor automata, which are used in runtime verification. We establish an almost-complete decidability picture for the basic decision problems about nested weighted automata and illustrate their applicability in several domains. In particular, nested weighted automata can be used to decide average response time properties.},
  author       = {Chatterjee, Krishnendu and Henzinger, Thomas A and Otop, Jan},
  issn         = {15293785},
  journal      = {ACM Transactions on Computational Logic (TOCL)},
  number       = {4},
  publisher    = {ACM},
  title        = {{Nested weighted automata}},
  doi          = {10.1145/3152769},
  volume       = {18},
  year         = {2017},
}

@article{512,
  abstract     = {The fixation probability is the probability that a new mutant introduced in a homogeneous population eventually takes over the entire population. The fixation probability is a fundamental quantity of natural selection, and known to depend on the population structure. Amplifiers of natural selection are population structures which increase the fixation probability of advantageous mutants, as compared to the baseline case of well-mixed populations. In this work we focus on symmetric population structures represented as undirected graphs. In the regime of undirected graphs, the strongest amplifier known has been the Star graph, and the existence of undirected graphs with stronger amplification properties has remained open for over a decade. In this work we present the Comet and Comet-swarm families of undirected graphs. We show that for a range of fitness values of the mutants, the Comet and Cometswarm graphs have fixation probability strictly larger than the fixation probability of the Star graph, for fixed population size and at the limit of large populations, respectively. },
  author       = {Pavlogiannis, Andreas and Tkadlec, Josef and Chatterjee, Krishnendu and Nowak, Martin},
  issn         = {20452322},
  journal      = {Scientific Reports},
  number       = {1},
  publisher    = {Nature Publishing Group},
  title        = {{Amplification on undirected population structures: Comets beat stars}},
  doi          = {10.1038/s41598-017-00107-w},
  volume       = {7},
  year         = {2017},
}

@misc{5455,
  abstract     = {A fundamental algorithmic problem at the heart of static analysis is Dyck reachability. The input is a graphwhere the edges are labeled with different types of opening and closing parentheses, and the reachabilityinformation is computed via paths whose parentheses are properly matched. We present new results for Dyckreachability problems with applications to alias analysis and data-dependence analysis. Our main contributions,that include improved upper bounds as well as lower bounds that establish optimality guarantees, are asfollows:First, we consider Dyck reachability on bidirected graphs, which is the standard way of performing field-sensitive points-to analysis. Given a bidirected graph withnnodes andmedges, we present: (i) an algorithmwith worst-case running timeO(m+n·α(n)), whereα(n)is the inverse Ackermann function, improving thepreviously knownO(n2)time bound; (ii) a matching lower bound that shows that our algorithm is optimalwrt to worst-case complexity; and (iii) an optimal average-case upper bound ofO(m)time, improving thepreviously knownO(m·logn)bound.Second, we consider the problem of context-sensitive data-dependence analysis, where the task is to obtainanalysis summaries of library code in the presence of callbacks. Our algorithm preprocesses libraries in almostlinear time, after which the contribution of the library in the complexity of the client analysis is only linear,and only wrt the number of call sites.Third, we prove that combinatorial algorithms for Dyck reachability on general graphs with truly sub-cubic bounds cannot be obtained without obtaining sub-cubic combinatorial algorithms for Boolean MatrixMultiplication, which is a long-standing open problem. Thus we establish that the existing combinatorialalgorithms for Dyck reachability are (conditionally) optimal for general graphs. We also show that the samehardness holds for graphs of constant treewidth.Finally, we provide a prototype implementation of our algorithms for both alias analysis and data-dependenceanalysis. Our experimental evaluation demonstrates that the new algorithms significantly outperform allexisting methods on the two problems, over real-world benchmarks.},
  author       = {Chatterjee, Krishnendu and Choudhary, Bhavya and Pavlogiannis, Andreas},
  issn         = {2664-1690},
  pages        = {37},
  publisher    = {IST Austria},
  title        = {{Optimal Dyck reachability for data-dependence and alias analysis}},
  doi          = {10.15479/AT:IST-2017-870-v1-1},
  year         = {2017},
}

@misc{5456,
  abstract     = {We present a new dynamic partial-order reduction method for stateless model checking of concurrent programs. A common approach for exploring program behaviors relies on enumerating the traces of the program, without storing the visited states (aka stateless exploration). As the number of distinct traces grows exponentially, dynamic partial-order reduction (DPOR) techniques have been successfully used to partition the space of traces into equivalence classes (Mazurkiewicz partitioning), with the goal of exploring only few representative traces from each class.
We introduce a new equivalence on traces under sequential consistency semantics, which we call the observation equivalence. Two traces are observationally equivalent if every read event observes the same write event in both traces. While the traditional Mazurkiewicz equivalence is control-centric, our new definition is data-centric. We show that our observation equivalence is coarser than the Mazurkiewicz equivalence, and in many cases even exponentially coarser. We devise a DPOR exploration of the trace space, called data-centric DPOR, based on the observation equivalence.
1. For acyclic architectures, our algorithm is guaranteed to explore exactly one representative trace from each observation class, while spending polynomial time per class. Hence, our algorithm is optimal wrt the observation equivalence, and in several cases explores exponentially fewer traces than any enumerative method based on the Mazurkiewicz equivalence.
2. For cyclic architectures, we consider an equivalence between traces which is finer than the observation equivalence; but coarser than the Mazurkiewicz equivalence, and in some cases is exponentially coarser. Our data-centric DPOR algorithm remains optimal under this trace equivalence. 
Finally, we perform a basic experimental comparison between the existing Mazurkiewicz-based DPOR and our data-centric DPOR on a set of academic benchmarks. Our results show a significant reduction in both running time and the number of explored equivalence classes.},
  author       = {Chalupa, Marek and Chatterjee, Krishnendu and Pavlogiannis, Andreas and Sinha, Nishant and Vaidya, Kapil},
  issn         = {2664-1690},
  pages        = {36},
  publisher    = {IST Austria},
  title        = {{Data-centric dynamic partial order reduction}},
  doi          = {10.15479/AT:IST-2017-872-v1-1},
  year         = {2017},
}

@inproceedings{551,
  abstract     = {Evolutionary graph theory studies the evolutionary dynamics in a population structure given as a connected graph. Each node of the graph represents an individual of the population, and edges determine how offspring are placed. We consider the classical birth-death Moran process where there are two types of individuals, namely, the residents with fitness 1 and mutants with fitness r. The fitness indicates the reproductive strength. The evolutionary dynamics happens as follows: in the initial step, in a population of all resident individuals a mutant is introduced, and then at each step, an individual is chosen proportional to the fitness of its type to reproduce, and the offspring replaces a neighbor uniformly at random. The process stops when all individuals are either residents or mutants. The probability that all individuals in the end are mutants is called the fixation probability, which is a key factor in the rate of evolution. We consider the problem of approximating the fixation probability. The class of algorithms that is extremely relevant for approximation of the fixation probabilities is the Monte-Carlo simulation of the process. Previous results present a polynomial-time Monte-Carlo algorithm for undirected graphs when r is given in unary. First, we present a simple modification: instead of simulating each step, we discard ineffective steps, where no node changes type (i.e., either residents replace residents, or mutants replace mutants). Using the above simple modification and our result that the number of effective steps is concentrated around the expected number of effective steps, we present faster polynomial-time Monte-Carlo algorithms for undirected graphs. Our algorithms are always at least a factor O(n2/ log n) faster as compared to the previous algorithms, where n is the number of nodes, and is polynomial even if r is given in binary. We also present lower bounds showing that the upper bound on the expected number of effective steps we present is asymptotically tight for undirected graphs. },
  author       = {Chatterjee, Krishnendu and Ibsen-Jensen, Rasmus and Nowak, Martin},
  booktitle    = {Leibniz International Proceedings in Informatics},
  isbn         = {978-395977046-0},
  location     = {Aalborg, Denmark},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Faster Monte Carlo algorithms for fixation probability of the Moran process on undirected graphs}},
  doi          = {10.4230/LIPIcs.MFCS.2017.61},
  volume       = {83},
  year         = {2017},
}

@inproceedings{552,
  abstract     = {Graph games provide the foundation for modeling and synthesis of reactive processes. Such games are played over graphs where the vertices are controlled by two adversarial players. We consider graph games where the objective of the first player is the conjunction of a qualitative objective (specified as a parity condition) and a quantitative objective (specified as a meanpayoff condition). There are two variants of the problem, namely, the threshold problem where the quantitative goal is to ensure that the mean-payoff value is above a threshold, and the value problem where the quantitative goal is to ensure the optimal mean-payoff value; in both cases ensuring the qualitative parity objective. The previous best-known algorithms for game graphs with n vertices, m edges, parity objectives with d priorities, and maximal absolute reward value W for mean-payoff objectives, are as follows: O(nd+1 . m . w) for the threshold problem, and O(nd+2 · m · W) for the value problem. Our main contributions are faster algorithms, and the running times of our algorithms are as follows: O(nd-1 · m ·W) for the threshold problem, and O(nd · m · W · log(n · W)) for the value problem. For mean-payoff parity objectives with two priorities, our algorithms match the best-known bounds of the algorithms for mean-payoff games (without conjunction with parity objectives). Our results are relevant in synthesis of reactive systems with both functional requirement (given as a qualitative objective) and performance requirement (given as a quantitative objective).},
  author       = {Chatterjee, Krishnendu and Henzinger, Monika H and Svozil, Alexander},
  booktitle    = {Leibniz International Proceedings in Informatics},
  isbn         = {978-395977046-0},
  location     = {Aalborg, Denmark},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Faster algorithms for mean-payoff parity games}},
  doi          = {10.4230/LIPIcs.MFCS.2017.39},
  volume       = {83},
  year         = {2017},
}

@inproceedings{553,
  abstract     = {We consider two player, zero-sum, finite-state concurrent reachability games, played for an infinite number of rounds, where in every round, each player simultaneously and independently of the other players chooses an action, whereafter the successor state is determined by a probability distribution given by the current state and the chosen actions. Player 1 wins iff a designated goal state is eventually visited. We are interested in the complexity of stationary strategies measured by their patience, which is defined as the inverse of the smallest non-zero probability employed. Our main results are as follows: We show that: (i) the optimal bound on the patience of optimal and -optimal strategies, for both players is doubly exponential; and (ii) even in games with a single non-absorbing state exponential (in the number of actions) patience is necessary. },
  author       = {Chatterjee, Krishnendu and Hansen, Kristofer and Ibsen-Jensen, Rasmus},
  booktitle    = {Leibniz International Proceedings in Informatics},
  isbn         = {978-395977046-0},
  location     = {Aalborg, Denmark},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Strategy complexity of concurrent safety games}},
  doi          = {10.4230/LIPIcs.MFCS.2017.55},
  volume       = {83},
  year         = {2017},
}

@misc{5559,
  abstract     = {Strong amplifiers of natural selection},
  author       = {Pavlogiannis, Andreas and Tkadlec, Josef and Chatterjee, Krishnendu and Nowak , Martin},
  keywords     = {natural selection},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Strong amplifiers of natural selection}},
  doi          = {10.15479/AT:ISTA:51},
  year         = {2017},
}

@inbook{625,
  abstract     = {In the analysis of reactive systems a quantitative objective assigns a real value to every trace of the system. The value decision problem for a quantitative objective requires a trace whose value is at least a given threshold, and the exact value decision problem requires a trace whose value is exactly the threshold. We compare the computational complexity of the value and exact value decision problems for classical quantitative objectives, such as sum, discounted sum, energy, and mean-payoff for two standard models of reactive systems, namely, graphs and graph games.},
  author       = {Chatterjee, Krishnendu and Doyen, Laurent and Henzinger, Thomas A},
  booktitle    = {Models, Algorithms, Logics and Tools},
  editor       = {Aceto, Luca and Bacci, Giorgio and Ingólfsdóttir, Anna and Legay, Axel and Mardare, Radu},
  isbn         = {978-3-319-63120-2},
  issn         = {0302-9743},
  pages        = {367 -- 381},
  publisher    = {Springer},
  title        = {{The cost of exactness in quantitative reachability}},
  doi          = {10.1007/978-3-319-63121-9_18},
  volume       = {10460},
  year         = {2017},
}

@inproceedings{628,
  abstract     = {We consider the problem of developing automated techniques for solving recurrence relations to aid the expected-runtime analysis of programs. The motivation is that several classical textbook algorithms have quite efficient expected-runtime complexity, whereas the corresponding worst-case bounds are either inefficient (e.g., Quick-Sort), or completely ineffective (e.g., Coupon-Collector). Since the main focus of expected-runtime analysis is to obtain efficient bounds, we consider bounds that are either logarithmic, linear or almost-linear (O(log n), O(n), O(n · log n), respectively, where n represents the input size). Our main contribution is an efficient (simple linear-time algorithm) sound approach for deriving such expected-runtime bounds for the analysis of recurrence relations induced by randomized algorithms. The experimental results show that our approach can efficiently derive asymptotically optimal expected-runtime bounds for recurrences of classical randomized algorithms, including Randomized-Search, Quick-Sort, Quick-Select, Coupon-Collector, where the worst-case bounds are either inefficient (such as linear as compared to logarithmic expected-runtime complexity, or quadratic as compared to linear or almost-linear expected-runtime complexity), or ineffective.},
  author       = {Chatterjee, Krishnendu and Fu, Hongfei and Murhekar, Aniket},
  editor       = {Majumdar, Rupak and Kunčak, Viktor},
  isbn         = {978-331963386-2},
  location     = {Heidelberg, Germany},
  pages        = {118 -- 139},
  publisher    = {Springer},
  title        = {{Automated recurrence analysis for almost linear expected runtime bounds}},
  doi          = {10.1007/978-3-319-63387-9_6},
  volume       = {10426},
  year         = {2017},
}

@inproceedings{639,
  abstract     = {We study the problem of developing efficient approaches for proving worst-case bounds of non-deterministic recursive programs. Ranking functions are sound and complete for proving termination and worst-case bounds of non-recursive programs. First, we apply ranking functions to recursion, resulting in measure functions, and show that they provide a sound and complete approach to prove worst-case bounds of non-deterministic recursive programs. Our second contribution is the synthesis of measure functions in non-polynomial forms. We show that non-polynomial measure functions with logarithm and exponentiation can be synthesized through abstraction of logarithmic or exponentiation terms, Farkas’ Lemma, and Handelman’s Theorem using linear programming. While previous methods obtain worst-case polynomial bounds, our approach can synthesize bounds of the form O(n log n) as well as O(nr) where r is not an integer. We present experimental results to demonstrate that our approach can efficiently obtain worst-case bounds of classical recursive algorithms such as Merge-Sort, Closest-Pair, Karatsuba’s algorithm and Strassen’s algorithm.},
  author       = {Chatterjee, Krishnendu and Fu, Hongfei and Goharshady, Amir},
  editor       = {Majumdar, Rupak and Kunčak, Viktor},
  isbn         = {978-331963389-3},
  location     = {Heidelberg, Germany},
  pages        = {41 -- 63},
  publisher    = {Springer},
  title        = {{Non-polynomial worst case analysis of recursive programs}},
  doi          = {10.1007/978-3-319-63390-9_3},
  volume       = {10427},
  year         = {2017},
}

@inproceedings{645,
  abstract     = {Markov decision processes (MDPs) are standard models for probabilistic systems with non-deterministic behaviours. Long-run average rewards provide a mathematically elegant formalism for expressing long term performance. Value iteration (VI) is one of the simplest and most efficient algorithmic approaches to MDPs with other properties, such as reachability objectives. Unfortunately, a naive extension of VI does not work for MDPs with long-run average rewards, as there is no known stopping criterion. In this work our contributions are threefold. (1) We refute a conjecture related to stopping criteria for MDPs with long-run average rewards. (2) We present two practical algorithms for MDPs with long-run average rewards based on VI. First, we show that a combination of applying VI locally for each maximal end-component (MEC) and VI for reachability objectives can provide approximation guarantees. Second, extending the above approach with a simulation-guided on-demand variant of VI, we present an anytime algorithm that is able to deal with very large models. (3) Finally, we present experimental results showing that our methods significantly outperform the standard approaches on several benchmarks.},
  author       = {Ashok, Pranav and Chatterjee, Krishnendu and Daca, Przemyslaw and Kretinsky, Jan and Meggendorfer, Tobias},
  editor       = {Majumdar, Rupak and Kunčak, Viktor},
  isbn         = {978-331963386-2},
  location     = {Heidelberg, Germany},
  pages        = {201 -- 221},
  publisher    = {Springer},
  title        = {{Value iteration for long run average reward in markov decision processes}},
  doi          = {10.1007/978-3-319-63387-9_10},
  volume       = {10426},
  year         = {2017},
}

@inproceedings{6519,
  abstract     = {Graph games with omega-regular winning conditions provide a mathematical framework to analyze a wide range of problems in the analysis of reactive systems and programs (such as the synthesis of reactive systems, program repair, and the verification of branching time properties). Parity conditions are canonical forms to specify omega-regular winning conditions. Graph games with parity conditions are equivalent to mu-calculus model checking, and thus a very important algorithmic problem. Symbolic algorithms are of great significance because they provide scalable algorithms for the analysis of large finite-state systems, as well as algorithms for the analysis of infinite-state systems with finite quotient. A set-based symbolic algorithm uses the basic set operations and the one-step predecessor operators. We consider graph games with n vertices and parity conditions with c priorities (equivalently, a mu-calculus formula with c alternations of least and greatest fixed points). While many explicit algorithms exist for graph games with parity conditions, for set-based symbolic algorithms there are only two algorithms (notice that we use space to refer to the number of sets stored by a symbolic algorithm): (a) the basic algorithm that requires O(n^c) symbolic operations and linear space; and (b) an improved algorithm that requires O(n^{c/2+1}) symbolic operations but also O(n^{c/2+1}) space (i.e., exponential space). In this work we present two set-based symbolic algorithms for parity games: (a) our first algorithm requires O(n^{c/2+1}) symbolic operations and only requires linear space; and (b) developing on our first algorithm, we present an algorithm that requires O(n^{c/3+1}) symbolic operations and only linear space. We also present the first linear space set-based symbolic algorithm for parity games that requires at most a sub-exponential number of symbolic operations. },
  author       = {Chatterjee, Krishnendu and Dvorák, Wolfgang and Henzinger, Monika H and Loitzenbauer, Veronika},
  location     = {Stockholm, Sweden},
  publisher    = {Schloss Dagstuhl -Leibniz-Zentrum fuer Informatik},
  title        = {{Improved set-based symbolic algorithms for parity games}},
  doi          = {10.4230/LIPICS.CSL.2017.18},
  volume       = {82},
  year         = {2017},
}

