% Journal article. Only the acronym in the title is brace-protected so that
% bibliography styles remain free to recase the rest of the title.
@article{9311,
  author       = {Chatterjee, Krishnendu and Saona Urmeneta, Raimundo J. and Ziliotto, Bruno},
  title        = {Finite-memory strategies in {POMDPs} with long-run average objectives},
  journal      = {Mathematics of Operations Research},
  volume       = {47},
  number       = {1},
  pages        = {100--119},
  year         = {2022},
  publisher    = {Institute for Operations Research and the Management Sciences},
  issn         = {1526-5471},
  doi          = {10.1287/moor.2020.1116},
  keywords     = {Management Science and Operations Research, General Mathematics, Computer Science Applications},
  abstract     = {Partially observable Markov decision processes (POMDPs) are standard models for dynamic systems with probabilistic and nondeterministic behaviour in uncertain environments. We prove that in POMDPs with long-run average objective, the decision maker has approximately optimal strategies with finite memory. This implies notably that approximating the long-run value is recursively enumerable, as well as a weak continuity property of the value with respect to the transition function.},
}

