@inproceedings{13310,
  abstract     = {Machine-learned systems are in widespread use for making decisions about humans, and it is important that they are fair, i.e., not biased against individuals based on sensitive attributes. We present runtime verification of algorithmic fairness for systems whose models are unknown, but are assumed to have a Markov chain structure. We introduce a specification language that can model many common algorithmic fairness properties, such as demographic parity, equal opportunity, and social burden. We build monitors that observe a long sequence of events as generated by a given system, and output, after each observation, a quantitative estimate of how fair or biased the system was on that run until that point in time. The estimate is proven to be correct modulo a variable error bound and a given confidence level, where the error bound gets tighter as the observed sequence gets longer. Our monitors are of two types, and use, respectively, frequentist and Bayesian statistical inference techniques. While the frequentist monitors compute estimates that are objectively correct with respect to the ground truth, the Bayesian monitors compute estimates that are correct subject to a given prior belief about the system’s model. Using a prototype implementation, we show how we can monitor if a bank is fair in giving loans to applicants from different social backgrounds, and if a college is fair in admitting students while maintaining a reasonable financial burden on the society. Although they exhibit different theoretical complexities in certain cases, in our experiments, both frequentist and Bayesian monitors took less than a millisecond to update their verdicts after each observation.},
  author       = {Henzinger, Thomas A and Karimi, Mahyar and Kueffner, Konstantin and Mallik, Kaushik},
  booktitle    = {Computer Aided Verification},
  isbn         = {9783031377020},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {358--382},
  publisher    = {Springer Nature},
  title        = {{Monitoring algorithmic fairness}},
  doi          = {10.1007/978-3-031-37703-7_17},
  volume       = {13965},
  year         = {2023},
}

@inproceedings{14076,
  abstract     = {Hyperproperties are properties that relate multiple execution traces. Previous work on monitoring hyperproperties focused on synchronous hyperproperties, usually specified in HyperLTL. When monitoring synchronous hyperproperties, all traces are assumed to proceed at the same speed. We introduce (multi-trace) prefix transducers and show how to use them for monitoring synchronous as well as, for the first time, asynchronous hyperproperties. Prefix transducers map multiple input traces into one or more output traces by incrementally matching prefixes of the input traces against expressions similar to regular expressions. The prefixes of different traces which are consumed by a single matching step of the monitor may have different lengths. The deterministic and executable nature of prefix transducers makes them more suitable as an intermediate formalism for runtime verification than logical specifications, which tend to be highly non-deterministic, especially in the case of asynchronous hyperproperties. We report on a set of experiments about monitoring asynchronous version of observational determinism.},
  author       = {Chalupa, Marek and Henzinger, Thomas A},
  booktitle    = {23rd International Conference on Runtime Verification},
  isbn         = {978-3-031-44266-7},
  location     = {Thessaloniki, Greece},
  pages        = {168--190},
  publisher    = {Springer Nature},
  title        = {{Monitoring hyperproperties with prefix transducers}},
  doi          = {10.1007/978-3-031-44267-4_9},
  volume       = {14245},
  year         = {2023},
}

@inproceedings{14242,
  abstract     = {We study the problem of training and certifying adversarially robust quantized neural networks (QNNs). Quantization is a technique for making neural networks more efficient by running them using low-bit integer arithmetic and is therefore commonly adopted in industry. Recent work has shown that floating-point neural networks that have been verified to be robust can become vulnerable to adversarial attacks after quantization, and certification of the quantized representation is necessary to guarantee robustness. In this work, we present quantization-aware interval bound propagation (QA-IBP), a novel method for training robust QNNs. Inspired by advances in robust learning of non-quantized networks, our training algorithm computes the gradient of an abstract representation of the actual network. Unlike existing approaches, our method can handle the discrete semantics of QNNs. Based on QA-IBP, we also develop a complete verification procedure for verifying the adversarial robustness of QNNs, which is guaranteed to terminate and produce a correct answer. Compared to existing approaches, the key advantage of our verification procedure is that it runs entirely on GPU or other accelerator devices. We demonstrate experimentally that our approach significantly outperforms existing methods and establish the new state-of-the-art for training and certifying the robustness of QNNs.},
  author       = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A and Rus, Daniela},
  booktitle    = {Proceedings of the 37th AAAI Conference on Artificial Intelligence},
  isbn         = {9781577358800},
  location     = {Washington, DC, United States},
  number       = {12},
  pages        = {14964--14973},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Quantization-aware interval bound propagation for training certifiably robust quantized neural networks}},
  doi          = {10.1609/aaai.v37i12.26747},
  volume       = {37},
  year         = {2023},
}

@inproceedings{14243,
  abstract     = {Two-player zero-sum "graph games" are central in logic, verification, and multi-agent systems. The game proceeds by placing a token on a vertex of a graph, and allowing the players to move it to produce an infinite path, which determines the winner or payoff of the game. Traditionally, the players alternate turns in moving the token. In "bidding games", however, the players have budgets and in each turn, an auction (bidding) determines which player moves the token. So far, bidding games have only been studied as full-information games. In this work we initiate the study of partial-information bidding games: we study bidding games in which a player's initial budget is drawn from a known probability distribution. We show that while for some bidding mechanisms and objectives, it is straightforward to adapt the results from the full-information setting to the partial-information setting, for others, the analysis is significantly more challenging, requires new techniques, and gives rise to interesting results. Specifically, we study games with "mean-payoff" objectives in combination with "poorman" bidding. We construct optimal strategies for a partially-informed player who plays against a fully-informed adversary. We show that, somewhat surprisingly, the "value" under pure strategies does not necessarily exist in such games.},
  author       = {Avni, Guy and Jecker, Ismael R and Zikelic, Dorde},
  booktitle    = {Proceedings of the 37th AAAI Conference on Artificial Intelligence},
  isbn         = {9781577358800},
  location     = {Washington, DC, United States},
  number       = {5},
  pages        = {5464--5471},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Bidding graph games with partially-observable budgets}},
  doi          = {10.1609/aaai.v37i5.25679},
  volume       = {37},
  year         = {2023},
}

@misc{12407,
  abstract     = {As the complexity and criticality of software increase every year, so does the importance of run-time monitoring. Third-party monitoring, with limited knowledge of the monitored software, and best-effort monitoring, which keeps pace with the monitored software, are especially valuable, yet underexplored areas of run-time monitoring. Most existing monitoring frameworks do not support their combination because they either require access to the monitored code for instrumentation purposes or the processing of all observed events, or both.

We present a middleware framework, VAMOS, for the run-time monitoring of software which is explicitly designed to support third-party and best-effort scenarios. The design goals of VAMOS are (i) efficiency (keeping pace at low overhead), (ii) flexibility (the ability to monitor black-box code through a variety of different event channels, and the connectability to monitors written in different specification languages), and (iii) ease-of-use. To achieve its goals, VAMOS combines aspects of event broker and event recognition systems with aspects of stream processing systems.

We implemented a prototype toolchain for VAMOS and conducted experiments including a case study of monitoring for data races. The results indicate that VAMOS enables writing useful yet efficient monitors, is compatible with a variety of event sources and monitor specifications, and simplifies key aspects of setting up a monitoring system from scratch.},
  author       = {Chalupa, Marek and Mühlböck, Fabian and Muroya Lei, Stefanie and Henzinger, Thomas A},
  issn         = {2664-1690},
  keywords     = {runtime monitoring, best effort, third party},
  pages        = {38},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{VAMOS: Middleware for Best-Effort Third-Party Monitoring}},
  doi          = {10.15479/at:ista:12407},
  year         = {2023},
}

@inproceedings{12467,
  abstract     = {Safety and liveness are elementary concepts of computation, and the foundation of many verification paradigms. The safety-liveness classification of boolean properties characterizes whether a given property can be falsified by observing a finite prefix of an infinite computation trace (always for safety, never for liveness). In quantitative specification and verification, properties assign not truth values, but quantitative values to infinite traces (e.g., a cost, or the distance to a boolean property). We introduce quantitative safety and liveness, and we prove that our definitions induce conservative quantitative generalizations of both (1)~the safety-progress hierarchy of boolean properties and (2)~the safety-liveness decomposition of boolean properties. In particular, we show that every quantitative property can be written as the pointwise minimum of a quantitative safety property and a quantitative liveness property. Consequently, like boolean properties, also quantitative properties can be min-decomposed into safety and liveness parts, or alternatively, max-decomposed into co-safety and co-liveness parts. Moreover, quantitative properties can be approximated naturally. We prove that every quantitative property that has both safe and co-safe approximations can be monitored arbitrarily precisely by a monitor that uses only a finite number of states.},
  author       = {Henzinger, Thomas A and Mazzocchi, Nicolas Adrien and Sarac, Naci E},
  booktitle    = {26th International Conference on Foundations of Software Science and Computation Structures},
  isbn         = {9783031308284},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {349--370},
  publisher    = {Springer Nature},
  title        = {{Quantitative safety and liveness}},
  doi          = {10.1007/978-3-031-30829-1_17},
  volume       = {13992},
  year         = {2023},
}

@article{12704,
  abstract     = {Adversarial training (i.e., training on adversarially perturbed input data) is a well-studied method for making neural networks robust to potential adversarial attacks during inference. However, the improved robustness does not come for free but rather is accompanied by a decrease in overall model accuracy and performance. Recent work has shown that, in practical robot learning applications, the effects of adversarial training do not pose a fair trade-off but inflict a net loss when measured in holistic robot performance. This work revisits the robustness-accuracy trade-off in robot learning by systematically analyzing if recent advances in robust training methods and theory in conjunction with adversarial robot learning, are capable of making adversarial training suitable for real-world robot applications. We evaluate three different robot learning tasks ranging from autonomous driving in a high-fidelity environment amenable to sim-to-real deployment to mobile robot navigation and gesture recognition. Our results demonstrate that, while these techniques make incremental improvements on the trade-off on a relative scale, the negative impact on the nominal accuracy caused by adversarial training still outweighs the improved robustness by an order of magnitude. We conclude that although progress is happening, further advances in robust learning methods are necessary before they can benefit robot learning tasks in practice.},
  author       = {Lechner, Mathias and Amini, Alexander and Rus, Daniela and Henzinger, Thomas A},
  issn         = {2377-3766},
  journal      = {IEEE Robotics and Automation Letters},
  number       = {3},
  pages        = {1595--1602},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Revisiting the adversarial robustness-accuracy tradeoff in robot learning}},
  doi          = {10.1109/LRA.2023.3240930},
  volume       = {8},
  year         = {2023},
}

@inproceedings{12854,
  abstract     = {The main idea behind BUBAAK is to run multiple program analyses in parallel and use runtime monitoring and enforcement to observe and control their progress in real time. The analyses send information about (un)explored states of the program and discovered invariants to a monitor. The monitor processes the received data and can force an analysis to stop the search of certain program parts (which have already been analyzed by other analyses), or to make it utilize a program invariant found by another analysis.
At SV-COMP 2023, the implementation of data exchange between the monitor and the analyses was not yet completed, which is why BUBAAK only ran several analyses in parallel, without any coordination. Still, BUBAAK won the meta-category FalsificationOverall and placed very well in several other (sub)-categories of the competition.},
  author       = {Chalupa, Marek and Henzinger, Thomas A},
  booktitle    = {Tools and Algorithms for the Construction and Analysis of Systems},
  isbn         = {9783031308192},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {535--540},
  publisher    = {Springer Nature},
  title        = {{Bubaak: Runtime monitoring of program verifiers}},
  doi          = {10.1007/978-3-031-30820-8_32},
  volume       = {13994},
  year         = {2023},
}

@inproceedings{12856,
  abstract     = {As the complexity and criticality of software increase every year, so does the importance of run-time monitoring. Third-party monitoring, with limited knowledge of the monitored software, and best-effort monitoring, which keeps pace with the monitored software, are especially valuable, yet underexplored areas of run-time monitoring. Most existing monitoring frameworks do not support their combination because they either require access to the monitored code for instrumentation purposes or the processing of all observed events, or both.

We present a middleware framework, VAMOS, for the run-time monitoring of software which is explicitly designed to support third-party and best-effort scenarios. The design goals of VAMOS are (i) efficiency (keeping pace at low overhead), (ii) flexibility (the ability to monitor black-box code through a variety of different event channels, and the connectability to monitors written in different specification languages), and (iii) ease-of-use. To achieve its goals, VAMOS combines aspects of event broker and event recognition systems with aspects of stream processing systems.
We implemented a prototype toolchain for VAMOS and conducted experiments including a case study of monitoring for data races. The results indicate that VAMOS enables writing useful yet efficient monitors, is compatible with a variety of event sources and monitor specifications, and simplifies key aspects of setting up a monitoring system from scratch.},
  author       = {Chalupa, Marek and Mühlböck, Fabian and Muroya Lei, Stefanie and Henzinger, Thomas A},
  booktitle    = {Fundamental Approaches to Software Engineering},
  isbn         = {9783031308253},
  issn         = {1611-3349},
  location     = {Paris, France},
  pages        = {260--281},
  publisher    = {Springer Nature},
  title        = {{Vamos: Middleware for best-effort third-party monitoring}},
  doi          = {10.1007/978-3-031-30826-0_15},
  volume       = {13991},
  year         = {2023},
}

@article{12876,
  abstract     = {Motivation: The problem of model inference is of fundamental importance to systems biology. Logical models (e.g. Boolean networks; BNs) represent a computationally attractive approach capable of handling large biological networks. The models are typically inferred from experimental data. However, even with a substantial amount of experimental data supported by some prior knowledge, existing inference methods often focus on a small sample of admissible candidate models only.

Results: We propose Boolean network sketches as a new formal instrument for the inference of Boolean networks. A sketch integrates (typically partial) knowledge about the network’s topology and the update logic (obtained through, e.g. a biological knowledge base or a literature search), as well as further assumptions about the properties of the network’s transitions (e.g. the form of its attractor landscape), and additional restrictions on the model dynamics given by the measured experimental data. Our new BNs inference algorithm starts with an ‘initial’ sketch, which is extended by adding restrictions representing experimental data to a ‘data-informed’ sketch and subsequently computes all BNs consistent with the data-informed sketch. Our algorithm is based on a symbolic representation and coloured model-checking. Our approach is unique in its ability to cover a broad spectrum of knowledge and efficiently produce a compact representation of all inferred BNs. We evaluate the method on a non-trivial collection of real-world and simulated data.},
  author       = {Beneš, Nikola and Brim, Luboš and Huvar, Ondřej and Pastva, Samuel and Šafránek, David},
  issn         = {1367-4811},
  journal      = {Bioinformatics},
  number       = {4},
  publisher    = {Oxford Academic},
  title        = {{Boolean network sketches: A unifying framework for logical model inference}},
  doi          = {10.1093/bioinformatics/btad158},
  volume       = {39},
  year         = {2023},
}

@inproceedings{10774,
  abstract     = {We study the problem of specifying sequential information-flow properties of systems. Information-flow properties are hyperproperties, as they compare different traces of a system. Sequential information-flow properties can express changes, over time, in the information-flow constraints. For example, information-flow constraints during an initialization phase of a system may be different from information-flow constraints that are required during the operation phase. We formalize several variants of interpreting sequential information-flow constraints, which arise from different assumptions about what can be observed of the system. For this purpose, we introduce a first-order logic, called Hypertrace Logic, with both trace and time quantifiers for specifying linear-time hyperproperties. We prove that HyperLTL, which corresponds to a fragment of Hypertrace Logic with restricted quantifier prefixes, cannot specify the majority of the studied variants of sequential information flow, including all variants in which the transition between sequential phases (such as initialization and operation) happens asynchronously. Our results rely on new equivalences between sets of traces that cannot be distinguished by certain classes of formulas from Hypertrace Logic. This presents a new approach to proving inexpressiveness results for HyperLTL.},
  author       = {Bartocci, Ezio and Ferrere, Thomas and Henzinger, Thomas A and Nickovic, Dejan and Da Costa, Ana Oliveira},
  booktitle    = {Verification, Model Checking, and Abstract Interpretation},
  isbn         = {9783030945824},
  issn         = {1611-3349},
  location     = {Philadelphia, PA, United States},
  pages        = {1--19},
  publisher    = {Springer Nature},
  title        = {{Flavors of sequential information flow}},
  doi          = {10.1007/978-3-030-94583-1_1},
  volume       = {13182},
  year         = {2022},
}

@inproceedings{10891,
  abstract     = {We present a formal framework for the online black-box monitoring of software using monitors with quantitative verdict functions. Quantitative verdict functions have several advantages. First, quantitative monitors can be approximate, i.e., the value of the verdict function does not need to correspond exactly to the value of the property under observation. Second, quantitative monitors can be quantified universally, i.e., for every possible observed behavior, the monitor tries to make the best effort to estimate the value of the property under observation. Third, quantitative monitors can watch boolean as well as quantitative properties, such as average response time. Fourth, quantitative monitors can use non-finite-state resources, such as counters. As a consequence, quantitative monitors can be compared according to how many resources they use (e.g., the number of counters) and how precisely they approximate the property under observation. This allows for a rich spectrum of cost-precision trade-offs in monitoring software.},
  author       = {Henzinger, Thomas A},
  booktitle    = {Software Verification},
  isbn         = {9783030955601},
  issn         = {1611-3349},
  location     = {New Haven, CT, United States},
  pages        = {3--6},
  publisher    = {Springer Nature},
  title        = {{Quantitative monitoring of software}},
  doi          = {10.1007/978-3-030-95561-8_1},
  volume       = {13124},
  year         = {2022},
}

@inproceedings{11355,
  abstract     = {Contract-based design is a promising methodology for taming the complexity of developing sophisticated systems. A formal contract distinguishes between assumptions, which are constraints that the designer of a component puts on the environments in which the component can be used safely, and guarantees, which are promises that the designer asks from the team that implements the component. A theory of formal contracts can be formalized as an interface theory, which supports the composition and refinement of both assumptions and guarantees.
Although there is a rich landscape of contract-based design methods that address functional and extra-functional properties, we present the first interface theory that is designed for ensuring system-wide security properties. Our framework provides a refinement relation and a composition operation that support both incremental design and independent implementability. We develop our theory for both stateless and stateful interfaces. We illustrate the applicability of our framework with an example inspired from the automotive domain.},
  author       = {Bartocci, Ezio and Ferrere, Thomas and Henzinger, Thomas A and Nickovic, Dejan and Da Costa, Ana Oliveira},
  booktitle    = {Fundamental Approaches to Software Engineering},
  isbn         = {9783030994280},
  issn         = {1611-3349},
  location     = {Munich, Germany},
  pages        = {3--22},
  publisher    = {Springer Nature},
  title        = {{Information-flow interfaces}},
  doi          = {10.1007/978-3-030-99429-7_1},
  volume       = {13241},
  year         = {2022},
}

@phdthesis{11362,
  abstract     = {Deep learning has enabled breakthroughs in challenging computing problems and has emerged as the standard problem-solving tool for computer vision and natural language processing tasks.
One exception to this trend is safety-critical tasks where robustness and resilience requirements contradict the black-box nature of neural networks. 
To deploy deep learning methods for these tasks, it is vital to provide guarantees on neural network agents' safety and robustness criteria. 
This can be achieved by developing formal verification methods to verify the safety and robustness properties of neural networks.

Our goal is to design, develop and assess safety verification methods for neural networks to improve their reliability and trustworthiness in real-world applications.
This thesis establishes techniques for the verification of compressed and adversarially trained models as well as the design of novel neural networks for verifiably safe decision-making.

First, we establish the problem of verifying quantized neural networks. Quantization is a technique that trades numerical precision for the computational efficiency of running a neural network and is widely adopted in industry.
We show that neglecting the reduced precision when verifying a neural network can lead to wrong conclusions about the robustness and safety of the network, highlighting that novel techniques for quantized network verification are necessary. We introduce several bit-exact verification methods explicitly designed for quantized neural networks and experimentally confirm on realistic networks that the network's robustness and other formal properties are affected by the quantization.

Furthermore, we perform a case study providing evidence that adversarial training, a standard technique for making neural networks more robust, has detrimental effects on the network's performance. This robustness-accuracy tradeoff has been studied before regarding the accuracy obtained on classification datasets where each data point is independent of all other data points. On the other hand, we investigate the tradeoff empirically in robot learning settings where a both, a high accuracy and a high robustness, are desirable.
Our results suggest that the negative side-effects of adversarial training outweigh its robustness benefits in practice.

Finally, we consider the problem of verifying safety when running a Bayesian neural network policy in a feedback loop with systems over the infinite time horizon. Bayesian neural networks are probabilistic models for learning uncertainties in the data and are therefore often used on robotic and healthcare applications where data is inherently stochastic.
We introduce a method for recalibrating Bayesian neural networks so that they yield probability distributions over safe decisions only.
Our method learns a safety certificate that guarantees safety over the infinite time horizon to determine which decisions are safe in every possible state of the system.
We demonstrate the effectiveness of our approach on a series of reinforcement learning benchmarks.},
  author       = {Lechner, Mathias},
  isbn         = {978-3-99078-017-6},
  keywords     = {neural networks, verification, machine learning},
  pages        = {124},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Learning verifiable representations}},
  doi          = {10.15479/at:ista:11362},
  year         = {2022},
}

@unpublished{11366,
  abstract     = {Adversarial training (i.e., training on adversarially perturbed input data) is a well-studied method for making neural networks robust to potential adversarial attacks during inference. However, the improved robustness does not
come for free but rather is accompanied by a decrease in overall model accuracy and performance. Recent work has shown that, in practical robot learning applications, the effects of adversarial training do not pose a fair trade-off
but inflict a net loss when measured in holistic robot performance. This work revisits the robustness-accuracy trade-off in robot learning by systematically analyzing if recent advances in robust training methods and theory in
conjunction with adversarial robot learning can make adversarial training suitable for real-world robot applications. We evaluate a wide variety of robot learning tasks ranging from autonomous driving in a high-fidelity environment
amenable to sim-to-real deployment, to mobile robot gesture recognition. Our results demonstrate that, while these techniques make incremental improvements on the trade-off on a relative scale, the negative side-effects caused by
adversarial training still outweigh the improvements by an order of magnitude. We conclude that more substantial advances in robust learning methods are necessary before they can benefit robot learning tasks in practice.},
  author       = {Lechner, Mathias and Amini, Alexander and Rus, Daniela and Henzinger, Thomas A},
  booktitle    = {arXiv},
  note         = {arXiv:2204.07373},
  title        = {{Revisiting the adversarial robustness-accuracy tradeoff in robot learning}},
  doi          = {10.48550/arXiv.2204.07373},
  year         = {2022},
}

@inproceedings{11775,
  abstract     = {Quantitative monitoring can be universal and approximate: For every finite sequence of observations, the specification provides a value and the monitor outputs a best-effort approximation of it. The quality of the approximation may depend on the resources that are available to the monitor. By taking to the limit the sequences of specification values and monitor outputs, we obtain precision-resource trade-offs also for limit monitoring. This paper provides a formal framework for studying such trade-offs using an abstract interpretation for monitors: For each natural number n, the aggregate semantics of a monitor at time n is an equivalence relation over all sequences of at most n observations so that two equivalent sequences are indistinguishable to the monitor and thus mapped to the same output. This abstract interpretation of quantitative monitors allows us to measure the number of equivalence classes (or “resource use”) that is necessary for a certain precision up to a certain time, or at any time. Our framework offers several insights. For example, we identify a family of specifications for which any resource-optimal exact limit monitor is independent of any error permitted over finite traces. Moreover, we present a specification for which any resource-optimal approximate limit monitor does not minimize its resource use at any time. },
  author       = {Henzinger, Thomas A and Mazzocchi, Nicolas Adrien and Sarac, Naci E},
  booktitle    = {22nd International Conference on Runtime Verification},
  issn         = {0302-9743},
  location     = {Tbilisi, Georgia},
  pages        = {200--220},
  publisher    = {Springer Nature},
  title        = {{Abstract monitors for quantitative specifications}},
  doi          = {10.1007/978-3-031-17196-3_11},
  volume       = {13498},
  year         = {2022},
}

@unpublished{14600,
  abstract     = {We study the problem of learning controllers for discrete-time non-linear stochastic dynamical systems with formal reach-avoid guarantees. This work presents the first method for providing formal reach-avoid guarantees, which combine and generalize stability and safety guarantees, with a tolerable probability threshold $p\in[0,1]$ over the infinite time horizon. Our method leverages advances in machine learning literature and it represents formal certificates as neural networks. In particular, we learn a certificate in the form of a reach-avoid supermartingale (RASM), a novel notion that we introduce in this work. Our RASMs provide reachability and avoidance guarantees by imposing constraints on what can be viewed as a stochastic extension of level sets of Lyapunov functions for deterministic systems. Our approach solves several important problems -- it can be used to learn a control policy from scratch, to verify a reach-avoid specification for a fixed control policy, or to fine-tune a pre-trained policy if it does not satisfy the reach-avoid specification. We validate our approach on $3$ stochastic non-linear reinforcement learning tasks.},
  author       = {Zikelic, Dorde and Lechner, Mathias and Henzinger, Thomas A and Chatterjee, Krishnendu},
  eprint       = {2210.05308},
  eprinttype   = {arXiv},
  note         = {arXiv preprint},
  title        = {{Learning control policies for stochastic systems with reach-avoid guarantees}},
  doi          = {10.48550/arXiv.2210.05308},
  year         = {2022},
}

@unpublished{14601,
  abstract     = {In this work, we address the problem of learning provably stable neural network policies for stochastic control systems. While recent work has demonstrated the feasibility of certifying given policies using martingale theory, the problem of how to learn such policies is little explored. Here, we study the effectiveness of jointly learning a policy together with a martingale certificate that proves its stability using a single learning algorithm. We observe that the joint optimization problem becomes easily stuck in local minima when starting from a randomly initialized policy. Our results suggest that some form of pre-training of the policy is required for the joint optimization to repair and verify the policy successfully.},
  author       = {Zikelic, Dorde and Lechner, Mathias and Chatterjee, Krishnendu and Henzinger, Thomas A},
  eprint       = {2205.11991},
  eprinttype   = {arXiv},
  note         = {arXiv preprint},
  title        = {{Learning stabilizing policies in stochastic control systems}},
  doi          = {10.48550/arXiv.2205.11991},
  year         = {2022},
}

@inproceedings{12010,
  abstract     = {World models learn behaviors in a latent imagination space to enhance the sample-efficiency of deep reinforcement learning (RL) algorithms. While learning world models for high-dimensional observations (e.g., pixel inputs) has become practicable on standard RL benchmarks and some games, their effectiveness in real-world robotics applications has not been explored. In this paper, we investigate how such agents generalize to real-world autonomous vehicle control tasks, where advanced model-free deep RL algorithms fail. In particular, we set up a series of time-lap tasks for an F1TENTH racing robot, equipped with a high-dimensional LiDAR sensor, on a set of test tracks with a gradual increase in their complexity. In this continuous-control setting, we show that model-based agents capable of learning in imagination substantially outperform model-free agents with respect to performance, sample efficiency, successful task completion, and generalization. Moreover, we show that the generalization ability of model-based agents strongly depends on the choice of their observation model. We provide extensive empirical evidence for the effectiveness of world models provided with long enough memory horizons in sim2real tasks.},
  author       = {Brunnbauer, Axel and Berducci, Luigi and Brandstatter, Andreas and Lechner, Mathias and Hasani, Ramin and Rus, Daniela and Grosu, Radu},
  internal-note = {NOTE(review): surname "Brandstatter" may have lost an umlaut (Brandst{\"a}tter) during metadata export -- confirm against the published paper before correcting},
  booktitle    = {2022 International Conference on Robotics and Automation},
  isbn         = {9781728196817},
  issn         = {1050-4729},
  location     = {Philadelphia, PA, United States},
  pages        = {7513--7520},
  publisher    = {IEEE},
  title        = {{Latent imagination facilitates zero-shot transfer in autonomous racing}},
  doi          = {10.1109/ICRA46639.2022.9811650},
  year         = {2022},
}

@article{12147,
  abstract     = {Continuous-time neural networks are a class of machine learning systems that can tackle representation learning on spatiotemporal decision-making tasks. These models are typically represented by continuous differential equations. However, their expressive power when they are deployed on computers is bottlenecked by numerical differential equation solvers. This limitation has notably slowed down the scaling and understanding of numerous natural physical phenomena such as the dynamics of nervous systems. Ideally, we would circumvent this bottleneck by solving the given dynamical system in closed form. This is known to be intractable in general. Here, we show that it is possible to closely approximate the interaction between neurons and synapses—the building blocks of natural and artificial neural networks—constructed by liquid time-constant networks efficiently in closed form. To this end, we compute a tightly bounded approximation of the solution of an integral appearing in liquid time-constant dynamics that has had no known closed-form solution so far. This closed-form solution impacts the design of continuous-time and continuous-depth neural models. For instance, since time appears explicitly in closed form, the formulation relaxes the need for complex numerical solvers. Consequently, we obtain models that are between one and five orders of magnitude faster in training and inference compared with differential equation-based counterparts. More importantly, in contrast to ordinary differential equation-based continuous networks, closed-form networks can scale remarkably well compared with other deep learning instances. Lastly, as these models are derived from liquid networks, they show good performance in time-series modelling compared with advanced recurrent neural network models.},
  author       = {Hasani, Ramin and Lechner, Mathias and Amini, Alexander and Liebenwein, Lucas and Ray, Aaron and Tschaikowski, Max and Teschl, Gerald and Rus, Daniela},
  issn         = {2522-5839},
  journal      = {Nature Machine Intelligence},
  keywords     = {Artificial Intelligence, Computer Networks and Communications, Computer Vision and Pattern Recognition, Human-Computer Interaction, Software},
  internal-note = {NOTE(review): keywords read like auto-exported subject classifications (e.g. Scopus subject areas), not author-supplied keywords -- verify before relying on them},
  number       = {11},
  pages        = {992--1003},
  publisher    = {Springer Nature},
  title        = {{Closed-form continuous-time neural networks}},
  doi          = {10.1038/s42256-022-00556-7},
  volume       = {4},
  year         = {2022},
}

