% arXiv preprint; the eprint identifier 2311.06103 is taken from the DOI below.
% The unpublished entry type requires a note field, so the arXiv provenance lives in note/eprint.
@unpublished{15039,
  abstract     = {A crucial property for achieving secure, trustworthy and interpretable deep learning systems is their robustness: small changes to a system's inputs should not result in large changes to its outputs. Mathematically, this means one strives for networks with a small Lipschitz constant. Several recent works have focused on how to construct such Lipschitz networks, typically by imposing constraints on the weight matrices. In this work, we study an orthogonal aspect, namely the role of the activation function. We show that commonly used activation functions, such as MaxMin, as well as all piece-wise linear ones with two segments unnecessarily restrict the class of representable functions, even in the simplest one-dimensional setting. We furthermore introduce the new N-activation function that is provably more expressive than currently popular activation functions. We provide code at this https URL.},
  author       = {Prach, Bernd and Lampert, Christoph},
  title        = {{1-Lipschitz} neural networks are more expressive with {N-activations}},
  note         = {arXiv preprint},
  eprint       = {2311.06103},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2311.06103},
  year         = {2023},
}

% Conference paper in the ECCV 2022 proceedings (Springer, volume 13681).
% The conference city is stored in venue, because location denotes the publisher's city.
@inproceedings{11839,
  abstract     = {It is a highly desirable property for deep networks to be robust against
small input changes. One popular way to achieve this property is by designing
networks with a small Lipschitz constant. In this work, we propose a new
technique for constructing such Lipschitz networks that has a number of
desirable properties: it can be applied to any linear network layer
(fully-connected or convolutional), it provides formal guarantees on the
Lipschitz constant, it is easy to implement and efficient to run, and it can be
combined with any training objective and optimization method. In fact, our
technique is the first one in the literature that achieves all of these
properties simultaneously. Our main contribution is a rescaling-based weight
matrix parametrization that guarantees each network layer to have a Lipschitz
constant of at most 1 and results in the learned weight matrices to be close to
orthogonal. Hence we call such layers almost-orthogonal Lipschitz (AOL).
Experiments and ablation studies in the context of image classification with
certified robust accuracy confirm that AOL layers achieve results that are on
par with most existing methods. Yet, they are simpler to implement and more
broadly applicable, because they do not require computationally expensive
matrix orthogonalization or inversion steps as part of the network
architecture. We provide code at https://github.com/berndprach/AOL.},
  author       = {Prach, Bernd and Lampert, Christoph},
  title        = {Almost-orthogonal layers for efficient general-purpose {Lipschitz} networks},
  booktitle    = {Computer Vision -- {ECCV} 2022},
  series       = {Lecture Notes in Computer Science},
  volume       = {13681},
  pages        = {350--365},
  publisher    = {Springer Nature},
  venue        = {Tel Aviv, Israel},
  isbn         = {9783031198021},
  doi          = {10.1007/978-3-031-19803-8_21},
  year         = {2022},
}