@article{14861,
  abstract     = {Cover Page},
  author       = {Becker, Lea Marie and Berbon, Mélanie and Vallet, Alicia and Grelard, Axelle and Morvan, Estelle and Bardiaux, Benjamin and Lichtenecker, Roman and Ernst, Matthias and Loquet, Antoine and Schanda, Paul},
  journal      = {Angewandte Chemie International Edition},
  issn         = {1521-3773},
  keywords     = {General Chemistry, Catalysis},
  number       = {19},
  publisher    = {Wiley},
  title        = {{Cover Picture: The rigid core and flexible surface of amyloid fibrils probed by Magic-Angle-Spinning NMR spectroscopy of aromatic residues}},
  doi          = {10.1002/anie.202304138},
  volume       = {62},
  year         = {2023},
}

@inproceedings{14862,
  author       = {Rella, Simon and Kulikova, Y and Minnegalieva, Aygul and Kondrashov, Fyodor},
  booktitle    = {European Journal of Public Health},
  issn         = {1464-360X},
  keywords     = {Public Health, Environmental and Occupational Health},
  number       = {Supplement 2},
  publisher    = {Oxford University Press},
  title        = {{Complex vaccination strategies prevent the emergence of vaccine resistance}},
  doi          = {10.1093/eurpub/ckad160.597},
  volume       = {33},
  year         = {2023},
}

@inproceedings{14863,
  author       = {Polesello, Andrea and Muller, Caroline J and Pasquero, Claudia and Meroni, Agostino N.},
  booktitle    = {EGU General Assembly 2023},
  location     = {Vienna, Austria \& Virtual},
  publisher    = {European Geosciences Union},
  title        = {{Intensification mechanisms of tropical cyclones}},
  doi          = {10.5194/egusphere-egu23-6157},
  year         = {2023},
}

@inproceedings{14864,
  author       = {Stöllner, Andrea and Lenton, Isaac C and Muller, Caroline J and Waitukaitis, Scott R},
  booktitle    = {EGU General Assembly 2023},
  location     = {Vienna, Austria \& Virtual},
  publisher    = {European Geosciences Union},
  title        = {{Measuring spontaneous charging of single aerosol particles}},
  doi          = {10.5194/egusphere-egu23-6166},
  year         = {2023},
}

@inproceedings{14865,
  author       = {Hwong, Yi-Ling and Colin, Maxime and Aglas, Philipp and Muller, Caroline J and Sherwood, Steven},
  booktitle    = {EGU General Assembly 2023},
  location     = {Vienna, Austria \& Virtual},
  publisher    = {European Geosciences Union},
  title        = {{Evaluating memory properties in convection schemes using idealised tests}},
  doi          = {10.5194/egusphere-egu23-4968},
  year         = {2023},
}

@inproceedings{14866,
  author       = {Abramian, Sophie and Muller, Caroline J and Risi, Camille},
  booktitle    = {EGU General Assembly 2023},
  location     = {Vienna, Austria \& Virtual},
  publisher    = {European Geosciences Union},
  title        = {{Extreme precipitation in tropical squall lines}},
  doi          = {10.5194/egusphere-egu23-15870},
  year         = {2023},
}

@inproceedings{14867,
  abstract     = {Starting with the empty graph on $[n]$, at each round, a set of $K=K(n)$ edges is presented, chosen uniformly at random from the ones that have not been presented yet. We are then asked to choose at most one of the presented edges and add it to the current graph. Our goal is to construct a Hamiltonian graph with $(1+o(1))n$ edges within as few rounds as possible. We show that in this process, one can build a Hamiltonian graph of size $(1+o(1))n$ in $(1+o(1))(1+(\log n)/(2K)) n$ rounds w.h.p. The case $K=1$ implies that w.h.p. one can build a Hamiltonian graph by choosing $(1+o(1))n$ edges in an online fashion as they appear along the first $(0.5+o(1))n\log n$ rounds of the random graph process. This answers a question of Frieze, Krivelevich and Michaeli. Observe that the number of rounds is asymptotically optimal, as the first $0.5n\log n$ edges do not span a Hamilton cycle w.h.p. The case $K=\Theta(\log n)$ implies that the Hamiltonicity threshold of the corresponding Achlioptas process is at most $(1+o(1))(1+(\log n)/(2K)) n$. This matches the $(1-o(1))(1+(\log n)/(2K)) n$ lower bound due to Krivelevich, Lubetzky and Sudakov and resolves the problem of determining the Hamiltonicity threshold of the Achlioptas process with $K=\Theta(\log n)$. We also show that in the above process one can construct a graph $G$ that spans a matching of size $\lfloor |V(G)|/2 \rfloor$ and has $(0.5+o(1))n$ edges within $(1+o(1))(0.5+(\log n)/(2K)) n$ rounds w.h.p. Our proof relies on a robust Hamiltonicity property of the strong $4$-core of the binomial random graph, which we use as a black box. This property allows the strong $4$-core to absorb paths covering vertices outside it into a cycle.},
  author       = {Anastos, Michael},
  booktitle    = {Proceedings of the 12th European Conference on Combinatorics, Graph Theory and Applications},
  issn         = {2788-3116},
  location     = {Prague, Czech Republic},
  pages        = {36--41},
  publisher    = {Masaryk University Press},
  title        = {{Constructing Hamilton cycles and perfect matchings efficiently}},
  doi          = {10.5817/cz.muni.eurocomb23-005},
  year         = {2023},
}

@inproceedings{14872,
  abstract     = {We entangled microwave and optical photons for the first time, as verified by a measured two-mode vacuum squeezing of 0.7 dB. This electro-optic entanglement is the key resource needed to connect cryogenic quantum circuits.},
  author       = {Sahu, Rishabh and Qiu, Liu and Hease, William J and Arnold, Georg M and Minoguchi, Yuri and Rabl, Peter and Fink, Johannes M},
  booktitle    = {Frontiers in Optics + Laser Science 2023},
  isbn         = {9781957171296},
  location     = {Tacoma, WA, United States},
  publisher    = {Optica Publishing Group},
  title        = {{Entangling microwaves and telecom wavelength light}},
  doi          = {10.1364/ls.2023.lm1f.3},
  year         = {2023},
}

@misc{14892,
  abstract     = {Code and data necessary to reproduce the simulations and data analyses reported in our manuscript: Tomé, D.F., Zhang, Y., Aida, T., Mosto, O., Lu, Y., Chen, M., Sadeh, S., Roy, D. S., Clopath, C. Dynamic and selective engrams emerge with memory consolidation. 2023.},
  author       = {Feitosa Tomé, Douglas},
  publisher    = {Zenodo},
  title        = {{douglastome/dynamic-engrams: Dynamic and selective engrams emerge with memory consolidation}},
  doi          = {10.5281/ZENODO.10251087},
  year         = {2023},
}

@misc{14919,
  abstract     = {Glacier meteorological data, Swiss Alps, 2022.},
  author       = {Shaw, Thomas and Buri, Pascal and McCarthy, Michael and Miles, Evan and Pellicciotti, Francesca},
  publisher    = {Zenodo},
  title        = {{Air temperature and near-surface meteorology datasets on three Swiss glaciers - Extreme 2022 Summer}},
  doi          = {10.5281/ZENODO.8277285},
  year         = {2023},
}

@article{14920,
  abstract     = {We consider fixpoint algorithms for two-player games on graphs with $\omega$-regular winning conditions, where the environment is constrained by a strong transition fairness assumption. Strong transition fairness is a widely occurring special case of strong fairness, which requires that any execution is strongly fair with respect to a specified set of live edges: whenever the
source vertex of a live edge is visited infinitely often along a play, the edge itself is traversed infinitely often along the play as well. We show that, surprisingly, strong transition fairness retains the algorithmic characteristics of the fixpoint algorithms for $\omega$-regular games -- the new algorithms have the same alternation depth as the classical algorithms but invoke a new type of predecessor operator. For Rabin games with $k$ pairs, the complexity of the new algorithm is $O(n^{k+2}k!)$ symbolic steps, which is independent of the number of live edges in the strong transition fairness assumption. Further, we show that GR(1) specifications with strong transition fairness assumptions can be solved with a 3-nested fixpoint algorithm, same as the usual algorithm. In contrast, strong fairness necessarily requires increasing the alternation depth depending on the number of fairness assumptions. We get symbolic algorithms for (generalized) Rabin, parity and GR(1) objectives under strong transition fairness assumptions as well as a direct symbolic algorithm for qualitative winning in stochastic
$\omega$-regular games that runs in $O(n^{k+2}k!)$ symbolic steps, improving the state of the art. Finally, we have implemented a BDD-based synthesis engine based on our algorithm. We show on a set of synthetic and real benchmarks that our algorithm is scalable, parallelizable, and outperforms previous algorithms by orders of magnitude.},
  author       = {Banerjee, Tamajit and Majumdar, Rupak and Mallik, Kaushik and Schmuck, Anne-Kathrin and Soudjani, Sadegh},
  issn         = {2751-4838},
  journal      = {TheoretiCS},
  publisher    = {Episciences},
  title        = {{Fast symbolic algorithms for $\omega$-regular games under strong transition fairness}},
  doi          = {10.46298/theoretics.23.4},
  volume       = {2},
  year         = {2023},
}

@inproceedings{14921,
  abstract     = {Neural collapse (NC) refers to the surprising structure of the last layer of deep neural networks in the terminal phase of gradient descent training. Recently, an increasing amount of experimental evidence has pointed to the propagation of NC to earlier layers of neural networks. However, while the NC in the last layer is well studied theoretically, much less is known about its multi-layered counterpart - deep neural collapse (DNC). In particular, existing work focuses either on linear layers or only on the last two layers at the price of an extra assumption. Our paper fills this gap by generalizing the established analytical framework for NC - the unconstrained features model - to multiple non-linear layers. Our key technical contribution is to show that, in a deep unconstrained features model, the unique global optimum for binary classification exhibits all the properties typical of DNC. This explains the existing experimental evidence of DNC. We also empirically show that (i) by optimizing deep unconstrained features models via gradient descent, the resulting solution agrees well with our theory, and (ii) trained networks recover the unconstrained features suitable for the occurrence of DNC, thus supporting the validity of this modeling principle.},
  author       = {Súkeník, Peter and Mondelli, Marco and Lampert, Christoph},
  booktitle    = {37th Annual Conference on Neural Information Processing Systems},
  location     = {New Orleans, LA, United States},
  title        = {{Deep neural collapse is provably optimal for the deep unconstrained features model}},
  year         = {2023},
}

@inproceedings{14922,
  abstract     = {We propose a novel approach to concentration for non-independent random variables. The main idea is to ``pretend'' that the random variables are independent and pay a multiplicative price measuring how far they are from actually being independent. This price is encapsulated in the Hellinger integral between the joint and the product of the marginals, which is then upper bounded leveraging tensorisation properties. Our bounds represent a natural generalisation of concentration inequalities in the presence of dependence: we recover exactly the classical bounds (McDiarmid's inequality) when the random variables are independent. Furthermore, in a ``large deviations'' regime, we obtain the same decay in the probability as for the independent case, even when the random variables display non-trivial dependencies. To show this, we consider a number of applications of interest. First, we provide a bound for Markov chains with finite state space. Then, we consider the Simple Symmetric Random Walk, which is a non-contracting Markov chain, and a non-Markovian setting in which the stochastic process depends on its entire past. To conclude, we propose an application to Markov Chain Monte Carlo methods, where our approach leads to an improved lower bound on the minimum burn-in period required to reach a certain accuracy. In all of these settings, we provide a regime of parameters in which our bound fares better than what the state of the art can provide.},
  author       = {Esposito, Amedeo Roberto and Mondelli, Marco},
  booktitle    = {Proceedings of 2023 IEEE International Symposium on Information Theory},
  location     = {Taipei, Taiwan},
  publisher    = {IEEE},
  title        = {{Concentration without independence via information measures}},
  doi          = {10.1109/isit54713.2023.10206899},
  year         = {2023},
}

@inproceedings{14923,
  abstract     = {We study the performance of a Bayesian statistician who estimates a rank-one signal corrupted by non-symmetric rotationally invariant noise with a generic distribution of singular values. As the signal-to-noise ratio and the noise structure are unknown, a Gaussian setup is incorrectly assumed. We derive the exact analytic expression for the error of the mismatched Bayes estimator and also provide the analysis of an approximate message passing (AMP) algorithm. The first result exploits the asymptotic behavior of spherical integrals for rectangular matrices and of low-rank matrix perturbations; the second one relies on the design and analysis of an auxiliary AMP. The numerical experiments show that there is a performance gap between the AMP and Bayes estimators, which is due to the incorrect estimation of the signal norm.},
  author       = {Fu, Teng and Liu, YuHao and Barbier, Jean and Mondelli, Marco and Liang, ShanSuo and Hou, TianQi},
  booktitle    = {Proceedings of 2023 IEEE International Symposium on Information Theory},
  location     = {Taipei, Taiwan},
  publisher    = {IEEE},
  title        = {{Mismatched estimation of non-symmetric rank-one matrices corrupted by structured noise}},
  doi          = {10.1109/isit54713.2023.10206671},
  year         = {2023},
}

@article{14924,
  abstract     = {The stochastic heavy ball method (SHB), also known as stochastic gradient descent (SGD) with Polyak's momentum, is widely used in training neural networks. However, despite the remarkable success of such an algorithm in practice, its theoretical characterization remains limited. In this paper, we focus on neural networks with two and three layers and provide a rigorous understanding of the properties of the solutions found by SHB: \emph{(i)} stability after dropping out part of the neurons, \emph{(ii)} connectivity along a low-loss path, and \emph{(iii)} convergence to the global optimum.
To achieve this goal, we take a mean-field view and relate the SHB dynamics to a certain partial differential equation in the limit of large network widths. This mean-field perspective has inspired a recent line of work focusing on SGD while, in contrast, our paper considers an algorithm with momentum. More specifically, after proving existence and uniqueness of the limit differential equations, we show convergence to the global optimum and give a quantitative bound between the mean-field limit and the SHB dynamics of a finite-width network. Armed with this last bound, we are able to establish the dropout-stability and connectivity of SHB solutions.},
  author       = {Wu, Diyuan and Kungurtsev, Vyacheslav and Mondelli, Marco},
  issn         = {2835-8856},
  journal      = {Transactions on Machine Learning Research},
  publisher    = {ML Research Press},
  title        = {{Mean-field analysis for heavy ball methods: Dropout-stability, connectivity, and global convergence}},
  year         = {2023},
}

@unpublished{14946,
  abstract     = {We present a unified framework for studying the identifiability of
representations learned from simultaneously observed views, such as different
data modalities. We allow a partially observed setting in which each view
constitutes a nonlinear mixture of a subset of underlying latent variables,
which can be causally related. We prove that the information shared across all
subsets of any number of views can be learned up to a smooth bijection using
contrastive learning and a single encoder per view. We also provide graphical
criteria indicating which latent variables can be identified through a simple
set of rules, which we refer to as identifiability algebra. Our general
framework and theoretical results unify and extend several previous works on
multi-view nonlinear ICA, disentanglement, and causal representation learning.
We experimentally validate our claims on numerical, image, and multi-modal data
sets. Further, we demonstrate that the performance of prior methods is
recovered in different special cases of our setup. Overall, we find that access
to multiple partial views enables us to identify a more fine-grained
representation, under the generally milder assumption of partial observability.},
  author       = {Yao, Dingling and Xu, Danru and Lachapelle, Sébastien and Magliacane, Sara and Taslakian, Perouz and Martius, Georg and von Kügelgen, Julius and Locatello, Francesco},
  booktitle    = {arXiv},
  title        = {{Multi-view causal representation learning with partial observability}},
  doi          = {10.48550/arXiv.2311.04056},
  year         = {2023},
}

@unpublished{14948,
  abstract     = {The extraction of modular object-centric representations for downstream tasks
is an emerging area of research. Learning grounded representations of objects
that are guaranteed to be stable and invariant promises robust performance
across different tasks and environments. Slot Attention (SA) learns
object-centric representations by assigning objects to \textit{slots}, but
presupposes a \textit{single} distribution from which all slots are randomly
initialised. This results in an inability to learn \textit{specialized} slots
which bind to specific object types and remain invariant to identity-preserving
changes in object appearance. To address this, we present
\emph{\textsc{Co}nditional \textsc{S}lot \textsc{A}ttention} (\textsc{CoSA})
using a novel concept of \emph{Grounded Slot Dictionary} (GSD) inspired by
vector quantization. Our proposed GSD comprises (i) canonical object-level
property vectors and (ii) parametric Gaussian distributions, which define a
prior over the slots. We demonstrate the benefits of our method in multiple
downstream tasks such as scene generation, composition, and task adaptation,
whilst remaining competitive with SA in popular object discovery benchmarks.},
  author       = {Kori, Avinash and Locatello, Francesco and Ribeiro, Fabio De Sousa and Toni, Francesca and Glocker, Ben},
  booktitle    = {arXiv},
  title        = {{Grounded object centric learning}},
  doi          = {10.48550/arXiv.2307.09437},
  year         = {2023},
}

@article{14949,
  abstract     = {Many approaches have been proposed to use diffusion models to augment training datasets for downstream tasks, such as classification. However, diffusion models are themselves trained on large datasets, often with noisy annotations, and it remains an open question to what extent these models contribute to downstream classification performance. In particular, it remains unclear if they generalize enough to improve over directly using the additional data of their pre-training process for augmentation. We systematically evaluate a range of existing methods to generate images from diffusion models and study new extensions to assess their benefit for data augmentation. Personalizing diffusion models towards the target data outperforms simpler prompting strategies. However, using the pre-training data of the diffusion model alone, via a simple nearest-neighbor retrieval procedure, leads to even stronger downstream performance. Our study explores the potential of diffusion models in generating new training data, and surprisingly finds that these sophisticated models are not yet able to beat a simple and strong image retrieval baseline on simple downstream vision tasks.},
  author       = {Burg, Max and Wenzel, Florian and Zietlow, Dominik and Horn, Max and Makansi, Osama and Locatello, Francesco and Russell, Chris},
  issn         = {2835-8856},
  journal      = {Transactions on Machine Learning Research},
  publisher    = {ML Research Press},
  title        = {{Image retrieval outperforms diffusion models on data augmentation}},
  year         = {2023},
}

@unpublished{14952,
  abstract     = {While different neural models often exhibit latent spaces that are alike when exposed to semantically related data, this intrinsic similarity is not always immediately discernible. Towards a better understanding of this phenomenon, our work shows how representations learned from these neural modules can be translated between different pre-trained networks via simpler transformations than previously thought. An advantage of this approach is the ability to
estimate these transformations using standard, well-understood algebraic procedures that have closed-form solutions. Our method directly estimates a transformation between two given latent spaces, thereby enabling effective stitching of encoders and decoders without additional training. We extensively validate the adaptability of this translation procedure in different
experimental settings: across various trainings, domains, architectures (e.g., ResNet, CNN, ViT), and in multiple downstream tasks (classification, reconstruction). Notably, we show how it is possible to zero-shot stitch text encoders and vision decoders, or vice-versa, yielding surprisingly good classification performance in this multimodal setting.},
  author       = {Maiorca, Valentino and Moschella, Luca and Norelli, Antonio and Fumero, Marco and Locatello, Francesco and Rodolà, Emanuele},
  booktitle    = {arXiv},
  title        = {{Latent space translation via semantic alignment}},
  doi          = {10.48550/arXiv.2311.00664},
  year         = {2023},
}

@unpublished{14953,
  abstract     = {This paper provides statistical sample complexity bounds for score-matching and its applications in causal discovery. We demonstrate that accurate estimation of the score function is achievable by training a standard deep ReLU neural network using stochastic gradient descent. We establish bounds on the error rate of recovering causal relationships using the score-matching-based causal discovery method of Rolland et al. [2022], assuming a sufficiently good estimation of the score function. Finally, we analyze the upper bound of score-matching estimation within the score-based generative modeling, which has been applied for causal discovery but is also of independent interest within the domain of generative models.},
  author       = {Zhu, Zhenyu and Locatello, Francesco and Cevher, Volkan},
  booktitle    = {arXiv},
  title        = {{Sample complexity bounds for score-matching: Causal discovery and generative modeling}},
  doi          = {10.48550/arXiv.2310.18123},
  year         = {2023},
}

