@article{14924,
  abstract     = {The stochastic heavy ball method (SHB), also known as stochastic gradient descent (SGD) with Polyak's momentum, is widely used in training neural networks. However, despite the remarkable success of this algorithm in practice, its theoretical characterization remains limited. In this paper, we focus on neural networks with two and three layers and provide a rigorous understanding of the properties of the solutions found by SHB: \emph{(i)} stability after dropping out part of the neurons, \emph{(ii)} connectivity along a low-loss path, and \emph{(iii)} convergence to the global optimum.
To achieve this goal, we take a mean-field view and relate the SHB dynamics to a certain partial differential equation in the limit of large network widths. This mean-field perspective has inspired a recent line of work focusing on SGD; in contrast, our paper considers an algorithm with momentum. More specifically, after proving existence and uniqueness of the limit differential equations, we show convergence to the global optimum and give a quantitative bound between the mean-field limit and the SHB dynamics of a finite-width network. Armed with this last bound, we are able to establish the dropout-stability and connectivity of SHB solutions.},
  author       = {Wu, Diyuan and Kungurtsev, Vyacheslav and Mondelli, Marco},
  journal      = {Transactions on Machine Learning Research},
  publisher    = {ML Research Press},
  title        = {{Mean-field analysis for heavy ball methods: Dropout-stability, connectivity, and global convergence}},
  year         = {2023},
}

@unpublished{14946,
  abstract     = {We present a unified framework for studying the identifiability of
representations learned from simultaneously observed views, such as different
data modalities. We allow a partially observed setting in which each view
constitutes a nonlinear mixture of a subset of underlying latent variables,
which can be causally related. We prove that the information shared across all
subsets of any number of views can be learned up to a smooth bijection using
contrastive learning and a single encoder per view. We also provide graphical
criteria indicating which latent variables can be identified through a simple
set of rules, which we refer to as identifiability algebra. Our general
framework and theoretical results unify and extend several previous works on
multi-view nonlinear ICA, disentanglement, and causal representation learning.
We experimentally validate our claims on numerical, image, and multi-modal data
sets. Further, we demonstrate that the performance of prior methods is
recovered in different special cases of our setup. Overall, we find that access
to multiple partial views enables us to identify a more fine-grained
representation, under the generally milder assumption of partial observability.},
  author       = {Yao, Dingling and Xu, Danru and Lachapelle, Sébastien and Magliacane, Sara and Taslakian, Perouz and Martius, Georg and von Kügelgen, Julius and Locatello, Francesco},
  booktitle    = {arXiv},
  title        = {{Multi-view causal representation learning with partial observability}},
  doi          = {10.48550/arXiv.2311.04056},
  year         = {2023},
}

@unpublished{14948,
  abstract     = {The extraction of modular object-centric representations for downstream tasks
is an emerging area of research. Learning grounded representations of objects
that are guaranteed to be stable and invariant promises robust performance
across different tasks and environments. Slot Attention (SA) learns
object-centric representations by assigning objects to \textit{slots}, but
presupposes a \textit{single} distribution from which all slots are randomly
initialised. This results in an inability to learn \textit{specialized} slots
which bind to specific object types and remain invariant to identity-preserving
changes in object appearance. To address this, we present
\emph{\textsc{Co}nditional \textsc{S}lot \textsc{A}ttention} (\textsc{CoSA})
using a novel concept of \emph{Grounded Slot Dictionary} (GSD) inspired by
vector quantization. Our proposed GSD comprises (i) canonical object-level
property vectors and (ii) parametric Gaussian distributions, which define a
prior over the slots. We demonstrate the benefits of our method in multiple
downstream tasks such as scene generation, composition, and task adaptation,
whilst remaining competitive with SA in popular object discovery benchmarks.},
  author       = {Kori, Avinash and Locatello, Francesco and Ribeiro, Fabio De Sousa and Toni, Francesca and Glocker, Ben},
  booktitle    = {arXiv},
  title        = {{Grounded object-centric learning}},
  doi          = {10.48550/arXiv.2307.09437},
  year         = {2023},
}

@article{14949,
  abstract     = {Many approaches have been proposed to use diffusion models to augment training datasets for downstream tasks, such as classification. However, diffusion models are themselves trained on large datasets, often with noisy annotations, and it remains an open question to what extent these models contribute to downstream classification performance. In particular, it remains unclear if they generalize enough to improve over directly using the additional data of their pre-training process for augmentation. We systematically evaluate a range of existing methods to generate images from diffusion models and study new extensions to assess their benefit for data augmentation. Personalizing diffusion models towards the target data outperforms simpler prompting strategies. However, using the pre-training data of the diffusion model alone, via a simple nearest-neighbor retrieval procedure, leads to even stronger downstream performance. Our study explores the potential of diffusion models in generating new training data, and surprisingly finds that these sophisticated models are not yet able to beat a simple and strong image retrieval baseline on simple downstream vision tasks.},
  author       = {Burg, Max and Wenzel, Florian and Zietlow, Dominik and Horn, Max and Makansi, Osama and Locatello, Francesco and Russell, Chris},
  issn         = {2835-8856},
  journal      = {Transactions on Machine Learning Research},
  publisher    = {ML Research Press},
  title        = {{Image retrieval outperforms diffusion models on data augmentation}},
  year         = {2023},
}

@unpublished{14952,
  abstract     = {While different neural models often exhibit latent spaces that are alike when exposed to semantically related data, this intrinsic similarity is not always immediately discernible. Towards a better understanding of this phenomenon, our work shows how representations learned from these neural modules can be translated between different pre-trained networks via simpler transformations than previously thought. An advantage of this approach is the ability to
estimate these transformations using standard, well-understood algebraic procedures that have closed-form solutions. Our method directly estimates a transformation between two given latent spaces, thereby enabling effective stitching of encoders and decoders without additional training. We extensively validate the adaptability of this translation procedure in different
experimental settings: across various trainings, domains, architectures (e.g., ResNet, CNN, ViT), and in multiple downstream tasks (classification, reconstruction). Notably, we show how it is possible to zero-shot stitch text encoders and vision decoders, or vice-versa, yielding surprisingly good classification performance in this multimodal setting.},
  author       = {Maiorca, Valentino and Moschella, Luca and Norelli, Antonio and Fumero, Marco and Locatello, Francesco and Rodolà, Emanuele},
  booktitle    = {arXiv},
  title        = {{Latent space translation via semantic alignment}},
  doi          = {10.48550/arXiv.2311.00664},
  year         = {2023},
}

@unpublished{14953,
  abstract     = {This paper provides statistical sample complexity bounds for score-matching and its applications in causal discovery. We demonstrate that accurate estimation of the score function is achievable by training a standard deep ReLU neural network using stochastic gradient descent. We establish bounds on the error rate of recovering causal relationships using the score-matching-based causal discovery method of Rolland et al. [2022], assuming a sufficiently good estimation of the score function. Finally, we analyze an upper bound on the score-matching estimation error within score-based generative modeling, which has been applied to causal discovery but is also of independent interest in the domain of generative models.},
  author       = {Zhu, Zhenyu and Locatello, Francesco and Cevher, Volkan},
  booktitle    = {arXiv},
  title        = {{Sample complexity bounds for score-matching: Causal discovery and generative modeling}},
  doi          = {10.48550/arXiv.2310.18123},
  year         = {2023},
}

@unpublished{14954,
  abstract     = {When domain knowledge is limited and experimentation is restricted by ethical, financial, or time constraints, practitioners turn to observational causal discovery methods to recover the causal structure, exploiting the statistical properties of their data. Because causal discovery without further assumptions is an ill-posed problem, each algorithm comes with its own set of
usually untestable assumptions, some of which are hard to meet in real datasets. Motivated by these considerations, this paper extensively benchmarks the empirical performance of recent causal discovery methods on observational i.i.d. data generated under different background conditions, allowing for violations of the critical assumptions required by each selected approach. Our experimental findings show that score matching-based methods achieve
surprisingly good false positive and false negative rates on the inferred graph in these challenging scenarios, and we provide theoretical insights into their performance. This work is also the first effort to benchmark the stability of causal discovery algorithms with respect to the values of their hyperparameters. Finally, we hope this paper will set a new standard for the evaluation of causal discovery methods and can serve as an accessible entry point for practitioners interested in the field, highlighting the empirical implications of different algorithm choices.},
  author       = {Montagna, Francesco and Mastakouri, Atalanti A. and Eulig, Elias and Noceti, Nicoletta and Rosasco, Lorenzo and Janzing, Dominik and Aragam, Bryon and Locatello, Francesco},
  booktitle    = {arXiv},
  title        = {{Assumption violations in causal discovery and the robustness of score matching}},
  doi          = {10.48550/arXiv.2310.13387},
  year         = {2023},
}

@inproceedings{14958,
  abstract     = {Causal representation learning (CRL) aims at identifying high-level causal variables from low-level data, e.g. images. Current methods usually assume that all causal variables are captured in the high-dimensional observations. In this work, we focus on learning causal representations from data under partial observability, i.e., when some of the causal variables are not observed in the measurements, and the set of masked variables changes across the different samples. We introduce some initial theoretical results for identifying causal variables under partial observability by exploiting a sparsity regularizer, focusing in particular on the linear and piecewise linear mixing function cases. We provide a theorem that allows us to identify the causal variables up to permutation and element-wise linear transformations in the linear case and a lemma that allows us to identify causal variables up to linear transformation in the piecewise linear case. Finally, we provide a conjecture that would allow us to identify the causal variables up to permutation and element-wise linear transformations also in the piecewise linear case. We test the theorem and conjecture on simulated data, showing the effectiveness of our method.},
  author       = {Xu, Danru and Yao, Dingling and Lachapelle, Sébastien and Taslakian, Perouz and von Kügelgen, Julius and Locatello, Francesco and Magliacane, Sara},
  booktitle    = {Causal Representation Learning Workshop at NeurIPS 2023},
  location     = {New Orleans, LA, United States},
  publisher    = {OpenReview},
  title        = {{A sparsity principle for partially observable causal representation learning}},
  year         = {2023},
}

@unpublished{14961,
  abstract     = {The use of simulated data in the field of causal discovery is ubiquitous due to the scarcity of annotated real data. Recently, Reisach et al., 2021 highlighted the emergence of patterns in simulated linear data, whose marginal variance increases along the causal direction. As an ablation in their experiments, Montagna et al., 2023 found that similar patterns may emerge in
nonlinear models for the variance of the score vector $\nabla \log p_{\mathbf{X}}$, and introduced the ScoreSort algorithm. In this work, we formally define and characterize this score-sortability pattern of nonlinear additive noise models. We find that it defines a class of identifiable (bivariate) causal models overlapping with nonlinear additive noise models. We
theoretically demonstrate the advantages of ScoreSort in terms of statistical efficiency compared to prior state-of-the-art score matching-based methods and empirically show the score-sortability of the most common synthetic benchmarks in the literature. Our findings highlight (1) the lack of diversity in the data as an important limitation in the evaluation of nonlinear causal discovery approaches, (2) the importance of thoroughly testing different settings within a problem class, and (3) the importance of analyzing statistical properties in
causal discovery, where research is often limited to defining identifiability conditions of the model.},
  author       = {Montagna, Francesco and Noceti, Nicoletta and Rosasco, Lorenzo and Locatello, Francesco},
  booktitle    = {arXiv},
  title        = {{Shortcuts for causal discovery of nonlinear models by score matching}},
  doi          = {10.48550/arXiv.2310.14246},
  year         = {2023},
}

@unpublished{14962,
  abstract     = {In this paper, we show that recent advances in video representation learning
and pre-trained vision-language models allow for substantial improvements in
self-supervised video object localization. We propose a method that first
localizes objects in videos via a slot attention approach and then assigns text
to the obtained slots. The latter is achieved by reading localized semantic
information from the pre-trained CLIP model in an unsupervised way. The resulting
video object localization is entirely unsupervised apart from the implicit
annotation contained in CLIP, and it is effectively the first unsupervised
approach that yields good results on regular video benchmarks.},
  author       = {Fan, Ke and Bai, Zechen and Xiao, Tianjun and Zietlow, Dominik and Horn, Max and Zhao, Zixu and Simon-Gabriel, Carl-Johann and Shou, Mike Zheng and Locatello, Francesco and Schiele, Bernt and Brox, Thomas and Zhang, Zheng and Fu, Yanwei and He, Tong},
  booktitle    = {arXiv},
  title        = {{Unsupervised open-vocabulary object localization in videos}},
  doi          = {10.48550/arXiv.2309.09858},
  year         = {2023},
}

@unpublished{14963,
  abstract     = {Unsupervised object-centric learning methods allow the partitioning of scenes
into entities without additional localization information and are excellent
candidates for reducing the annotation burden of multiple-object tracking (MOT)
pipelines. Unfortunately, they lack two key properties: objects are often split
into parts and are not consistently tracked over time. In fact,
state-of-the-art models achieve pixel-level accuracy and temporal consistency
by relying on supervised object detection with additional ID labels for the
association through time. This paper proposes a video object-centric model for
MOT. It consists of an index-merge module that adapts the object-centric slots
into detection outputs and an object memory module that builds complete object
prototypes to handle occlusions. Benefiting from object-centric learning, we
require only sparse detection labels (0%-6.25%) for object localization and
feature binding. Relying on our self-supervised
Expectation-Maximization-inspired loss for object association, our approach
requires no ID labels. In our experiments, we significantly narrow the gap
between existing object-centric models and the fully supervised
state-of-the-art, and outperform several unsupervised trackers.},
  author       = {Zhao, Zixu and Wang, Jiaze and Horn, Max and Ding, Yizhuo and He, Tong and Bai, Zechen and Zietlow, Dominik and Simon-Gabriel, Carl-Johann and Shuai, Bing and Tu, Zhuowen and Brox, Thomas and Schiele, Bernt and Fu, Yanwei and Locatello, Francesco and Zhang, Zheng and Xiao, Tianjun},
  booktitle    = {arXiv},
  title        = {{Object-centric multiple object tracking}},
  doi          = {10.48550/arXiv.2309.00233},
  year         = {2023},
}

@article{14985,
  abstract     = {Lead sulfide (PbS) holds great potential for thermoelectric applications due to its earth-abundant S element. However, its inferior average ZT (ZT$_{\mathrm{ave}}$) value makes PbS less competitive than its analogs PbTe and PbSe. To improve its thermoelectric performance, this study implements strategies of continuous Se alloying and Cu interstitial doping to synergistically tune the thermal and electrical transport properties of n-type PbS. First, the lattice parameter of 5.93 Å in PbS is linearly expanded to 6.03 Å in PbS$_{0.5}$Se$_{0.5}$ with increasing Se alloying content. This expanded lattice in Se-alloyed PbS not only intensifies phonon scattering but also facilitates the formation of Cu interstitials. Based on the PbS$_{0.6}$Se$_{0.4}$ composition with the minimal lattice thermal conductivity, Cu interstitials are introduced to increase the electron density, thus boosting the peak power factor from 3.88 $\mu$W cm$^{-1}$ K$^{-2}$ in PbS$_{0.6}$Se$_{0.4}$ to 20.58 $\mu$W cm$^{-1}$ K$^{-2}$ in PbS$_{0.6}$Se$_{0.4}$-1%Cu. Meanwhile, the lattice thermal conductivity in PbS$_{0.6}$Se$_{0.4}$-x%Cu (x = 0–2) is further suppressed due to the strong strain field caused by Cu interstitials. Finally, with the lowered thermal conductivity and high electrical transport properties, a peak ZT of ~1.1 and a ZT$_{\mathrm{ave}}$ of ~0.82 can be achieved in PbS$_{0.6}$Se$_{0.4}$-1%Cu at 300–773 K, outperforming previously reported n-type PbS.},
  author       = {Liu, Zhengtao and Hong, Tao and Xu, Liqing and Wang, Sining and Gao, Xiang and Chang, Cheng and Ding, Xiangdong and Xiao, Yu and Zhao, Li-Dong},
  issn         = {2767-441X},
  journal      = {Interdisciplinary Materials},
  number       = {1},
  pages        = {161--170},
  publisher    = {Wiley},
  title        = {{Lattice expansion enables interstitial doping to achieve a high average ZT in n-type PbS}},
  doi          = {10.1002/idm2.12056},
  volume       = {2},
  year         = {2023},
}

@inproceedings{14989,
  abstract     = {Encryption alone is not enough for secure end-to-end encrypted messaging: a server must also honestly serve public keys to users. Key transparency has been presented as an efficient
solution for detecting (and hence deterring) a server that attempts to dishonestly serve keys. Key transparency involves two major components: (1) a username-to-public-key mapping, stored and cryptographically committed to by the server, and (2) an out-of-band consistency protocol for serving short commitments to users. In the setting of real-world deployments and supporting production scale, new challenges must be considered for both of these components. We enumerate these challenges and provide solutions to address them. In particular, we design and implement a memory-optimized and privacy-preserving verifiable data structure for committing to the username-to-public-key store.
To make this implementation viable for production, we also integrate support for persistent and distributed storage. We further propose a future-facing solution, termed “compaction”, as
a mechanism for mitigating practical issues that arise from dealing with infinitely growing server data structures. Finally, we implement a consensusless solution that achieves the minimum requirements for a service that consistently distributes commitments for a transparency application, providing a much more efficient protocol for distributing small and consistent
commitments to users. This culminates in our production-grade implementation of a key transparency system (Parakeet) which we have open-sourced, along with a demonstration of feasibility through our benchmarks.},
  author       = {Malvai, Harjasleen and Kokoris Kogias, Eleftherios and Sonnino, Alberto and Ghosh, Esha and Oztürk, Ercan and Lewi, Kevin and Lawlor, Sean},
  booktitle    = {Proceedings of the 2023 Network and Distributed System Security Symposium},
  isbn         = {1891562835},
  location     = {San Diego, CA, United States},
  publisher    = {Internet Society},
  title        = {{Parakeet: Practical key transparency for end-to-end encrypted messaging}},
  doi          = {10.14722/ndss.2023.24545},
  year         = {2023},
}

@misc{14990,
  abstract     = {The software artefact for evaluating the implementation of the approximation of stationary distributions.},
  author       = {Meggendorfer, Tobias},
  publisher    = {Zenodo},
  title        = {{Artefact for: Correct Approximation of Stationary Distributions}},
  doi          = {10.5281/ZENODO.7548214},
  year         = {2023},
}

@misc{14991,
  abstract     = {This repository contains the data, scripts, WRF codes and files required to reproduce the results of the manuscript "Assessing Memory in Convection Schemes Using Idealized Tests" submitted to the Journal of Advances in Modeling Earth Systems (JAMES).},
  author       = {Hwong, Yi-Ling and Colin, Maxime and Aglas, Philipp and Muller, Caroline J and Sherwood, Steven C.},
  publisher    = {Zenodo},
  title        = {{Data: Assessing memory in convection schemes using idealized tests}},
  doi          = {10.5281/ZENODO.7757041},
  year         = {2023},
}

@inbook{14992,
  abstract     = {In this chapter we first review the Levy–Lieb functional, which gives the lowest kinetic and interaction energy that can be reached with all possible quantum states having a given density. We discuss two possible convex generalizations of this functional, corresponding to using mixed canonical and grand-canonical states, respectively. We present some recent works about the local density approximation, in which the functionals get replaced by purely local functionals constructed using the uniform electron gas energy per unit volume. We then review the known upper and lower bounds on the Levy–Lieb functionals. We start with the kinetic energy alone, then turn to the classical interaction alone, before we are able to put everything together. A later section is devoted to the Hohenberg–Kohn theorem and the role of many-body unique continuation in its proof.},
  author       = {Lewin, Mathieu and Lieb, Elliott H. and Seiringer, Robert},
  booktitle    = {Density Functional Theory},
  editor       = {Cancès, Eric and Friesecke, Gero},
  isbn         = {9783031223396},
  issn         = {3005-0286},
  pages        = {115--182},
  publisher    = {Springer},
  title        = {{Universal Functionals in Density Functional Theory}},
  doi          = {10.1007/978-3-031-22340-2_3},
  year         = {2023},
}

@inproceedings{14993,
  abstract     = {Traditional top-down approaches for global health have historically failed to achieve social progress (Hoffman et al., 2015; Hoffman & Røttingen, 2015). Recently, however, a more holistic, multi-level approach termed One Health (OH) (Osterhaus et al., 2020) is being adopted. Several sets of challenges have been identified for the implementation of OH (dos S. Ribeiro et al., 2019), including policy and funding, education and training, and multi-actor, multi-domain, and multi-level collaborations. These challenges persist despite increasing access to
knowledge and digital collaborative research tools through the internet. To address some of these challenges, we propose a general framework for grassroots, community-based participatory research. Additionally, we present a specific roadmap to create a Machine Learning for Global Health community in Africa. The proposed framework aims to enable any small group of individuals with scarce resources to build and sustain an online community within approximately two years. We provide a discussion on the potential impact of the proposed framework for global health research collaborations.},
  author       = {Currin, Christopher and Asiedu, Mercy Nyamewaa and Fourie, Chris and Rosman, Benjamin and Turki, Houcemeddine and Tonja, Atnafu Lambebo and Abbott, Jade and Ajala, Marvellous and Adedayo, Sadiq Adewale and Emezue, Chris Chinenye and Machangara, Daphne},
  booktitle    = {1st Workshop on Machine Learning & Global Health},
  location     = {Kigali, Rwanda},
  publisher    = {OpenReview},
  title        = {{A framework for grassroots research collaboration in machine learning and global health}},
  year         = {2023},
}

@misc{14994,
  abstract     = {This resource contains the artifacts for reproducing the experimental results presented in the paper titled "A Flexible Toolchain for Symbolic Rabin Games under Fair and Stochastic Uncertainties", which has been submitted to CAV 2023.},
  author       = {Majumdar, Rupak and Mallik, Kaushik and Rychlicki, Mateusz and Schmuck, Anne-Kathrin and Soudjani, Sadegh},
  publisher    = {Zenodo},
  title        = {{A flexible toolchain for symbolic Rabin games under fair and stochastic uncertainties}},
  doi          = {10.5281/ZENODO.7877790},
  year         = {2023},
}

@misc{14995,
  abstract     = {Lincheck is a new practical and user-friendly framework for testing concurrent data structures on the Java Virtual Machine (JVM). It provides a simple and declarative way to write concurrent tests. Instead of describing how to perform the test, users specify what to test by declaring all the operations to examine; the framework automatically handles the rest. As a result, tests written with Lincheck are concise and easy to understand. 
The artifact presents a collection of Lincheck tests that discover new bugs in popular libraries and implementations from the concurrency literature -- they are listed in Table 1, Section 3. To evaluate the performance of Lincheck analysis, the collection also includes tests that check correct data structures and, thus, always succeed. As reported in Table 2, Section 3, the experiments demonstrate that tests complete in reasonable time. Finally, Lincheck provides user-friendly output with an easy-to-follow trace to reproduce a detected error, significantly simplifying further investigation.},
  author       = {Koval, Nikita and Fedorov, Alexander and Sokolova, Maria and Tsitelov, Dmitry and Alistarh, Dan-Adrian},
  publisher    = {Zenodo},
  title        = {{Lincheck: A practical framework for testing concurrent data structures on JVM}},
  doi          = {10.5281/ZENODO.7877757},
  year         = {2023},
}

@inproceedings{15023,
  abstract     = {Reinforcement learning has shown promising results in learning neural network policies for complicated control tasks. However, the lack of formal guarantees about the behavior of such policies remains an impediment to their deployment. We propose a novel method for learning a composition of neural network policies in stochastic environments, along with a formal certificate which guarantees that a specification over the policy's behavior is satisfied with the desired probability. Unlike prior work on verifiable RL, our approach leverages the compositional nature of logical specifications provided in SpectRL, to learn over graphs of probabilistic reach-avoid specifications. The formal guarantees are provided by learning neural network policies together with reach-avoid supermartingales (RASM) for the graph’s sub-tasks and then composing them into a global policy. We also derive a tighter lower bound compared to previous work on the probability of reach-avoidance implied by a RASM, which is required to find a compositional policy with an acceptable probabilistic threshold for complex tasks with multiple edge policies. We implement a prototype of our approach and evaluate it on a Stochastic Nine Rooms environment.},
  author       = {Zikelic, Dorde and Lechner, Mathias and Verma, Abhinav and Chatterjee, Krishnendu and Henzinger, Thomas A.},
  booktitle    = {37th Conference on Neural Information Processing Systems},
  location     = {New Orleans, LA, United States},
  title        = {{Compositional policy learning in stochastic control systems with formal guarantees}},
  year         = {2023},
}

