@unpublished{14954,
  abstract     = {When domain knowledge is limited and experimentation is restricted by ethical, financial, or time constraints, practitioners turn to observational causal discovery methods to recover the causal structure, exploiting the statistical properties of their data. Because causal discovery without further assumptions is an ill-posed problem, each algorithm comes with its own set of
usually untestable assumptions, some of which are hard to meet in real datasets. Motivated by these considerations, this paper extensively benchmarks the empirical performance of recent causal discovery methods on observational i.i.d. data generated under different background conditions, allowing for violations of the critical assumptions required by each selected approach. Our experimental findings show that score matching-based methods demonstrate
surprisingly robust performance in terms of the false positive and false negative rates of the inferred graph in these challenging scenarios, and we provide theoretical insights into their performance. This work is also the first effort to benchmark the stability of causal discovery algorithms with respect to the values of their hyperparameters. Finally, we hope this paper will set a new standard for the evaluation of causal discovery methods and can serve as an accessible entry point for practitioners interested in the field, highlighting the empirical implications of different algorithm choices.},
  author       = {Montagna, Francesco and Mastakouri, Atalanti A. and Eulig, Elias and Noceti, Nicoletta and Rosasco, Lorenzo and Janzing, Dominik and Aragam, Bryon and Locatello, Francesco},
  booktitle    = {arXiv},
  title        = {{Assumption violations in causal discovery and the robustness of score matching}},
  doi          = {10.48550/arXiv.2310.13387},
  year         = {2023},
}

@inproceedings{14958,
  abstract     = {Causal representation learning (CRL) aims at identifying high-level causal variables from low-level data, e.g. images. Current methods usually assume that all causal variables are captured in the high-dimensional observations. In this work, we focus on learning causal representations from data under partial observability, i.e., when some of the causal variables are not observed in the measurements, and the set of masked variables changes across the different samples. We introduce some initial theoretical results for identifying causal variables under partial observability by exploiting a sparsity regularizer, focusing in particular on the linear and piecewise linear mixing function cases. We provide a theorem that allows us to identify the causal variables up to permutation and element-wise linear transformations in the linear case and a lemma that allows us to identify causal variables up to linear transformation in the piecewise linear case. Finally, we provide a conjecture that would allow us to identify the causal variables up to permutation and element-wise linear transformations also in the piecewise linear case. We test the theorem and conjecture on simulated data, showing the effectiveness of our method.},
  author       = {Xu, Danru and Yao, Dingling and Lachapelle, Sebastien and Taslakian, Perouz and von Kügelgen, Julius and Locatello, Francesco and Magliacane, Sara},
  booktitle    = {Causal Representation Learning Workshop at NeurIPS 2023},
  location     = {New Orleans, LA, United States},
  publisher    = {OpenReview},
  title        = {{A sparsity principle for partially observable causal representation learning}},
  year         = {2023},
}

@unpublished{14961,
  abstract     = {The use of simulated data in the field of causal discovery is ubiquitous due to the scarcity of annotated real data. Recently, Reisach et al., 2021 highlighted the emergence of patterns in simulated linear data, which displays increasing marginal variance in the causal direction. As an ablation in their experiments, Montagna et al., 2023 found that similar patterns may emerge in
nonlinear models for the variance of the score vector $\nabla \log p_{\mathbf{X}}$, and introduced the ScoreSort algorithm. In this work, we formally define and characterize this score-sortability pattern of nonlinear additive noise models. We find that it defines a class of identifiable (bivariate) causal models overlapping with nonlinear additive noise models. We
theoretically demonstrate the advantages of ScoreSort in terms of statistical efficiency compared to prior state-of-the-art score matching-based methods and empirically show the score-sortability of the most common synthetic benchmarks in the literature. Our findings highlight (1) the lack of diversity in the data as an important limitation in the evaluation of nonlinear causal discovery approaches, (2) the importance of thoroughly testing different settings within a problem class, and (3) the importance of analyzing statistical properties in
causal discovery, where research is often limited to defining identifiability conditions of the model.},
  author       = {Montagna, Francesco and Noceti, Nicoletta and Rosasco, Lorenzo and Locatello, Francesco},
  booktitle    = {arXiv},
  title        = {{Shortcuts for causal discovery of nonlinear models by score matching}},
  doi          = {10.48550/arXiv.2310.14246},
  year         = {2023},
}

@unpublished{14962,
  abstract     = {In this paper, we show that recent advances in video representation learning
and pre-trained vision-language models allow for substantial improvements in
self-supervised video object localization. We propose a method that first
localizes objects in videos via a slot attention approach and then assigns text
to the obtained slots. The latter is achieved in an unsupervised way by reading
localized semantic information from the pre-trained CLIP model. The resulting
video object localization is entirely unsupervised apart from the implicit
annotation contained in CLIP, and it is effectively the first unsupervised
approach that yields good results on regular video benchmarks.},
  author       = {Fan, Ke and Bai, Zechen and Xiao, Tianjun and Zietlow, Dominik and Horn, Max and Zhao, Zixu and Simon-Gabriel, Carl-Johann and Shou, Mike Zheng and Locatello, Francesco and Schiele, Bernt and Brox, Thomas and Zhang, Zheng and Fu, Yanwei and He, Tong},
  booktitle    = {arXiv},
  title        = {{Unsupervised open-vocabulary object localization in videos}},
  doi          = {10.48550/arXiv.2309.09858},
  year         = {2023},
}

@unpublished{14963,
  abstract     = {Unsupervised object-centric learning methods allow the partitioning of scenes
into entities without additional localization information and are excellent
candidates for reducing the annotation burden of multiple-object tracking (MOT)
pipelines. Unfortunately, they lack two key properties: objects are often split
into parts and are not consistently tracked over time. In fact,
state-of-the-art models achieve pixel-level accuracy and temporal consistency
by relying on supervised object detection with additional ID labels for the
association through time. This paper proposes a video object-centric model for
MOT. It consists of an index-merge module that adapts the object-centric slots
into detection outputs and an object memory module that builds complete object
prototypes to handle occlusions. Benefiting from object-centric learning, we
require only sparse detection labels (0%-6.25%) for object localization and
feature binding. Relying on our self-supervised
Expectation-Maximization-inspired loss for object association, our approach
requires no ID labels. Our experiments show that our approach significantly
narrows the gap between existing object-centric models and the fully supervised
state of the art, and outperforms several unsupervised trackers.},
  author       = {Zhao, Zixu and Wang, Jiaze and Horn, Max and Ding, Yizhuo and He, Tong and Bai, Zechen and Zietlow, Dominik and Simon-Gabriel, Carl-Johann and Shuai, Bing and Tu, Zhuowen and Brox, Thomas and Schiele, Bernt and Fu, Yanwei and Locatello, Francesco and Zhang, Zheng and Xiao, Tianjun},
  booktitle    = {arXiv},
  title        = {{Object-centric multiple object tracking}},
  doi          = {10.48550/arXiv.2309.00233},
  year         = {2023},
}

@article{14985,
  abstract     = {Lead sulfide (PbS) presents large potential for thermoelectric applications due to its earth-abundant S element. However, its inferior average ZT (ZTave) value makes PbS less competitive with its analogs PbTe and PbSe. To promote its thermoelectric performance, this study implements strategies of continuous Se alloying and Cu interstitial doping to synergistically tune thermal and electrical transport properties in n-type PbS. First, the lattice parameter of 5.93 Å in PbS is linearly expanded to 6.03 Å in PbS0.5Se0.5 with increasing Se alloying content. This expanded lattice in Se-alloyed PbS not only intensifies phonon scattering but also facilitates the formation of Cu interstitials. Based on the PbS0.6Se0.4 composition with the minimal lattice thermal conductivity, Cu interstitials are introduced to improve the electron density, thus boosting the peak power factor from 3.88 μW cm−1 K−2 in PbS0.6Se0.4 to 20.58 μW cm−1 K−2 in PbS0.6Se0.4−1%Cu. Meanwhile, the lattice thermal conductivity in PbS0.6Se0.4−x%Cu (x = 0–2) is further suppressed due to the strong strain field caused by Cu interstitials. Finally, with the lowered thermal conductivity and high electrical transport properties, a peak ZT ~1.1 and ZTave ~0.82 can be achieved in PbS0.6Se0.4−1%Cu at 300–773 K, which outperforms previously reported n-type PbS.},
  author       = {Liu, Zhengtao and Hong, Tao and Xu, Liqing and Wang, Sining and Gao, Xiang and Chang, Cheng and Ding, Xiangdong and Xiao, Yu and Zhao, Li-Dong},
  issn         = {2767-441X},
  journal      = {Interdisciplinary Materials},
  number       = {1},
  pages        = {161--170},
  publisher    = {Wiley},
  title        = {{Lattice expansion enables interstitial doping to achieve a high average ZT in n-type PbS}},
  doi          = {10.1002/idm2.12056},
  volume       = {2},
  year         = {2023},
}

@inproceedings{14989,
  abstract     = {Encryption alone is not enough for secure end-to-end encrypted messaging: a server must also honestly serve public keys to users. Key transparency has been presented as an efficient
solution for detecting (and hence deterring) a server that attempts to dishonestly serve keys. Key transparency involves two major components: (1) a username-to-public-key mapping, stored and cryptographically committed to by the server, and (2) an out-of-band consistency protocol for serving short commitments to users. Real-world deployments at production scale raise new challenges for both of these components. We enumerate these challenges and provide solutions to address them. In particular, we design and implement a memory-optimized and privacy-preserving verifiable data structure for committing to the username-to-public-key store.
To make this implementation viable for production, we also integrate support for persistent and distributed storage. We also propose a future-facing solution, termed “compaction”, as
a mechanism for mitigating practical issues that arise from dealing with infinitely growing server data structures. Finally, we implement a consensusless solution that achieves the minimum requirements for a service that consistently distributes commitments for a transparency application, providing a much more efficient protocol for distributing small and consistent
commitments to users. This culminates in our production-grade implementation of a key transparency system (Parakeet), which we have open-sourced, along with a demonstration of feasibility through our benchmarks.},
  author       = {Malvai, Harjasleen and Kokoris Kogias, Eleftherios and Sonnino, Alberto and Ghosh, Esha and Oztürk, Ercan and Lewi, Kevin and Lawlor, Sean},
  booktitle    = {Proceedings of the 2023 Network and Distributed System Security Symposium},
  isbn         = {1891562835},
  location     = {San Diego, CA, United States},
  publisher    = {Internet Society},
  title        = {{Parakeet: Practical key transparency for end-to-end encrypted messaging}},
  doi          = {10.14722/ndss.2023.24545},
  year         = {2023},
}

@misc{14990,
  abstract     = {The software artefact to evaluate the implementation of the approximation of stationary distributions.},
  author       = {Meggendorfer, Tobias},
  publisher    = {Zenodo},
  title        = {{Artefact for: Correct Approximation of Stationary Distributions}},
  doi          = {10.5281/ZENODO.7548214},
  year         = {2023},
}

@misc{14991,
  abstract     = {This repository contains the data, scripts, WRF codes and files required to reproduce the results of the manuscript "Assessing Memory in Convection Schemes Using Idealized Tests" submitted to the Journal of Advances in Modeling Earth Systems (JAMES).},
  author       = {Hwong, Yi-Ling and Colin, Maxime and Aglas, Philipp and Muller, Caroline J. and Sherwood, Steven C.},
  publisher    = {Zenodo},
  title        = {{Data: Assessing memory in convection schemes using idealized tests}},
  doi          = {10.5281/ZENODO.7757041},
  year         = {2023},
}

@inbook{14992,
  abstract     = {In this chapter we first review the Levy–Lieb functional, which gives the lowest kinetic and interaction energy that can be reached with all possible quantum states having a given density. We discuss two possible convex generalizations of this functional, corresponding to using mixed canonical and grand-canonical states, respectively. We present some recent works about the local density approximation, in which the functionals get replaced by purely local functionals constructed using the uniform electron gas energy per unit volume. We then review the known upper and lower bounds on the Levy–Lieb functionals. We start with the kinetic energy alone, then turn to the classical interaction alone, before we are able to put everything together. A later section is devoted to the Hohenberg–Kohn theorem and the role of many-body unique continuation in its proof.},
  author       = {Lewin, Mathieu and Lieb, Elliott H. and Seiringer, Robert},
  booktitle    = {Density Functional Theory},
  editor       = {Cances, Eric and Friesecke, Gero},
  isbn         = {9783031223396},
  issn         = {3005-0286},
  pages        = {115--182},
  publisher    = {Springer},
  title        = {{Universal Functionals in Density Functional Theory}},
  doi          = {10.1007/978-3-031-22340-2_3},
  year         = {2023},
}

@inproceedings{14993,
  abstract     = {Traditional top-down approaches for global health have historically failed to achieve social progress (Hoffman et al., 2015; Hoffman & Røttingen, 2015). Recently, however, a more holistic, multi-level approach termed One Health (OH) (Osterhaus et al., 2020) is being adopted. Several sets of challenges have been identified for the implementation of OH (dos S. Ribeiro et al., 2019), including policy and funding, education and training, and multi-actor, multi-domain, and multi-level collaborations. These challenges persist despite the increasing accessibility of
knowledge and digital collaborative research tools through the internet. To address some of these challenges, we propose a general framework for grassroots, community-based participatory research. Additionally, we present a specific roadmap to create a Machine Learning for Global Health community in Africa. The proposed framework aims to enable any small group of individuals with scarce resources to build and sustain an online community within approximately two years. We discuss the potential impact of the proposed framework on global health research collaborations.},
  author       = {Currin, Christopher and Asiedu, Mercy Nyamewaa and Fourie, Chris and Rosman, Benjamin and Turki, Houcemeddine and Lambebo Tonja, Atnafu and Abbott, Jade and Ajala, Marvellous and Adedayo, Sadiq Adewale and Emezue, Chris Chinenye and Machangara, Daphne},
  booktitle    = {1st Workshop on Machine Learning & Global Health},
  location     = {Kigali, Rwanda},
  publisher    = {OpenReview},
  title        = {{A framework for grassroots research collaboration in machine learning and global health}},
  year         = {2023},
}

@misc{14994,
  abstract     = {This resource contains the artifacts for reproducing the experimental results presented in the paper titled "A Flexible Toolchain for Symbolic Rabin Games under Fair and Stochastic Uncertainties" that has been submitted to CAV 2023.},
  author       = {Majumdar, Rupak and Mallik, Kaushik and Rychlicki, Mateusz and Schmuck, Anne-Kathrin and Soudjani, Sadegh},
  publisher    = {Zenodo},
  title        = {{A flexible toolchain for symbolic Rabin games under fair and stochastic uncertainties}},
  doi          = {10.5281/ZENODO.7877790},
  year         = {2023},
}

@misc{14995,
  abstract     = {Lincheck is a new practical and user-friendly framework for testing concurrent data structures on the Java Virtual Machine (JVM). It provides a simple and declarative way to write concurrent tests. Instead of describing how to perform the test, users specify what to test by declaring all the operations to examine; the framework automatically handles the rest. As a result, tests written with Lincheck are concise and easy to understand. 
The artifact presents a collection of Lincheck tests that discover new bugs in popular libraries and implementations from the concurrency literature -- they are listed in Table 1, Section 3. To evaluate the performance of Lincheck analysis, the collection also includes tests that check correct data structures and, thus, always succeed. As in Table 2, Section 3, the experiments demonstrate that a test completes in reasonable time. Finally, Lincheck provides user-friendly output with an easy-to-follow trace to reproduce a detected error, significantly simplifying further investigation.},
  author       = {Koval, Nikita and Fedorov, Alexander and Sokolova, Maria and Tsitelov, Dmitry and Alistarh, Dan-Adrian},
  publisher    = {Zenodo},
  title        = {{Lincheck: A practical framework for testing concurrent data structures on JVM}},
  doi          = {10.5281/ZENODO.7877757},
  year         = {2023},
}

@inproceedings{15023,
  abstract     = {Reinforcement learning has shown promising results in learning neural network policies for complicated control tasks. However, the lack of formal guarantees about the behavior of such policies remains an impediment to their deployment. We propose a novel method for learning a composition of neural network policies in stochastic environments, along with a formal certificate which guarantees that a specification over the policy's behavior is satisfied with the desired probability. Unlike prior work on verifiable RL, our approach leverages the compositional nature of logical specifications provided in SpectRL, to learn over graphs of probabilistic reach-avoid specifications. The formal guarantees are provided by learning neural network policies together with reach-avoid supermartingales (RASM) for the graph’s sub-tasks and then composing them into a global policy. We also derive a tighter lower bound compared to previous work on the probability of reach-avoidance implied by a RASM, which is required to find a compositional policy with an acceptable probabilistic threshold for complex tasks with multiple edge policies. We implement a prototype of our approach and evaluate it on a Stochastic Nine Rooms environment.},
  author       = {Zikelic, Dorde and Lechner, Mathias and Verma, Abhinav and Chatterjee, Krishnendu and Henzinger, Thomas A},
  booktitle    = {37th Conference on Neural Information Processing Systems},
  location     = {New Orleans, LA, United States},
  title        = {{Compositional policy learning in stochastic control systems with formal guarantees}},
  year         = {2023},
}

@misc{15027,
  abstract     = {This data repository underpins the paper published in PNAS (doi pending) and on bioRxiv (doi: https://doi.org/10.1101/2023.07.05.547777).},
  author       = {Curk, Samo},
  publisher    = {Figshare},
  title        = {{aggregation_data}},
  year         = {2023},
}

@misc{15035,
  abstract     = {This artifact aims to reproduce the experiments from the paper Monitoring Hyperproperties With Prefix Transducers, accepted at RV'23, and gives further pointers to the implementation of prefix transducers.
It has two parts: a pre-compiled Docker image and sources that one can use to compile the software (locally or in Docker) and run the experiments.},
  author       = {Chalupa, Marek and Henzinger, Thomas A},
  publisher    = {Zenodo},
  title        = {{Monitoring hyperproperties with prefix transducers}},
  doi          = {10.5281/ZENODO.8191723},
  year         = {2023},
}

@unpublished{15039,
  abstract     = {A crucial property for achieving secure, trustworthy and interpretable deep learning systems is their robustness: small changes to a system's inputs should not result in large changes to its outputs. Mathematically, this means one strives for networks with a small Lipschitz constant. Several recent works have focused on how to construct such Lipschitz networks, typically by imposing constraints on the weight matrices. In this work, we study an orthogonal aspect, namely the role of the activation function. We show that commonly used activation functions, such as MaxMin, as well as all piece-wise linear ones with two segments, unnecessarily restrict the class of representable functions, even in the simplest one-dimensional setting. We furthermore introduce the new N-activation function that is provably more expressive than currently popular activation functions. We provide code at this https URL.},
  author       = {Prach, Bernd and Lampert, Christoph},
  booktitle    = {arXiv},
  title        = {{1-Lipschitz neural networks are more expressive with N-activations}},
  doi          = {10.48550/arXiv.2311.06103},
  year         = {2023},
}

@inproceedings{13053,
  abstract     = {Deep neural networks (DNNs) often have to be compressed, via pruning and/or quantization, before they can be deployed in practical settings. In this work we propose a new compression-aware minimizer dubbed CrAM that modifies the optimization step in a principled way, in order to produce models whose local loss behavior is stable under compression operations such as pruning. Thus, dense models trained via CrAM should be compressible post-training, in a single step, without significant accuracy loss. Experimental results on standard benchmarks, such as residual networks for ImageNet classification and BERT models for language modelling, show that CrAM produces dense models that can be more accurate than the standard SGD/Adam-based baselines, but which are stable under weight pruning: specifically, we can prune models in one-shot to 70-80% sparsity with almost no accuracy loss, and to 90% with reasonable (∼1%) accuracy loss, which is competitive with gradual compression methods. Additionally, CrAM can produce sparse models which perform well for transfer learning, and it also works for semi-structured 2:4 pruning patterns supported by GPU hardware. The code for reproducing the results is available at this https URL.},
  author       = {Peste, Elena-Alexandra and Vladu, Adrian and Kurtic, Eldar and Lampert, Christoph and Alistarh, Dan-Adrian},
  booktitle    = {11th International Conference on Learning Representations},
  location     = {Kigali, Rwanda},
  title        = {{CrAM: A Compression-Aware Minimizer}},
  year         = {2023},
}

@phdthesis{13074,
  abstract     = {Deep learning has become an integral part of a large number of important applications, and many of the recent breakthroughs have been enabled by the ability to train very large models, capable to capture complex patterns and relationships from the data. At the same time, the massive sizes of modern deep learning models have made their deployment to smaller devices more challenging; this is particularly important, as in many applications the users rely on accurate deep learning predictions, but they only have access to devices with limited memory and compute power. One solution to this problem is to prune neural networks, by setting as many of their parameters as possible to zero, to obtain accurate sparse models with lower memory footprint. Despite the great research progress in obtaining sparse models that preserve accuracy, while satisfying memory and computational constraints, there are still many challenges associated with efficiently training sparse models, as well as understanding their generalization properties.

The focus of this thesis is to investigate how the training process of sparse models can be made more efficient, and to understand the differences between sparse and dense models in terms of how well they can generalize to changes in the data distribution. We first study a method for co-training sparse and dense models, at a lower cost compared to regular training. With our method we can obtain very accurate sparse networks, and dense models that can recover the baseline accuracy. Furthermore, we are able to more easily analyze the differences, at prediction level, between the sparse-dense model pairs. Next, we investigate the generalization properties of sparse neural networks in more detail, by studying how well different sparse models trained on a larger task can adapt to smaller, more specialized tasks, in a transfer learning scenario. Our analysis across multiple pruning methods and sparsity levels reveals that sparse models provide features that can transfer similarly to or better than the dense baseline. However, the choice of the pruning method plays an important role, and can influence the results when the features are fixed (linear finetuning), or when they are allowed to adapt to the new task (full finetuning). Using sparse models with fixed masks for finetuning on new tasks has an important practical advantage, as it enables training neural networks on smaller devices. However, one drawback of current pruning methods is that the entire training cycle has to be repeated to obtain the initial sparse model for every sparsity target; as a consequence, the training process is costly and multiple models need to be stored. In the last part of the thesis we propose a method that can train accurate dense models that are compressible in a single step, to multiple sparsity levels, without additional finetuning. Our method results in sparse models that can be competitive with existing pruning methods, and which can also successfully generalize to new tasks.},
  author       = {Peste, Elena-Alexandra},
  issn         = {2663-337X},
  pages        = {147},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Efficiency and generalization of sparse neural networks}},
  doi          = {10.15479/at:ista:13074},
  year         = {2023},
}

@phdthesis{13081,
  abstract     = {During development, tissues undergo changes in size and shape to form functional organs. Distinct cellular processes such as cell division and cell rearrangements underlie tissue morphogenesis. Yet how the distinct processes are controlled and coordinated, and how they contribute to morphogenesis, is poorly understood. In our study, we addressed these questions using the developing mouse neural tube. This epithelial organ transforms from a flat epithelial sheet to an epithelial tube while increasing in size and undergoing morphogen-mediated patterning. The extent and mechanism of neural progenitor rearrangement within the developing mouse neuroepithelium is unknown. To investigate this, we performed high-resolution lineage tracing analysis to quantify the extent of epithelial rearrangement at different stages of neural tube development. We quantitatively described the relationship between apical cell size and cell-cycle-dependent interkinetic nuclear migrations (IKNM) and performed high cellular resolution live imaging of the neuroepithelium to study the dynamics of junctional remodeling. Furthermore, we developed a vertex model of the neuroepithelium to investigate the quantitative contribution of cell proliferation, cell differentiation and mechanical properties to the epithelial rearrangement dynamics and validated the model predictions through functional experiments. Our analysis revealed that at early developmental stages, the apical cell area kinetics driven by IKNM induce high levels of cell rearrangements in a regime of high junctional tension and contractility. After E9.5, there is a sharp decline in the extent of cell rearrangements, suggesting that the epithelium transitions from a fluid-like to a solid-like state. We found that this transition is regulated by the growth rate of the tissue, rather than by changes in cell-cell adhesion and contractile forces. Overall, our study provides a quantitative description of the relationship between tissue growth, cell cycle dynamics, epithelial rearrangements and the emergent tissue material properties, and novel insights into how epithelial cell dynamics influences tissue morphogenesis.},
  author       = {Bocanegra, Laura},
  issn         = {2663-337X},
  pages        = {93},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Epithelial dynamics during mouse neural tube development}},
  doi          = {10.15479/at:ista:13081},
  year         = {2023},
}

