% NeurIPS 2022 paper introducing Neural Attentive Circuits (NACs).
% Percent signs in the abstract are escaped: a bare percent sign starts a TeX
% comment once the field is written to a .bbl file and truncates the field.
@inproceedings{14168,
  abstract     = {Recent work has seen the development of general purpose neural architectures
that can be trained to perform tasks across diverse data modalities. General
purpose models typically make few assumptions about the underlying
data-structure and are known to perform well in the large-data regime. At the
same time, there has been growing interest in modular neural architectures that
represent the data using sparsely interacting modules. These models can be more
robust out-of-distribution, computationally efficient, and capable of
sample-efficient adaptation to new data. However, they tend to make
domain-specific assumptions about the data, and present challenges in how
module behavior (i.e., parameterization) and connectivity (i.e., their layout)
can be jointly learned. In this work, we introduce a general purpose, yet
modular neural architecture called Neural Attentive Circuits (NACs) that
jointly learns the parameterization and a sparse connectivity of neural modules
without using domain knowledge. NACs are best understood as the combination of
two systems that are jointly trained end-to-end: one that determines the module
configuration and the other that executes it on an input. We demonstrate
qualitatively that NACs learn diverse and meaningful module configurations on
the NLVR2 dataset without additional supervision. Quantitatively, we show that
by incorporating modularity in this way, NACs improve upon a strong non-modular
baseline in terms of low-shot adaptation on CIFAR and CUBs dataset by about
10\%, and OOD robustness on Tiny ImageNet-R by about 2.5\%. Further, we find that
NACs can achieve an 8x speedup at inference time while losing less than 3\%
performance. Finally, we find NACs to yield competitive results on diverse data
modalities spanning point-cloud classification, symbolic processing and
text-classification from ASCII bytes, thereby confirming its general purpose
nature.},
  author       = {Rahaman, Nasim and Weiss, Martin and Locatello, Francesco and Pal, Chris and Bengio, Yoshua and Schölkopf, Bernhard and Li, Li Erran and Ballas, Nicolas},
  booktitle    = {36th Conference on Neural Information Processing Systems},
  location     = {New Orleans, LA, United States},
  title        = {{Neural attentive circuits}},
  volume       = {35},
  year         = {2022},
}

% ICML 2022 paper: empirical study of generalization and robustness of
% object-centric representation learning.
% NOTE(review): volume is set to the proceedings volume number used by the
% other entry with the same booktitle (162), not the year; confirm against
% the publisher page.
@inproceedings{14170,
  abstract     = {The idea behind object-centric representation learning is that natural scenes can better be modeled as compositions of objects and their relations as opposed to distributed representations. This inductive bias can be injected into neural networks to potentially improve systematic generalization and performance of downstream tasks in scenes with multiple objects. In this paper, we train state-of-the-art unsupervised models on five common multi-object datasets and evaluate segmentation metrics and downstream object property prediction. In addition, we study generalization and robustness by investigating the settings where either a single object is out of distribution -- e.g., having an unseen color, texture, or shape -- or global properties of the scene are altered -- e.g., by occlusions, cropping, or increasing the number of objects. From our experimental study, we find object-centric representations to be useful for downstream tasks and generally robust to most distribution shifts affecting objects. However, when the distribution shift affects the input in a less structured manner, robustness in terms of segmentation and downstream task performance may vary significantly across models and distribution shifts.},
  author       = {Dittadi, Andrea and Papa, Samuele and De Vita, Michele and Schölkopf, Bernhard and Winther, Ole and Locatello, Francesco},
  booktitle    = {Proceedings of the 39th International Conference on Machine Learning},
  location     = {Baltimore, MD, United States},
  pages        = {5221--5285},
  publisher    = {ML Research Press},
  title        = {{Generalization and robustness implications in object-centric learning}},
  volume       = {162},
  year         = {2022},
}

% ICML 2022 paper: the SCORE algorithm for causal discovery via score matching
% in non-linear additive noise models.
@inproceedings{14171,
  abstract     = {This paper demonstrates how to recover causal graphs from the score of the
data distribution in non-linear additive (Gaussian) noise models. Using score
matching algorithms as a building block, we show how to design a new generation
of scalable causal discovery methods. To showcase our approach, we also propose
a new efficient method for approximating the score's Jacobian, enabling to
recover the causal graph. Empirically, we find that the new algorithm, called
SCORE, is competitive with state-of-the-art causal discovery methods while
being significantly faster.},
  author       = {Rolland, Paul and Cevher, Volkan and Kleindessner, Matthäus and Russell, Chris and Schölkopf, Bernhard and Janzing, Dominik and Locatello, Francesco},
  booktitle    = {Proceedings of the 39th International Conference on Machine Learning},
  location     = {Baltimore, MD, United States},
  pages        = {18741--18753},
  publisher    = {ML Research Press},
  title        = {{Score matching enables causal discovery of nonlinear additive noise models}},
  volume       = {162},
  year         = {2022},
}

% ICLR 2022 paper: benchmark of representation-learning approaches on
% within-domain generalization over factors of variation.
% The second author is entered in "von Last, First" form so the particle is
% parsed as part of the surname rather than as a given name.
@inproceedings{14172,
  abstract     = {An important component for generalization in machine learning is to uncover underlying latent factors of variation as well as the mechanism through which each factor acts in the world. In this paper, we test whether 17 unsupervised, weakly supervised, and fully supervised representation learning approaches correctly infer the generative factors of variation in simple datasets (dSprites, Shapes3D, MPI3D) from controlled environments, and on our contributed CelebGlow dataset. In contrast to prior robustness work that introduces novel factors of variation during test time, such as blur or other (un)structured noise, we here recompose, interpolate, or extrapolate only existing factors of variation from the training data set (e.g., small and medium-sized objects during training and large objects during testing). Models that learn the correct mechanism should be able to generalize to this benchmark. In total, we train and test 2000+ models and observe that all of them struggle to learn the underlying mechanism regardless of supervision signal and architectural bias. Moreover, the generalization capabilities of all tested models drop significantly as we move from artificial datasets towards more realistic real-world datasets. Despite their inability to identify the correct mechanism, the models are quite modular as their ability to infer other in-distribution factors remains fairly stable, providing only a single factor is out-of-distribution. These results point to an important yet understudied problem of learning mechanistic models of observations that can facilitate generalization.},
  author       = {Schott, Lukas and von Kügelgen, Julius and Träuble, Frederik and Gehler, Peter and Russell, Chris and Bethge, Matthias and Schölkopf, Bernhard and Locatello, Francesco and Brendel, Wieland},
  booktitle    = {10th International Conference on Learning Representations},
  location     = {Virtual},
  title        = {{Visual representation learning does not generalize strongly within the same domain}},
  year         = {2022},
}

% NeurIPS 2022 paper: large-scale empirical study of out-of-distribution
% generalization in transfer learning.
@inproceedings{14173,
  abstract     = {Since out-of-distribution generalization is a generally ill-posed problem, various proxy targets (e.g., calibration, adversarial robustness, algorithmic corruptions, invariance across shifts) were studied across different research programs resulting in different recommendations. While sharing the same aspirational goal, these approaches have never been tested under the same experimental conditions on real data. In this paper, we take a unified view of previous work, highlighting message discrepancies that we address empirically, and providing recommendations on how to measure the robustness of a model and how to improve it. To this end, we collect 172 publicly available dataset pairs for training and out-of-distribution evaluation of accuracy, calibration error, adversarial attacks, environment invariance, and synthetic corruptions. We fine-tune over 31k networks, from nine different architectures in the many- and few-shot setting. Our findings confirm that in- and out-of-distribution accuracies tend to increase jointly, but show that their relation is largely dataset-dependent, and in general more nuanced and more complex than posited by previous, smaller scale studies.},
  author       = {Wenzel, Florian and Dittadi, Andrea and Gehler, Peter Vincent and Simon-Gabriel, Carl-Johann and Horn, Max and Zietlow, Dominik and Kernert, David and Russell, Chris and Brox, Thomas and Schiele, Bernt and Schölkopf, Bernhard and Locatello, Francesco},
  booktitle    = {36th Conference on Neural Information Processing Systems},
  isbn         = {9781713871088},
  location     = {New Orleans, LA, United States},
  pages        = {7181--7198},
  publisher    = {Neural Information Processing Systems Foundation},
  title        = {{Assaying out-of-distribution generalization in transfer learning}},
  volume       = {35},
  year         = {2022},
}

% ICLR 2022 paper: how pretrained representations affect the OOD
% generalization of downstream reinforcement learning agents.
@inproceedings{14174,
  abstract     = {Building sample-efficient agents that generalize out-of-distribution (OOD) in real-world settings remains a fundamental unsolved problem on the path towards achieving higher-level cognition. One particularly promising approach is to begin with low-dimensional, pretrained representations of our world, which should facilitate efficient downstream learning and generalization. By training 240 representations and over 10,000 reinforcement learning (RL) policies on a simulated robotic setup, we evaluate to what extent different properties of pretrained VAE-based representations affect the OOD generalization of downstream agents. We observe that many agents are surprisingly robust to realistic distribution shifts, including the challenging sim-to-real case. In addition, we find that the generalization performance of a simple downstream proxy task reliably predicts the generalization performance of our RL agents under a wide range of OOD settings. Such proxy tasks can thus be used to select pretrained representations that will lead to agents that generalize.},
  author       = {Dittadi, Andrea and Träuble, Frederik and Wüthrich, Manuel and Widmaier, Felix and Gehler, Peter and Winther, Ole and Locatello, Francesco and Bachem, Olivier and Schölkopf, Bernhard and Bauer, Stefan},
  booktitle    = {10th International Conference on Learning Representations},
  location     = {Virtual},
  title        = {{The role of pretrained representations for the OOD generalization of reinforcement learning agents}},
  year         = {2022},
}

% ICLR 2022 paper: Shapley-value-based feature attribution for trajectory
% prediction models.
% The second author is entered in "von Last, First" form so the particle is
% parsed as part of the surname rather than as a given name.
@inproceedings{14175,
  abstract     = {Predicting the future trajectory of a moving agent can be easy when the past trajectory continues smoothly but is challenging when complex interactions with other agents are involved. Recent deep learning approaches for trajectory prediction show promising performance and partially attribute this to successful reasoning about agent-agent interactions. However, it remains unclear which features such black-box models actually learn to use for making predictions. This paper proposes a procedure that quantifies the contributions of different cues to model performance based on a variant of Shapley values. Applying this procedure to state-of-the-art trajectory prediction methods on standard benchmark datasets shows that they are, in fact, unable to reason about interactions. Instead, the past trajectory of the target is the only feature used for predicting its future. For a task with richer social interaction patterns, on the other hand, the tested models do pick up such interactions to a certain extent, as quantified by our feature attribution method. We discuss the limits of the proposed method and its links to causality.},
  author       = {Makansi, Osama and von Kügelgen, Julius and Locatello, Francesco and Gehler, Peter and Janzing, Dominik and Brox, Thomas and Schölkopf, Bernhard},
  booktitle    = {10th International Conference on Learning Representations},
  location     = {Virtual},
  title        = {{You mostly walk alone: Analyzing feature attribution in trajectory prediction}},
  year         = {2022},
}

% NeurIPS 2022 contribution: roadmap towards a general-purpose neural
% architecture for geospatial data.
@inproceedings{14215,
  author       = {Rahaman, Nasim and Weiss, Martin and Träuble, Frederik and Locatello, Francesco and Lacoste, Alexandre and Bengio, Yoshua and Pal, Chris and Li, Li Erran and Schölkopf, Bernhard},
  title        = {{A general purpose neural architecture for geospatial systems}},
  booktitle    = {36th Conference on Neural Information Processing Systems},
  year         = {2022},
  location     = {New Orleans, LA, United States},
  abstract     = {Geospatial Information Systems are used by researchers and Humanitarian Assistance and Disaster Response (HADR) practitioners to support a wide variety of important applications. However, collaboration between these actors is difficult due to the heterogeneous nature of geospatial data modalities (e.g., multi-spectral images of various resolutions, timeseries, weather data) and diversity of tasks (e.g., regression of human activity indicators or detecting forest fires). In this work, we present a roadmap towards the construction of a general-purpose neural architecture (GPNA) with a geospatial inductive bias, pre-trained on large amounts of unlabelled earth observation data in a self-supervised manner. We envision how such a model may facilitate cooperation between members of the community. We show preliminary results on the first step of the roadmap, where we instantiate an architecture that can process a wide variety of geospatial data modalities and demonstrate that it can achieve competitive performance with domain-specific architectures on tasks relating to the U.N.'s Sustainable Development Goals.},
}

% arXiv preprint (ASIF). The unpublished entry type requires a note field;
% eprint/archiveprefix carry the arXiv identifier taken from the DOI so that
% eprint-aware styles can print and link it.
@unpublished{14216,
  abstract     = {CLIP proved that aligning visual and language spaces is key to solving many vision tasks without explicit training, but required to train image and text encoders from scratch on a huge dataset. LiT improved this by only training the text encoder and using a pre-trained vision network. In this paper, we show that a common space can be created without any training at all, using single-domain encoders (trained with or without supervision) and a much smaller amount of image-text pairs. Furthermore, our model has unique properties. Most notably, deploying a new version with updated training samples can be done in a matter of seconds. Additionally, the representations in the common space are easily interpretable as every dimension corresponds to the similarity of the input to a unique entry in the multimodal dataset. Experiments on standard zero-shot visual benchmarks demonstrate the typical transfer ability of image-text models. Overall, our method represents a simple yet surprisingly strong baseline for foundation multi-modal models, raising important questions on their data efficiency and on the role of retrieval in machine learning.},
  archiveprefix = {arXiv},
  author       = {Norelli, Antonio and Fumero, Marco and Maiorca, Valentino and Moschella, Luca and Rodolà, Emanuele and Locatello, Francesco},
  booktitle    = {arXiv},
  eprint       = {2210.01738},
  note         = {Preprint},
  title        = {{ASIF: Coupled data turns unimodal models to multimodal without training}},
  doi          = {10.48550/arXiv.2210.01738},
  year         = {2022},
}

% arXiv preprint on linear relation networks for multi-object RL. The
% unpublished entry type requires a note field; eprint/archiveprefix carry the
% arXiv identifier taken from the DOI.
% NOTE(review): "K2" in the exported abstract is read as a flattened
% superscript (K squared), consistent with the later "scales linearly in K".
@unpublished{14220,
  abstract     = {Although reinforcement learning has seen remarkable progress over the last years, solving robust dexterous object-manipulation tasks in multi-object settings remains a challenge. In this paper, we focus on models that can learn manipulation tasks in fixed multi-object settings and extrapolate this skill zero-shot without any drop in performance when the number of objects changes. We consider the generic task of bringing a specific cube out of a set to a goal position. We find that previous approaches, which primarily leverage attention and graph neural network-based architectures, do not generalize their skills when the number of input objects changes while scaling as $K^2$. We propose an alternative plug-and-play module based on relational inductive biases to overcome these limitations. Besides exceeding performances in their training environment, we show that our approach, which scales linearly in $K$, allows agents to extrapolate and generalize zero-shot to any new object number.},
  archiveprefix = {arXiv},
  author       = {Mambelli, Davide and Träuble, Frederik and Bauer, Stefan and Schölkopf, Bernhard and Locatello, Francesco},
  booktitle    = {arXiv},
  eprint       = {2201.13388},
  note         = {Preprint},
  title        = {{Compositional multi-object reinforcement learning with linear relation networks}},
  doi          = {10.48550/arXiv.2201.13388},
  year         = {2022},
}

% Journal article (Algebra and Number Theory, vol. 16, 2022).
% The ampersand in the journal name is escaped because a bare ampersand is an
% alignment character in LaTeX and errors out when the field is typeset.
@article{9199,
  abstract     = {We associate a certain tensor product lattice to any primitive integer lattice and ask about its typical shape. These lattices are related to the tangent bundle of Grassmannians and their study is motivated by Peyre's programme on ``freeness'' for rational points of bounded height on Fano varieties.},
  author       = {Browning, Timothy D and Horesh, Tal and Wilsch, Florian Alexander},
  issn         = {1944-7833},
  journal      = {Algebra \& Number Theory},
  number       = {10},
  pages        = {2385--2407},
  publisher    = {Mathematical Sciences Publishers},
  title        = {{Equidistribution and freeness on Grassmannians}},
  doi          = {10.2140/ant.2022.16.2385},
  volume       = {16},
  year         = {2022},
}

% Journal article (Mathematics of Operations Research, vol. 47, 2022) on
% finite-memory strategies in POMDPs with long-run average objectives.
@article{9311,
  author       = {Chatterjee, Krishnendu and Saona Urmeneta, Raimundo J and Ziliotto, Bruno},
  title        = {{Finite-memory strategies in POMDPs with long-run average objectives}},
  journal      = {Mathematics of Operations Research},
  volume       = {47},
  number       = {1},
  pages        = {100--119},
  year         = {2022},
  publisher    = {Institute for Operations Research and the Management Sciences},
  issn         = {1526-5471},
  doi          = {10.1287/moor.2020.1116},
  keywords     = {Management Science and Operations Research, General Mathematics, Computer Science Applications},
  abstract     = {Partially observable Markov decision processes (POMDPs) are standard models for dynamic systems with probabilistic and nondeterministic behaviour in uncertain environments. We prove that in POMDPs with long-run average objective, the decision maker has approximately optimal strategies with finite memory. This implies notably that approximating the long-run value is recursively enumerable, as well as a weak continuity property of the value with respect to the transition function. },
}

% Journal article (Math. Proc. Cambridge Philos. Soc., vol. 172, 2022) on
% lower bounds for incomplete Kloosterman sums.
% NOTE(review): the formulas in the abstract were reconstructed as LaTeX from
% a flattened plain-text export. Please verify against the publisher's
% abstract, in particular the modular inverse (x-bar) in the Kloosterman
% phase and the constant (1 - epsilon) / (sqrt(2) pi). The math uses
% \mathbb, which needs amssymb if the abstract is ever typeset.
@article{9364,
  abstract     = {Let $t \colon \mathbb{F}_p \to \mathbb{C}$ be a complex valued function on $\mathbb{F}_p$. A classical problem in analytic number theory is bounding the maximum $M(t) := \max_{0 \le H < p} \left| \frac{1}{\sqrt{p}} \sum_{0 \le n < H} t(n) \right|$ of the absolute value of the incomplete sums $\frac{1}{\sqrt{p}} \sum_{0 \le n < H} t(n)$. In this very general context one of the most important results is the Pólya–Vinogradov bound $M(t) \le \|\hat{t}\|_{\infty} \log 3p$, where $\hat{t} \colon \mathbb{F}_p \to \mathbb{C}$ is the normalized Fourier transform of $t$. In this paper we provide a lower bound for certain incomplete Kloosterman sums, namely we prove that for any $\varepsilon > 0$ there exists a large subset of $a \in \mathbb{F}_p^{\times}$ such that for $\mathrm{kl}_{a,1,p} \colon x \mapsto e((ax + \overline{x})/p)$ we have $M(\mathrm{kl}_{a,1,p}) \ge \left( \frac{1-\varepsilon}{\sqrt{2}\pi} + o(1) \right) \log\log p$, as $p \to \infty$. Finally, we prove a result on the growth of the moments of $\lbrace M(\mathrm{kl}_{a,1,p}) \rbrace_{a \in \mathbb{F}_p^{\times}}$. 2020 Mathematics Subject Classification: 11L03, 11T23 (Primary); 14F20, 60F10 (Secondary).},
  author       = {Bonolis, Dante},
  issn         = {1469-8064},
  journal      = {Mathematical Proceedings of the Cambridge Philosophical Society},
  number       = {3},
  pages        = {563--590},
  publisher    = {Cambridge University Press},
  title        = {{On the size of the maximum of incomplete Kloosterman sums}},
  doi          = {10.1017/S030500412100030X},
  volume       = {172},
  year         = {2022},
}

% Journal article (Foundations of Computational Mathematics, vol. 22, 2022)
% on piecewise-linear approximations of isomanifolds.
% NOTE(review): "Rd" and "Rd-n" in the exported abstract are read as flattened
% superscripts and restored as LaTeX math (uses \mathbb, which needs amssymb
% if the abstract is ever typeset).
@article{9649,
  abstract     = {Isomanifolds are the generalization of isosurfaces to arbitrary dimension and codimension, i.e. manifolds defined as the zero set of some multivariate vector-valued smooth function $f \colon \mathbb{R}^d \to \mathbb{R}^{d-n}$. A natural (and efficient) way to approximate an isomanifold is to consider its Piecewise-Linear (PL) approximation based on a triangulation T of the ambient space $\mathbb{R}^d$. In this paper, we give conditions under which the PL-approximation of an isomanifold is topologically equivalent to the isomanifold. The conditions are easy to satisfy in the sense that they can always be met by taking a sufficiently fine triangulation T. This contrasts with previous results on the triangulation of manifolds where, in arbitrary dimensions, delicate perturbations are needed to guarantee topological correctness, which leads to strong limitations in practice. We further give a bound on the Fréchet distance between the original isomanifold and its PL-approximation. Finally we show analogous results for the PL-approximation of an isomanifold with boundary.},
  author       = {Boissonnat, Jean-Daniel and Wintraecken, Mathijs},
  issn         = {1615-3383},
  journal      = {Foundations of Computational Mathematics},
  pages        = {967--1012},
  publisher    = {Springer Nature},
  title        = {{The topological correctness of PL approximations of isomanifolds}},
  doi          = {10.1007/s10208-021-09520-0},
  volume       = {22},
  year         = {2022},
}

% Journal article (Cold Spring Harbor Perspectives in Biology, vol. 14, 2022):
% review of auxin research.
% The DOI is stored bare, without surrounding whitespace, so resolver links
% built from it are valid.
@article{10016,
  abstract     = {Auxin has always been at the forefront of research in plant physiology and development. Since the earliest contemplations by Julius von Sachs and Charles Darwin, more than a century-long struggle has been waged to understand its function. This largely reflects the failures, successes, and inevitable progress in the entire field of plant signaling and development. Here I present 14 stations on our long and sometimes mystical journey to understand auxin. These highlights were selected to give a flavor of the field and to show the scope and limits of our current knowledge. A special focus is put on features that make auxin unique among phytohormones, such as its dynamic, directional transport network, which integrates external and internal signals, including self-organizing feedback. Accented are persistent mysteries and controversies. The unexpected discoveries related to rapid auxin responses and growth regulation recently disturbed our contentment regarding understanding of the auxin signaling mechanism. These new revelations, along with advances in technology, usher us into a new, exciting era in auxin research.},
  author       = {Friml, Jiří},
  issn         = {1943-0264},
  journal      = {Cold Spring Harbor Perspectives in Biology},
  number       = {5},
  publisher    = {Cold Spring Harbor Laboratory},
  title        = {{Fourteen stations of auxin}},
  doi          = {10.1101/cshperspect.a039859},
  volume       = {14},
  year         = {2022},
}

% Journal article (Journal of the Institute of Mathematics of Jussieu, 2022)
% on integral points on singular del Pezzo surfaces. No volume or pages are
% recorded in this entry.
% NOTE(review): "A1 + A3" in the exported abstract is read as flattened
% subscripts (singularity type) and restored as LaTeX math.
@article{10018,
  abstract     = {In order to study integral points of bounded log-anticanonical height on weak del Pezzo surfaces, we classify weak del Pezzo pairs. As a representative example, we consider a quartic del Pezzo surface of singularity type $A_1 + A_3$ and prove an analogue of Manin's conjecture for integral points with respect to its singularities and its lines.},
  author       = {Derenthal, Ulrich and Wilsch, Florian Alexander},
  issn         = {1475-3030},
  journal      = {Journal of the Institute of Mathematics of Jussieu},
  keywords     = {Integral points, del Pezzo surface, universal torsor, Manin’s conjecture},
  publisher    = {Cambridge University Press},
  title        = {{Integral points on singular del Pezzo surfaces}},
  doi          = {10.1017/S1474748022000482},
  year         = {2022},
}

% Journal article (ACS Nano, vol. 16, 2022) on defect engineering in
% solution-processed polycrystalline SnSe thermoelectrics.
@article{10042,
  abstract     = {SnSe has emerged as one of the most promising materials for thermoelectric energy conversion due to its extraordinary performance in its single-crystal form and its low-cost constituent elements. However, to achieve an economic impact, the polycrystalline counterpart needs to replicate the performance of the single crystal. Herein, we optimize the thermoelectric performance of polycrystalline SnSe produced by consolidating solution-processed and surface-engineered SnSe particles. In particular, the SnSe particles are coated with CdSe molecular complexes that crystallize during the sintering process, forming CdSe nanoparticles. The presence of CdSe nanoparticles inhibits SnSe grain growth during the consolidation step due to Zener pinning, yielding a material with a high density of grain boundaries. Moreover, the resulting SnSe–CdSe nanocomposites present a large number of defects at different length scales, which significantly reduce the thermal conductivity. The produced SnSe–CdSe nanocomposites exhibit thermoelectric figures of merit up to 2.2 at 786 K, which is among the highest reported for solution-processed SnSe.},
  author       = {Liu, Yu and Calcabrini, Mariano and Yu, Yuan and Lee, Seungho and Chang, Cheng and David, Jérémy and Ghosh, Tanmoy and Spadaro, Maria Chiara and Xie, Chenyang and Cojocaru-Mirédin, Oana and Arbiol, Jordi and Ibáñez, Maria},
  issn         = {1936-086X},
  journal      = {ACS Nano},
  keywords     = {tin selenide, nanocomposite, grain growth, Zener pinning, thermoelectricity, annealing, solution processing},
  number       = {1},
  pages        = {78--88},
  publisher    = {American Chemical Society},
  title        = {{Defect engineering in solution-processed polycrystalline SnSe leads to high thermoelectric performance}},
  doi          = {10.1021/acsnano.1c06720},
  volume       = {16},
  year         = {2022},
}

% Journal article (Nature Reviews Molecular Cell Biology, vol. 23, 2022):
% review of the mitochondrial respiratory chain.
% The page range uses the BibTeX double hyphen rather than a Unicode en dash,
% so styles and tools can parse it as a range.
@article{10182,
  abstract     = {The mitochondrial oxidative phosphorylation system is central to cellular metabolism. It comprises five enzymatic complexes and two mobile electron carriers that work in a mitochondrial respiratory chain. By coupling the oxidation of reducing equivalents coming into mitochondria to the generation and subsequent dissipation of a proton gradient across the inner mitochondrial membrane, this electron transport chain drives the production of ATP, which is then used as a primary energy carrier in virtually all cellular processes. Minimal perturbations of the respiratory chain activity are linked to diseases; therefore, it is necessary to understand how these complexes are assembled and regulated and how they function. In this Review, we outline the latest assembly models for each individual complex, and we also highlight the recent discoveries indicating that the formation of larger assemblies, known as respiratory supercomplexes, originates from the association of the intermediates of individual complexes. We then discuss how recent cryo-electron microscopy structures have been key to answering open questions on the function of the electron transport chain in mitochondrial respiration and how supercomplexes and other factors, including metabolites, can regulate the activity of the single complexes. When relevant, we discuss how these mechanisms contribute to physiology and outline their deregulation in human diseases.},
  author       = {Vercellino, Irene and Sazanov, Leonid A},
  issn         = {1471-0080},
  journal      = {Nature Reviews Molecular Cell Biology},
  pages        = {141--161},
  publisher    = {Springer Nature},
  title        = {{The assembly, regulation and function of the mitochondrial respiratory chain}},
  doi          = {10.1038/s41580-021-00415-0},
  volume       = {23},
  year         = {2022},
}

% Journal article (Journal of Ambient Intelligence and Humanized Computing,
% vol. 13, 2022) presenting the CaDRAW dimension-reduction framework.
% The page range uses the BibTeX double hyphen rather than a Unicode en dash,
% so styles and tools can parse it as a range.
@article{10208,
  abstract     = {It is practical to collect a huge amount of movement data and environmental context information along with the health signals of individuals because there is the emergence of new generations of positioning and tracking technologies and rapid advancements of health sensors. The study of the relations between these datasets and their sequence similarity analysis is of interest to many applications such as health monitoring and recommender systems. However, entering all movement parameters and health signals can lead to the complexity of the problem and an increase in its computational load. In this situation, dimension reduction techniques can be used to avoid consideration of simultaneous dependent parameters in the process of similarity measurement of the trajectories. The present study provides a framework, named CaDRAW, to use spatial–temporal data and movement parameters along with independent context information in the process of measuring the similarity of trajectories. In this regard, the omission of dependent movement characteristic signals is conducted by using an unsupervised feature selection dimension reduction technique. To evaluate the effectiveness of the proposed framework, it was applied to a real contextualized movement and related health signal datasets of individuals. The results indicated the capability of the proposed framework in measuring the similarity and in decreasing the characteristic signals in such a way that the similarity results -before and after reduction of dependent characteristic signals- have small differences. The mean differences between the obtained results before and after reducing the dimension were 0.029 and 0.023 for the round path, respectively.},
  author       = {Goudarzi, Samira and Sharif, Mohammad and Karimipour, Farid},
  issn         = {1868-5145},
  journal      = {Journal of Ambient Intelligence and Humanized Computing},
  keywords     = {general computer science},
  pages        = {2621--2635},
  publisher    = {Springer Nature},
  title        = {{A context-aware dimension reduction framework for trajectory and health signal analyses}},
  doi          = {10.1007/s12652-021-03569-z},
  volume       = {13},
  year         = {2022},
}

% Journal article (Ecology Letters, vol. 25, 2022) on early queen infection
% and long-term disease protection in incipient ant colonies.
@article{10284,
  author       = {Casillas Perez, Barbara E and Pull, Christopher and Naiser, Filip and Naderlinger, Elisabeth and Matas, Jiri and Cremer, Sylvia},
  title        = {{Early queen infection shapes developmental dynamics and induces long-term disease protection in incipient ant colonies}},
  journal      = {Ecology Letters},
  volume       = {25},
  number       = {1},
  pages        = {89--100},
  year         = {2022},
  publisher    = {Wiley},
  issn         = {1461-0248},
  doi          = {10.1111/ele.13907},
  abstract     = {Infections early in life can have enduring effects on an organism's development and immunity. In this study, we show that this equally applies to developing ‘superorganisms’––incipient social insect colonies. When we exposed newly mated Lasius niger ant queens to a low pathogen dose, their colonies grew more slowly than controls before winter, but reached similar sizes afterwards. Independent of exposure, queen hibernation survival improved when the ratio of pupae to workers was small. Queens that reared fewer pupae before worker emergence exhibited lower pathogen levels, indicating that high brood rearing efforts interfere with the ability of the queen's immune system to suppress pathogen proliferation. Early-life queen pathogen exposure also improved the immunocompetence of her worker offspring, as demonstrated by challenging the workers to the same pathogen a year later. Transgenerational transfer of the queen's pathogen experience to her workforce can hence durably reduce the disease susceptibility of the whole superorganism.},
}