@article{10838,
  abstract     = {Combining hybrid zone analysis with genomic data is a promising approach to understanding the genomic basis of adaptive divergence. It allows for the identification of genomic regions underlying barriers to gene flow. It also provides insights into spatial patterns of allele frequency change, informing about the interplay between environmental factors, dispersal and selection. However, when only a single hybrid zone is analysed, it is difficult to separate patterns generated by selection from those resulting from chance. Therefore, it is beneficial to look for repeatable patterns across replicate hybrid zones in the same system. We applied this approach to the marine snail Littorina saxatilis, which contains two ecotypes, adapted to wave-exposed rocks vs. high-predation boulder fields. The existence of numerous hybrid zones between ecotypes offered the opportunity to test for the repeatability of genomic architectures and spatial patterns of divergence. We sampled and phenotyped snails from seven replicate hybrid zones on the Swedish west coast and genotyped them for thousands of single nucleotide polymorphisms. Shell shape and size showed parallel clines across all zones. Many genomic regions showing steep clines and/or high differentiation were shared among hybrid zones, consistent with a common evolutionary history and extensive gene flow between zones, and supporting the importance of these regions for divergence. In particular, we found that several large putative inversions contribute to divergence in all locations. Additionally, we found evidence for consistent displacement of clines from the boulder–rock transition. Our results demonstrate patterns of spatial variation that would not be accessible without continuous spatial sampling, a large genomic data set and replicate hybrid zones.},
  author       = {Westram, Anja M and Faria, Rui and Johannesson, Kerstin and Butlin, Roger},
  issn         = {1365-294X},
  journal      = {Molecular Ecology},
  keywords     = {Genetics, Ecology, Evolution, Behavior and Systematics},
  number       = {15},
  pages        = {3797--3814},
  publisher    = {Wiley},
  title        = {{Using replicate hybrid zones to understand the genomic basis of adaptive divergence}},
  doi          = {10.1111/mec.15861},
  volume       = {30},
  year         = {2021},
}

@inproceedings{10847,
  abstract     = {We study the two-player zero-sum extension of the partially observable stochastic shortest-path problem where one agent has only partial information about the environment. We formulate this problem as a partially observable stochastic game (POSG): given a set of target states and negative rewards for each transition, the player with imperfect information maximizes the expected undiscounted total reward until a target state is reached. The second player with the perfect information aims for the opposite. We base our formalism on POSGs with one-sided observability (OS-POSGs) and give the following contributions: (1) we introduce a novel heuristic search value iteration algorithm that iteratively solves depth-limited variants of the game, (2) we derive the bound on the depth guaranteeing an arbitrary precision, (3) we propose a novel upper-bound estimation that allows early terminations, and (4) we experimentally evaluate the algorithm on a pursuit-evasion game.},
  author       = {Tomášek, Petr and Horák, Karel and Aradhye, Aditya and Bošanský, Branislav and Chatterjee, Krishnendu},
  booktitle    = {30th International Joint Conference on Artificial Intelligence},
  isbn         = {9780999241196},
  issn         = {1045-0823},
  location     = {Virtual, Online},
  pages        = {4182--4189},
  publisher    = {International Joint Conferences on Artificial Intelligence},
  title        = {{Solving partially observable stochastic shortest-path games}},
  doi          = {10.24963/ijcai.2021/575},
  year         = {2021},
}

@article{10852,
  abstract     = { We review old and new results on the Fröhlich polaron model. The discussion includes the validity of the (classical) Pekar approximation in the strong coupling limit, quantum corrections to this limit, as well as the divergence of the effective polaron mass.},
  author       = {Seiringer, Robert},
  issn         = {1793-6659},
  journal      = {Reviews in Mathematical Physics},
  keywords     = {Mathematical Physics, Statistical and Nonlinear Physics},
  number       = {01},
  publisher    = {World Scientific Publishing},
  title        = {{The polaron at strong coupling}},
  doi          = {10.1142/s0129055x20600120},
  volume       = {33},
  year         = {2021},
}

@inproceedings{10853,
  abstract     = {Dynamic Connectivity is a fundamental algorithmic graph problem, motivated by a wide range of applications to social and communication networks and used as a building block in various other algorithms, such as the bi-connectivity and the dynamic minimal spanning tree problems. In brief, we wish to maintain the connected components of the graph under dynamic edge insertions and deletions. In the sequential case, the problem has been well-studied from both theoretical and practical perspectives. However, much less is known about efficient concurrent solutions to this problem. This is the gap we address in this paper. We start from one of the classic data structures used to solve this problem, the Euler Tour Tree. Our first contribution is a non-blocking single-writer implementation of it. We leverage this data structure to obtain the first truly concurrent generalization of dynamic connectivity, which preserves the time complexity of its sequential counterpart, but is also scalable in practice. To achieve this, we rely on three main techniques. The first is to ensure that connectivity queries, which usually dominate real-world workloads, are non-blocking. The second non-trivial technique expands the above idea by making all queries that do not change the connectivity structure non-blocking. The third ingredient is applying fine-grained locking for updating the connected components, which allows operations on disjoint components to occur in parallel. We evaluate the resulting algorithm on various workloads, executing on both real and synthetic graphs. The results show the efficiency of each of the proposed optimizations; the most efficient variant improves the performance of a coarse-grained based implementation on realistic scenarios up to 6x on average and up to 30x when connectivity queries dominate.},
  author       = {Fedorov, Alexander and Koval, Nikita and Alistarh, Dan-Adrian},
  booktitle    = {Proceedings of the 33rd ACM Symposium on Parallelism in Algorithms and Architectures},
  isbn         = {9781450380706},
  location     = {Virtual, Online},
  pages        = {208--220},
  publisher    = {Association for Computing Machinery},
  title        = {{A scalable concurrent algorithm for dynamic connectivity}},
  doi          = {10.1145/3409964.3461810},
  year         = {2021},
}

@inproceedings{10854,
  abstract     = {Consider a distributed task where the communication network is fixed but the local inputs given to the nodes of the distributed system may change over time. In this work, we explore the following question: if some of the local inputs change, can an existing solution be updated efficiently, in a dynamic and distributed manner?
To address this question, we define the batch dynamic CONGEST model in which we are given a bandwidth-limited communication network and a dynamic edge labelling defines the problem input. The task is to maintain a solution to a graph problem on the labelled graph under batch changes. We investigate, when a batch of alpha edge label changes arrive, - how much time as a function of alpha we need to update an existing solution, and - how much information the nodes have to keep in local memory between batches in order to update the solution quickly.
Our work lays the foundations for the theory of input-dynamic distributed network algorithms. We give a general picture of the complexity landscape in this model, design both universal algorithms and algorithms for concrete problems, and present a general framework for lower bounds. The diverse time complexity of our model spans from constant time, through time polynomial in alpha, and to alpha time, which we show to be enough for any task.},
  author       = {Foerster, Klaus-Tycho and Korhonen, Janne and Paz, Ami and Rybicki, Joel and Schmid, Stefan},
  booktitle    = {Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems},
  isbn         = {9781450380720},
  location     = {Virtual, Online},
  pages        = {71--72},
  publisher    = {Association for Computing Machinery},
  title        = {{Input-dynamic distributed algorithms for communication networks}},
  doi          = {10.1145/3410220.3453923},
  year         = {2021},
}

@article{10855,
  abstract     = {Consider a distributed task where the communication network is fixed but the local inputs given to the nodes of the distributed system may change over time. In this work, we explore the following question: if some of the local inputs change, can an existing solution be updated efficiently, in a dynamic and distributed manner? To address this question, we define the batch dynamic CONGEST model in which we are given a bandwidth-limited communication network and a dynamic edge labelling defines the problem input. The task is to maintain a solution to a graph problem on the labeled graph under batch changes. We investigate, when a batch of α edge label changes arrive, how much time as a function of α we need to update an existing solution, and how much information the nodes have to keep in local memory between batches in order to update the solution quickly. Our work lays the foundations for the theory of input-dynamic distributed network algorithms. We give a general picture of the complexity landscape in this model, design both universal algorithms and algorithms for concrete problems, and present a general framework for lower bounds. In particular, we derive non-trivial upper bounds for two selected, contrasting problems: maintaining a minimum spanning tree and detecting cliques.},
  author       = {Foerster, Klaus-Tycho and Korhonen, Janne and Paz, Ami and Rybicki, Joel and Schmid, Stefan},
  issn         = {2476-1249},
  journal      = {Proceedings of the ACM on Measurement and Analysis of Computing Systems},
  keywords     = {Computer Networks and Communications, Hardware and Architecture, Safety, Risk, Reliability and Quality, Computer Science (miscellaneous)},
  number       = {1},
  pages        = {1--33},
  publisher    = {Association for Computing Machinery},
  title        = {{Input-dynamic distributed algorithms for communication networks}},
  doi          = {10.1145/3447384},
  volume       = {5},
  year         = {2021},
}

@article{10856,
  abstract     = {We study the properties of the maximal volume k-dimensional sections of the n-dimensional cube [−1, 1]^n. We obtain a first order necessary condition for a k-dimensional subspace to be a local maximizer of the volume of such sections, which we formulate in a geometric way. We estimate the length of the projection of a vector of the standard basis of R^n onto a k-dimensional subspace that maximizes the volume of the intersection. We find the optimal upper bound on the volume of a planar section of the cube [−1, 1]^n, n ≥ 2.},
  author       = {Ivanov, Grigory and Tsiutsiurupa, Igor},
  issn         = {2299-3274},
  journal      = {Analysis and Geometry in Metric Spaces},
  keywords     = {Applied Mathematics, Geometry and Topology, Analysis},
  number       = {1},
  pages        = {1--18},
  publisher    = {De Gruyter},
  title        = {{On the volume of sections of the cube}},
  doi          = {10.1515/agms-2020-0103},
  volume       = {9},
  year         = {2021},
}

@article{10858,
  abstract     = {The cost-effective conversion of low-grade heat into electricity using thermoelectric devices requires developing alternative materials and material processing technologies able to reduce the currently high device manufacturing costs. In this direction, thermoelectric materials that do not rely on rare or toxic elements such as tellurium or lead need to be produced using high-throughput technologies not involving high temperatures and long processes. Bi2Se3 is an obvious possible Te-free alternative to Bi2Te3 for ambient temperature thermoelectric applications, but its performance is still low for practical applications, and additional efforts toward finding proper dopants are required. Here, we report a scalable method to produce Bi2Se3 nanosheets at low synthesis temperatures. We studied the influence of different dopants on the thermoelectric properties of this material. Among the elements tested, we demonstrated that Sn doping resulted in the best performance. Sn incorporation resulted in a significant improvement to the Bi2Se3 Seebeck coefficient and a reduction in the thermal conductivity in the direction of the hot-press axis, resulting in an overall 60% improvement in the thermoelectric figure of merit of Bi2Se3.},
  author       = {Li, Mengyao and Zhang, Yu and Zhang, Ting and Zuo, Yong and Xiao, Ke and Arbiol, Jordi and Llorca, Jordi and Liu, Yu and Cabot, Andreu},
  issn         = {2079-4991},
  journal      = {Nanomaterials},
  keywords     = {General Materials Science, General Chemical Engineering},
  number       = {7},
  publisher    = {MDPI},
  title        = {{Enhanced thermoelectric performance of n-type Bi2Se3 nanosheets through Sn doping}},
  doi          = {10.3390/nano11071827},
  volume       = {11},
  year         = {2021},
}

@article{10860,
  abstract     = {A tight frame is the orthogonal projection of some orthonormal basis of Rn onto Rk. We show that a set of vectors is a tight frame if and only if the set of all cross products of these vectors is a tight frame. We reformulate a range of problems on the volume of projections (or sections) of regular polytopes in terms of tight frames and write a first-order necessary condition for local extrema of these problems. As applications, we prove new results for the problem of maximization of the volume of zonotopes.},
  author       = {Ivanov, Grigory},
  issn         = {1496-4287},
  journal      = {Canadian Mathematical Bulletin},
  keywords     = {General Mathematics, Tight frame, Grassmannian, zonotope},
  number       = {4},
  pages        = {942--963},
  publisher    = {Canadian Mathematical Society},
  title        = {{Tight frames and related geometric problems}},
  doi          = {10.4153/s000843952000096x},
  volume       = {64},
  year         = {2021},
}

@unpublished{10912,
  abstract     = {Brain dynamics display collective phenomena as diverse as neuronal oscillations and avalanches. Oscillations are rhythmic, with fluctuations occurring at a characteristic scale, whereas avalanches are scale-free cascades of neural activity. Here we show that such antithetic features can coexist in a very generic class of adaptive neural networks. In the most simple yet fully microscopic model from this class we make direct contact with human brain resting-state activity recordings via tractable inference of the model's two essential parameters. The inferred model quantitatively captures the dynamics over a broad range of scales, from single sensor fluctuations, collective behaviors of nearly-synchronous extreme events on multiple sensors, to neuronal avalanches unfolding over multiple sensors across multiple time-bins. Importantly, the inferred parameters correlate with model-independent signatures of "closeness to criticality", suggesting that the coexistence of scale-specific (neural oscillations) and scale-free (neuronal avalanches) dynamics in brain activity occurs close to a non-equilibrium critical point at the onset of self-sustained oscillations.},
  author       = {Lombardi, Fabrizio and Pepic, Selver and Shriki, Oren and Tkačik, Gašper and De Martino, Daniele},
  pages        = {37},
  publisher    = {arXiv},
  title        = {{Quantifying the coexistence of neuronal oscillations and avalanches}},
  doi          = {10.48550/arXiv.2108.06686},
  note         = {arXiv:2108.06686},
  year         = {2021},
}

@article{11052,
  abstract     = {In order to combat molecular damage, most cellular proteins undergo rapid turnover. We have previously identified large nuclear protein assemblies that can persist for years in post-mitotic tissues and are subject to age-related decline. Here, we report that mitochondria can be long lived in the mouse brain and reveal that specific mitochondrial proteins have half-lives longer than the average proteome. These mitochondrial long-lived proteins (mitoLLPs) are core components of the electron transport chain (ETC) and display increased longevity in respiratory supercomplexes. We find that COX7C, a mitoLLP that forms a stable contact site between complexes I and IV, is required for complex IV and supercomplex assembly. Remarkably, even upon depletion of COX7C transcripts, ETC function is maintained for days, effectively uncoupling mitochondrial function from ongoing transcription of its mitoLLPs. Our results suggest that modulating protein longevity within the ETC is critical for mitochondrial proteome maintenance and the robustness of mitochondrial function.},
  author       = {Krishna, Shefali and Arrojo e Drigo, Rafael and Capitanio, Juliana S. and Ramachandra, Ranjan and Ellisman, Mark and Hetzer, Martin W.},
  issn         = {1534-5807},
  journal      = {Developmental Cell},
  keywords     = {Developmental Biology, Cell Biology, General Biochemistry, Genetics and Molecular Biology, Molecular Biology},
  number       = {21},
  pages        = {2952--2965.e9},
  publisher    = {Elsevier},
  title        = {{Identification of long-lived proteins in the mitochondria reveals increased stability of the electron transport chain}},
  doi          = {10.1016/j.devcel.2021.10.008},
  volume       = {56},
  year         = {2021},
}

@article{11053,
  abstract     = {Understanding basic mechanisms of aging holds great promise for developing interventions that prevent or delay many age-related declines and diseases simultaneously to increase human healthspan. However, a major confounding factor in aging research is the heterogeneity of the aging process itself. At the organismal level, it is clear that chronological age does not always predict biological age or susceptibility to frailty or pathology. While genetics and environment are major factors driving variable rates of aging, additional complexity arises because different organs, tissues, and cell types are intrinsically heterogeneous and exhibit different aging trajectories normally or in response to the stresses of the aging process (e.g., damage accumulation). Tackling the heterogeneity of aging requires new and specialized tools (e.g., single-cell analyses, mass spectrometry-based approaches, and advanced imaging) to identify novel signatures of aging across scales. Cutting-edge computational approaches are then needed to integrate these disparate datasets and elucidate network interactions between known aging hallmarks. There is also a need for improved, human cell-based models of aging to ensure that basic research findings are relevant to human aging and healthspan interventions. The San Diego Nathan Shock Center (SD-NSC) provides access to cutting-edge scientific resources to facilitate the study of the heterogeneity of aging in general and to promote the use of novel human cell models of aging. The center also has a robust Research Development Core that funds pilot projects on the heterogeneity of aging and organizes innovative training activities, including workshops and a personalized mentoring program, to help investigators new to the aging field succeed. 
Finally, the SD-NSC participates in outreach activities to educate the general community about the importance of aging research and promote the need for basic biology of aging research in particular.},
  author       = {Shadel, Gerald S. and Adams, Peter D. and Berggren, W. Travis and Diedrich, Jolene K. and Diffenderfer, Kenneth E. and Gage, Fred H. and Hah, Nasun and Hansen, Malene and Hetzer, Martin W. and Molina, Anthony J. A. and Manor, Uri and Marek, Kurt and O’Keefe, David D. and Pinto, Antonio F. M. and Sacco, Alessandra and Sharpee, Tatyana O. and Shokriev, Maxim N. and Zambetti, Stefania},
  issn         = {2509-2715},
  journal      = {GeroScience},
  keywords     = {Geriatrics and Gerontology, Aging},
  number       = {5},
  pages        = {2139--2148},
  publisher    = {Springer Nature},
  title        = {{The San Diego Nathan Shock Center: Tackling the heterogeneity of aging}},
  doi          = {10.1007/s11357-021-00426-x},
  volume       = {43},
  year         = {2021},
}

@inproceedings{11436,
  abstract     = {Asynchronous distributed algorithms are a popular way to reduce synchronization costs in large-scale optimization, and in particular for neural network training. However, for nonsmooth and nonconvex objectives, few convergence guarantees exist beyond cases where closed-form proximal operator solutions are available. As training most popular deep neural networks corresponds to optimizing nonsmooth and nonconvex objectives, there is a pressing need for such convergence guarantees. In this paper, we analyze for the first time the convergence of stochastic asynchronous optimization for this general class of objectives. In particular, we focus on stochastic subgradient methods allowing for block variable partitioning, where the shared model is asynchronously updated by concurrent processes. To this end, we use a probabilistic model which captures key features of real asynchronous scheduling between concurrent processes. Under this model, we establish convergence with probability one to an invariant set for stochastic subgradient methods with momentum. From a practical perspective, one issue with the family of algorithms that we consider is that they are not efficiently supported by machine learning frameworks, which mostly focus on distributed data-parallel strategies. To address this, we propose a new implementation strategy for shared-memory based training of deep neural networks for a partitioned but shared model in single- and multi-GPU settings. Based on this implementation, we achieve on average 1.2x speed-up in comparison to state-of-the-art training methods for popular image classification tasks, without compromising accuracy.},
  author       = {Kungurtsev, Vyacheslav and Egan, Malcolm and Chatterjee, Bapi and Alistarh, Dan-Adrian},
  booktitle    = {35th AAAI Conference on Artificial Intelligence, AAAI 2021},
  isbn         = {9781713835974},
  issn         = {2374-3468},
  location     = {Virtual, Online},
  number       = {9B},
  pages        = {8209--8216},
  publisher    = {AAAI Press},
  title        = {{Asynchronous optimization methods for efficient training of deep neural networks with guarantees}},
  volume       = {35},
  year         = {2021},
}

@article{11446,
  abstract     = {Suppose that n is not a prime power and not twice a prime power. We prove that for any Hausdorff compactum X with a free action of the symmetric group Sn, there exists an Sn-equivariant map X→Rn whose image avoids the diagonal {(x,x,…,x)∈Rn∣x∈R}. Previously, the special cases of this statement for certain X were usually proved using the equivariant obstruction theory. Such calculations are difficult and may become infeasible past the first (primary) obstruction. We take a different approach which allows us to prove the vanishing of all obstructions simultaneously. The essential step in the proof is classifying the possible degrees of Sn-equivariant maps from the boundary ∂Δn−1 of (n−1)-simplex to itself. Existence of equivariant maps between spaces is important for many questions arising from discrete mathematics and geometry, such as Kneser’s conjecture, the Square Peg conjecture, the Splitting Necklace problem, and the Topological Tverberg conjecture, etc. We demonstrate the utility of our result applying it to one such question, a specific instance of envy-free division problem.},
  author       = {Avvakumov, Sergey and Kudrya, Sergey},
  issn         = {1432-0444},
  journal      = {Discrete \& Computational Geometry},
  keywords     = {Computational Theory and Mathematics, Discrete Mathematics and Combinatorics, Geometry and Topology, Theoretical Computer Science},
  number       = {3},
  pages        = {1202--1216},
  publisher    = {Springer Nature},
  title        = {{Vanishing of all equivariant obstructions and the mapping degree}},
  doi          = {10.1007/s00454-021-00299-z},
  volume       = {66},
  year         = {2021},
}

@inproceedings{11452,
  abstract     = {We study efficient distributed algorithms for the fundamental problem of principal component analysis and leading eigenvector computation on the sphere, when the data are randomly distributed among a set of computational nodes. We propose a new quantized variant of Riemannian gradient descent to solve this problem, and prove that the algorithm converges with high probability under a set of necessary spherical-convexity properties. We give bounds on the number of bits transmitted by the algorithm under common initialization schemes, and investigate the dependency on the problem dimension in each case.},
  author       = {Alimisis, Foivos and Davies, Peter and Vandereycken, Bart and Alistarh, Dan-Adrian},
  booktitle    = {Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems},
  isbn         = {9781713845393},
  issn         = {1049-5258},
  location     = {Virtual, Online},
  pages        = {2823--2834},
  publisher    = {Neural Information Processing Systems Foundation},
  title        = {{Distributed principal component analysis with limited communication}},
  volume       = {4},
  year         = {2021},
}

@inproceedings{11453,
  abstract     = {Neuronal computations depend on synaptic connectivity and intrinsic electrophysiological properties. Synaptic connectivity determines which inputs from presynaptic neurons are integrated, while cellular properties determine how inputs are filtered over time. Unlike their biological counterparts, most computational approaches to learning in simulated neural networks are limited to changes in synaptic connectivity. However, if intrinsic parameters change, neural computations are altered drastically. Here, we include the parameters that determine the intrinsic properties,
e.g., time constants and reset potential, into the learning paradigm. Using sparse feedback signals that indicate target spike times, and gradient-based parameter updates, we show that the intrinsic parameters can be learned along with the synaptic weights to produce specific input-output functions. Specifically, we use a teacher-student paradigm in which a randomly initialised leaky integrate-and-fire or resonate-and-fire neuron must recover the parameters of a teacher neuron. We show that complex temporal functions can be learned online and without backpropagation through time, relying on event-based updates only. Our results are a step towards online learning of neural computations from ungraded and unsigned sparse feedback signals with a biologically inspired learning mechanism.},
  author       = {Braun, Lukas and Vogels, Tim P},
  booktitle    = {Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems},
  isbn         = {9781713845393},
  issn         = {1049-5258},
  location     = {Virtual, Online},
  pages        = {16437--16450},
  publisher    = {Neural Information Processing Systems Foundation},
  title        = {{Online learning of neural computations from sparse temporal feedback}},
  volume       = {20},
  year         = {2021},
}

@inproceedings{11458,
  abstract     = {The increasing computational requirements of deep neural networks (DNNs) have led to significant interest in obtaining DNN models that are sparse, yet accurate. Recent work has investigated the even harder case of sparse training, where the DNN weights are, for as much as possible, already sparse to reduce computational costs during training. Existing sparse training methods are often empirical and can have lower accuracy relative to the dense baseline. In this paper, we present a general approach called Alternating Compressed/DeCompressed (AC/DC) training of DNNs, demonstrate convergence for a variant of the algorithm, and show that AC/DC outperforms existing sparse training methods in accuracy at similar computational budgets; at high sparsity levels, AC/DC even outperforms existing methods that rely on accurate pre-trained dense models. An important property of AC/DC is that it allows co-training of dense and sparse models, yielding accurate sparse–dense model pairs at the end of the training process. This is useful in practice, where compressed variants may be desirable for deployment in resource-constrained settings without re-doing the entire training flow, and also provides us with insights into the accuracy gap between dense and compressed models. The code is available at: https://github.com/IST-DASLab/ACDC.},
  author       = {Peste, Elena-Alexandra and Iofinova, Eugenia B and Vladu, Adrian and Alistarh, Dan-Adrian},
  booktitle    = {35th Conference on Neural Information Processing Systems},
  isbn         = {9781713845393},
  issn         = {1049-5258},
  location     = {Virtual, Online},
  pages        = {8557--8570},
  publisher    = {Curran Associates},
  title        = {{AC/DC: Alternating Compressed/DeCompressed training of deep neural networks}},
  volume       = {34},
  year         = {2021},
}

@inproceedings{11463,
  abstract     = {Efficiently approximating local curvature information of the loss function is a key tool for optimization and compression of deep neural networks. Yet, most existing methods to approximate second-order information have high computational
or storage costs, which limits their practicality. In this work, we investigate matrix-free, linear-time approaches for estimating Inverse-Hessian Vector Products (IHVPs) for the case when the Hessian can be approximated as a sum of rank-one matrices, as in the classic approximation of the Hessian by the empirical Fisher matrix. We propose two new algorithms: the first is tailored towards network compression and can compute the IHVP for dimension d, if the Hessian is given as a sum of m rank-one matrices, using O(dm2) precomputation, O(dm) cost for computing the IHVP, and query cost O(m) for any single element of the inverse Hessian. The second algorithm targets an optimization setting, where we wish to compute the product between the inverse Hessian, estimated over a sliding window of optimization steps, and a given gradient direction, as required for preconditioned SGD. We give an algorithm with cost O(dm + m2) for computing the IHVP and O(dm + m3) for adding or removing any gradient from the sliding window. These
two algorithms yield state-of-the-art results for network pruning and optimization with lower computational overhead relative to existing second-order methods. Implementations are available at [9] and [17].},
  author       = {Frantar, Elias and Kurtic, Eldar and Alistarh, Dan-Adrian},
  booktitle    = {35th Conference on Neural Information Processing Systems},
  isbn         = {9781713845393},
  issn         = {1049-5258},
  location     = {Virtual, Online},
  pages        = {14873--14886},
  publisher    = {Curran Associates},
  title        = {{M-FAC: Efficient matrix-free approximations of second-order information}},
  volume       = {34},
  year         = {2021},
}

@inproceedings{11464,
  abstract     = {We consider a standard distributed optimisation setting where $N$ machines, each holding a $d$-dimensional function
$f_i$, aim to jointly minimise the sum of the functions $\sum_{i=1}^{N} f_i(x)$. This problem arises naturally in large-scale distributed optimisation, where a standard solution is to apply variants of (stochastic) gradient descent. We focus on the communication complexity of this problem: our main result provides the first fully unconditional bounds on total number of bits which need to be sent and received by the $N$ machines to solve this problem under point-to-point communication, within a given error-tolerance. Specifically, we show that $\Omega(Nd \log d / N\varepsilon)$ total bits need to be communicated between the machines to find an additive $\epsilon$-approximation to the minimum of $\sum_{i=1}^{N} f_i(x)$. The result holds for both deterministic and randomised algorithms, and, importantly, requires no assumptions on the algorithm structure. The lower bound is tight under certain restrictions on parameter values, and is matched within constant factors for quadratic objectives by a new variant of quantised gradient descent, which we describe and analyse. Our results bring over tools from communication complexity to distributed optimisation, which has potential for further applications.},
  author       = {Alistarh, Dan-Adrian and Korhonen, Janne},
  booktitle    = {35th Conference on Neural Information Processing Systems},
  isbn         = {9781713845393},
  issn         = {1049-5258},
  location     = {Virtual, Online},
  pages        = {7254--7266},
  publisher    = {Curran Associates},
  title        = {{Towards tight communication lower bounds for distributed optimisation}},
  volume       = {34},
  year         = {2021},
}

@article{11498,
  abstract     = {Rest-frame ultraviolet (UV) emission lines probe electron densities, gas-phase abundances, metallicities, and ionization parameters of the emitting star-forming galaxies and their environments. The strongest main UV emission line, Lyα, has been instrumental in advancing the general knowledge of galaxy formation in the early universe. However, observing Lyα emission becomes increasingly challenging at z ≳ 6 when the neutral hydrogen fraction of the circumgalactic and intergalactic media increases. Secondary weaker UV emission lines provide important alternative methods for studying galaxy properties at high redshift. We present a large sample of rest-frame UV emission line sources at intermediate redshift for calibrating and exploring the connection between secondary UV lines and the emitting galaxies’ physical properties and their Lyα emission. The sample of 2052 emission line sources with 1.5 < z < 6.4 was collected from integral field data from the MUSE-Wide and MUSE-Deep surveys taken as part of Guaranteed Time Observations. The objects were selected through untargeted source detection (i.e., no preselection of sources as in dedicated spectroscopic campaigns) in the three-dimensional MUSE data cubes. We searched optimally extracted one-dimensional spectra of the full sample for UV emission features via emission line template matching, resulting in a sample of more than 100 rest-frame UV emission line detections. We show that the detection efficiency of (non-Lyα) UV emission lines increases with survey depth, and that the emission line strength of He IIλ1640 Å, [O III] λ1661 + O III] λ1666, and [Si III] λ1883 + Si III] λ1892 correlate with the strength of [C III] λ1907 + C III] λ1909. The rest-frame equivalent width (EW0) of [C III] λ1907 + C III] λ1909 is found to be roughly 0.22 ± 0.18 of EW0(Lyα). We measured the velocity offsets of resonant emission lines with respect to systemic tracers. 
For C IVλ1548 + C IVλ1551 we find that ΔvC IV ≲ 250 km s−1, whereas ΔvLyα falls in the range of 250−500 km s−1 which is in agreement with previous results from the literature. The electron density ne measured from [Si III] λ1883 + Si III] λ1892 and [C III] λ1907 + C III] λ1909 line flux ratios is generally < 105 cm−3 and the gas-phase abundance is below solar at 12 + log10(O/H)≈8. Lastly, we used “PhotoIonization Model Probability Density Functions” to infer physical parameters of the full sample and individual systems based on photoionization model parameter grids and observational constraints from our UV emission line searches. This reveals that the UV line emitters generally have ionization parameter log10(U) ≈ −2.5 and metal mass fractions that scatter around Z ≈ 10−2, that is Z ≈ 0.66 Z⊙. Value-added catalogs of the full sample of MUSE objects studied in this work and a collection of UV line emitters from the literature are provided with this paper.},
  author       = {Schmidt, K. B. and Kerutt, J. and Wisotzki, L. and Urrutia, T. and Feltre, A. and Maseda, M. V. and Nanayakkara, T. and Bacon, R. and Boogaard, L. A. and Conseil, S. and Contini, T. and Herenz, E. C. and Kollatschny, W. and Krumpe, M. and Leclercq, F. and Mahler, G. and Matthee, Jorryt J and Mauerhofer, V. and Richard, J. and Schaye, J.},
  issn         = {1432-0746},
  journal      = {Astronomy \& Astrophysics},
  keywords     = {Space and Planetary Science, Astronomy and Astrophysics, ultraviolet: galaxies / galaxies: high-redshift / galaxies: ISM / ISM: lines and bands / methods: observational / techniques: imaging spectroscopy},
  publisher    = {EDP Sciences},
  title        = {{Recovery and analysis of rest-frame UV emission lines in 2052 galaxies observed with MUSE at $1.5 < z < 6.4$}},
  doi          = {10.1051/0004-6361/202140876},
  volume       = {654},
  year         = {2021},
}

