@article{6488,
  abstract     = {We prove a central limit theorem for the difference of linear eigenvalue statistics of a sample covariance matrix W˜ and its minor W. We find that the fluctuation of this difference is much smaller than those of the individual linear statistics, as a consequence of the strong correlation between the eigenvalues of W˜ and W. Our result identifies the fluctuation of the spatial derivative of the approximate Gaussian field in the recent paper by Dumitru and Paquette. Unlike in a similar result for Wigner matrices, for sample covariance matrices, the fluctuation may entirely vanish.},
  author       = {Cipolloni, Giorgio and Erdös, László},
  issn         = {20103271},
  journal      = {Random Matrices: Theory and Application},
  number       = {3},
  publisher    = {World Scientific Publishing},
  title        = {{Fluctuations for differences of linear eigenvalue statistics for sample covariance matrices}},
  doi          = {10.1142/S2010326320500069},
  volume       = {9},
  year         = {2020},
}

@article{6563,
  abstract     = {This paper presents two algorithms. The first decides the existence of a pointed homotopy between given simplicial maps 𝑓,𝑔:𝑋→𝑌, and the second computes the group [𝛴𝑋,𝑌]∗ of pointed homotopy classes of maps from a suspension; in both cases, the target Y is assumed simply connected. More generally, these algorithms work relative to 𝐴⊆𝑋.},
  author       = {Filakovský, Marek and Vokřínek, Lukas},
  issn         = {16153383},
  journal      = {Foundations of Computational Mathematics},
  pages        = {311--330},
  publisher    = {Springer Nature},
  title        = {{Are two given maps homotopic? An algorithmic viewpoint}},
  doi          = {10.1007/s10208-019-09419-x},
  volume       = {20},
  year         = {2020},
}

@article{6593,
  abstract     = {We consider the monotone variational inequality problem in a Hilbert space and describe a projection-type method with inertial terms under the following properties: (a) The method generates a strongly convergent iteration sequence; (b) The method requires, at each iteration, only one projection onto the feasible set and two evaluations of the operator; (c) The method is designed for variational inequality for which the underline operator is monotone and uniformly continuous; (d) The method includes an inertial term. The latter is also shown to speed up the convergence in our numerical results. A comparison with some related methods is given and indicates that the new method is promising.},
  author       = {Shehu, Yekini and Li, Xiao-Huan and Dong, Qiao-Li},
  issn         = {1572-9265},
  journal      = {Numerical Algorithms},
  pages        = {365--388},
  publisher    = {Springer Nature},
  title        = {{An efficient projection-type method for monotone variational inequalities in Hilbert spaces}},
  doi          = {10.1007/s11075-019-00758-y},
  volume       = {84},
  year         = {2020},
}

@article{6649,
  abstract     = {While Hartree–Fock theory is well established as a fundamental approximation for interacting fermions, it has been unclear how to describe corrections to it due to many-body correlations. In this paper we start from the Hartree–Fock state given by plane waves and introduce collective particle–hole pair excitations. These pairs can be approximately described by a bosonic quadratic Hamiltonian. We use Bogoliubov theory to construct a trial state yielding a rigorous Gell-Mann–Brueckner–type upper bound to the ground state energy. Our result justifies the random-phase approximation in the mean-field scaling regime, for repulsive, regular interaction potentials.
},
  author       = {Benedikter, Niels P and Nam, Phan Thành and Porta, Marcello and Schlein, Benjamin and Seiringer, Robert},
  issn         = {1432-0916},
  journal      = {Communications in Mathematical Physics},
  pages        = {2097–2150},
  publisher    = {Springer Nature},
  title        = {{Optimal upper bound for the correlation energy of a Fermi gas in the mean-field regime}},
  doi          = {10.1007/s00220-019-03505-5},
  volume       = {374},
  year         = {2020},
}

@article{6748,
  abstract     = {Fitting a function by using linear combinations of a large number N of `simple' components is one of the most fruitful ideas in statistical learning. This idea lies at the core of a variety of methods, from two-layer neural networks to kernel regression, to boosting. In general, the resulting risk minimization problem is non-convex and is solved by gradient descent or its variants. Unfortunately, little is known about global convergence properties of these approaches.
Here we consider the problem of learning a concave function f on a compact convex domain Ω⊆ℝd, using linear combinations of `bump-like' components (neurons). The parameters to be fitted are the centers of N bumps, and the resulting empirical risk minimization problem is highly non-convex. We prove that, in the limit in which the number of neurons diverges, the evolution of gradient descent converges to a Wasserstein gradient flow in the space of probability distributions over Ω. Further, when the bump width δ tends to 0, this gradient flow has a limit which is a viscous porous medium equation. Remarkably, the cost function optimized by this gradient flow exhibits a special property known as displacement convexity, which implies exponential convergence rates for N→∞, δ→0. Surprisingly, this asymptotic theory appears to capture well the behavior for moderate values of δ,N. Explaining this phenomenon, and understanding the dependence on δ,N in a quantitative manner remains an outstanding challenge.},
  author       = {Javanmard, Adel and Mondelli, Marco and Montanari, Andrea},
  issn         = {1941-7330},
  journal      = {Annals of Statistics},
  number       = {6},
  pages        = {3619--3642},
  publisher    = {Institute of Mathematical Statistics},
  title        = {{Analysis of a two-layer neural network via displacement convexity}},
  doi          = {10.1214/20-AOS1945},
  volume       = {48},
  year         = {2020},
}

@article{6761,
  abstract     = {In resource allocation games, selfish players share resources that are needed in order to fulfill their objectives. The cost of using a resource depends on the load on it. In the traditional setting, the players make their choices concurrently and in one-shot. That is, a strategy for a player is a subset of the resources. We introduce and study dynamic resource allocation games. In this setting, the game proceeds in phases. In each phase each player chooses one resource. A scheduler dictates the order in which the players proceed in a phase, possibly scheduling several players to proceed concurrently. The game ends when each player has collected a set of resources that fulfills his objective. The cost for each player then depends on this set as well as on the load on the resources in it – we consider both congestion and cost-sharing games. We argue that the dynamic setting is the suitable setting for many applications in practice. We study the stability of dynamic resource allocation games, where the appropriate notion of stability is that of subgame perfect equilibrium, study the inefficiency incurred due to selfish behavior, and also study problems that are particular to the dynamic setting, like constraints on the order in which resources can be chosen or the problem of finding a scheduler that achieves stability.},
  author       = {Avni, Guy and Henzinger, Thomas A and Kupferman, Orna},
  issn         = {03043975},
  journal      = {Theoretical Computer Science},
  pages        = {42--55},
  publisher    = {Elsevier},
  title        = {{Dynamic resource allocation games}},
  doi          = {10.1016/j.tcs.2019.06.031},
  volume       = {807},
  year         = {2020},
}

@article{6796,
  abstract     = {Nearby grid cells have been observed to express a remarkable degree of long-rangeorder, which is often idealized as extending potentially to infinity. Yet their strict peri-odic firing and ensemble coherence are theoretically possible only in flat environments, much unlike the burrows which rodents usually live in. Are the symmetrical, coherent grid maps inferred in the lab relevant to chart their way in their natural habitat? We consider spheres as simple models of curved environments and waiting for the appropriate experiments to be performed, we use our adaptation model to predict what grid maps would emerge in a network with the same type of recurrent connections, which on the plane produce coherence among the units. We find that on the sphere such connections distort the maps that single grid units would express on their own, and aggregate them into clusters. When remapping to a different spherical environment, units in each cluster maintain only partial coherence, similar to what is observed in disordered materials, such as spin glasses.},
  author       = {Stella, Federico and Urdapilleta, Eugenio and Luo, Yifan and Treves, Alessandro},
  issn         = {10981063},
  journal      = {Hippocampus},
  number       = {4},
  pages        = {302--313},
  publisher    = {Wiley},
  title        = {{Partial coherence and frustration in self-organizing spherical grids}},
  doi          = {10.1002/hipo.23144},
  volume       = {30},
  year         = {2020},
}

@article{6808,
  abstract     = {Super-resolution fluorescence microscopy has become an important catalyst for discovery in the life sciences. In STimulated Emission Depletion (STED) microscopy, a pattern of light drives fluorophores from a signal-emitting on-state to a non-signalling off-state. Only emitters residing in a sub-diffraction volume around an intensity minimum are allowed to fluoresce, rendering them distinguishable from the nearby, but dark fluorophores. STED routinely achieves resolution in the few tens of nanometers range in biological samples and is suitable for live imaging. Here, we review the working principle of STED and provide general guidelines for successful STED imaging. The strive for ever higher resolution comes at the cost of increased light burden. We discuss techniques to reduce light exposure and mitigate its detrimental effects on the specimen. These include specialized illumination strategies as well as protecting fluorophores from photobleaching mediated by high-intensity STED light. This opens up the prospect of volumetric imaging in living cells and tissues with diffraction-unlimited resolution in all three spatial dimensions.},
  author       = {Jahr, Wiebke and Velicky, Philipp and Danzl, Johann G},
  issn         = {1046-2023},
  journal      = {Methods},
  number       = {3},
  pages        = {27--41},
  publisher    = {Elsevier},
  title        = {{Strategies to maximize performance in STimulated Emission Depletion (STED) nanoscopy of biological specimens}},
  doi          = {10.1016/j.ymeth.2019.07.019},
  volume       = {174},
  year         = {2020},
}

@article{6906,
  abstract     = {We consider systems of bosons trapped in a box, in the Gross–Pitaevskii regime. We show that low-energy states exhibit complete Bose–Einstein condensation with an optimal bound on the number of orthogonal excitations. This extends recent results obtained in Boccato et al. (Commun Math Phys 359(3):975–1026, 2018), removing the assumption of small interaction potential.},
  author       = {Boccato, Chiara and Brennecke, Christian and Cenatiempo, Serena and Schlein, Benjamin},
  issn         = {1432-0916},
  journal      = {Communications in Mathematical Physics},
  pages        = {1311--1395},
  publisher    = {Springer},
  title        = {{Optimal rate for Bose-Einstein condensation in the Gross-Pitaevskii regime}},
  doi          = {10.1007/s00220-019-03555-9},
  volume       = {376},
  year         = {2020},
}

@unpublished{10012,
  abstract     = {We prove that in the absence of topological changes, the notion of BV solutions to planar multiphase mean curvature flow does not allow for a mechanism for (unphysical) non-uniqueness. Our approach is based on the local structure of the energy landscape near a classical evolution by mean curvature. Mean curvature flow being the gradient flow of the surface energy functional, we develop a gradient-flow analogue of the notion of calibrations. Just like the existence of a calibration guarantees that one has reached a global minimum in the energy landscape, the existence of a "gradient flow calibration" ensures that the route of steepest descent in the energy landscape is unique and stable.},
  author       = {Fischer, Julian L and Hensel, Sebastian and Laux, Tim and Simon, Thilo},
  booktitle    = {arXiv},
  title        = {{The local structure of the energy landscape in multiphase mean curvature flow: weak-strong uniqueness and stability of evolutions}},
  year         = {2020},
}

@unpublished{10022,
  abstract     = {We consider finite-volume approximations of Fokker-Planck equations on bounded convex domains in R^d and study the corresponding gradient flow structures. We reprove the convergence of the discrete to continuous Fokker-Planck equation via the method of Evolutionary Γ-convergence, i.e., we pass to the limit at the level of the gradient flow structures, generalising the one-dimensional result obtained by Disser and Liero. The proof is of variational nature and relies on a Mosco convergence result for functionals in the discrete-to-continuum limit that is of independent interest. Our results apply to arbitrary regular meshes, even though the associated discrete transport distances may fail to converge to the Wasserstein distance in this generality.},
  author       = {Forkert, Dominik L and Maas, Jan and Portinale, Lorenzo},
  booktitle    = {arXiv},
  pages        = {33},
  title        = {{Evolutionary Γ-convergence of entropic gradient flow structures for Fokker-Planck equations in multiple dimensions}},
  year         = {2020},
}

@inproceedings{10328,
  abstract     = {We discus noise channels in coherent electro-optic up-conversion between microwave and optical fields, in particular due to optical heating. We also report on a novel configuration, which promises to be flexible and highly efficient.},
  author       = {Lambert, Nicholas J. and Mobassem, Sonia and Rueda Sanchez, Alfredo R and Schwefel, Harald G.L.},
  booktitle    = {OSA Quantum 2.0 Conference},
  isbn         = {9-781-5575-2820-9},
  location     = {Washington, DC, United States},
  publisher    = {Optica Publishing Group},
  title        = {{New designs and noise channels in electro-optic microwave to optical up-conversion}},
  doi          = {10.1364/QUANTUM.2020.QTu8A.1},
  year         = {2020},
}

@inproceedings{10556,
  abstract     = {In this paper, we present the first Asynchronous Distributed Key Generation (ADKG) algorithm which is also the first distributed key generation algorithm that can generate cryptographic keys with a dual (f,2f+1)-threshold (where f is the number of faulty parties). As a result, using our ADKG we remove the trusted setup assumption that the most scalable consensus algorithms make. In order to create a DKG with a dual (f,2f+1)- threshold we first answer in the affirmative the open question posed by Cachin et al. [7] on how to create an Asynchronous Verifiable Secret Sharing (AVSS) protocol with a reconstruction threshold of f+1<k łe 2f+1, which is of independent interest. Our High-threshold-AVSS (HAVSS) uses an asymmetric bivariate polynomial to encode the secret. This enables the reconstruction of the secret only if a set of k nodes contribute while allowing an honest node that did not participate in the sharing phase to recover his share with the help of f+1 honest parties. Once we have HAVSS we can use it to bootstrap scalable partially synchronous consensus protocols, but the question on how to get a DKG in asynchrony remains as we need a way to produce common randomness. The solution comes from a novel Eventually Perfect Common Coin (EPCC) abstraction that enables the generation of a common coin from n concurrent HAVSS invocations. EPCC's key property is that it is eventually reliable, as it might fail to agree at most f times (even if invoked a polynomial number of times). Using EPCC we implement an Eventually Efficient Asynchronous Binary Agreement (EEABA) which is optimal when the EPCC agrees and protects safety when EPCC fails. Finally, using EEABA we construct the first ADKG which has the same overhead and expected runtime as the best partially-synchronous DKG (O(n4) words, O(f) rounds). As a corollary of our ADKG, we can also create the first Validated Asynchronous Byzantine Agreement (VABA) that does not need a trusted dealer to setup threshold signatures of degree n-f. Our VABA has an overhead of expected O(n2) words and O(1) time per instance, after an initial O(n4) words and O(f) time bootstrap via ADKG.},
  author       = {Kokoris Kogias, Eleftherios and Malkhi, Dahlia and Spiegelman, Alexander},
  booktitle    = {Proceedings of the 2020 ACM SIGSAC Conference on Computer and Communications Security},
  isbn         = {978-1-4503-7089-9},
  location     = {Virtual, United States},
  pages        = {1751–1767},
  publisher    = {Association for Computing Machinery},
  title        = {{Asynchronous distributed key generation for computationally-secure randomness, consensus, and threshold signatures}},
  doi          = {10.1145/3372297.3423364},
  year         = {2020},
}

@misc{10557,
  abstract     = {Data storage and retrieval systems, methods, and computer-readable media utilize a cryptographically verifiable data structure that facilitates verification of a transaction in a decentralized peer-to-peer environment using multi-hop backwards and forwards links. Backward links are cryptographic hashes of past records. Forward links are cryptographic signatures of future records that are added retroactively to records once the target block has been appended to the data structure.},
  author       = {Ford, Bryan and Gasse, Linus and Kokoris Kogias, Eleftherios and Jovanovic, Philipp},
  title        = {{Cryptographically verifiable data structure having multi-hop forward and backwards links and associated systems and methods}},
  year         = {2020},
}

@inproceedings{10672,
  abstract     = {The family of feedback alignment (FA) algorithms aims to provide a more biologically motivated alternative to backpropagation (BP), by substituting the computations that are unrealistic to be implemented in physical brains. While FA algorithms have been shown to work well in practice, there is a lack of rigorous theory proofing their learning capabilities. Here we introduce the first feedback alignment algorithm with provable learning guarantees. In contrast to existing work, we do not require any assumption about the size or depth of the network except that it has a single output neuron, i.e., such as for binary classification tasks. We show that our FA algorithm can deliver its theoretical promises in practice, surpassing the learning performance of existing FA methods and matching backpropagation in binary classification tasks. Finally, we demonstrate the limits of our FA variant when the number of output neurons grows beyond a certain quantity.},
  author       = {Lechner, Mathias},
  booktitle    = {8th International Conference on Learning Representations},
  location     = {Virtual ; Addis Ababa, Ethiopia},
  publisher    = {ICLR},
  title        = {{Learning representations for binary-classification without backpropagation}},
  year         = {2020},
}

@inproceedings{10673,
  abstract     = {We propose a neural information processing system obtained by re-purposing the function of a biological neural circuit model to govern simulated and real-world control tasks. Inspired by the structure of the nervous system of the soil-worm, C. elegans, we introduce ordinary neural circuits (ONCs), defined as the model of biological neural circuits reparameterized for the control of alternative tasks. We first demonstrate that ONCs realize networks with higher maximum flow compared to arbitrary wired networks. We then learn instances of ONCs to control a series of robotic tasks, including the autonomous parking of a real-world rover robot. For reconfiguration of the purpose of the neural circuit, we adopt a search-based optimization algorithm. Ordinary neural circuits perform on par and, in some cases, significantly surpass the performance of contemporary deep learning models. ONC networks are compact, 77% sparser than their counterpart neural controllers, and their neural dynamics are fully interpretable at the cell-level.},
  author       = {Hasani, Ramin and Lechner, Mathias and Amini, Alexander and Rus, Daniela and Grosu, Radu},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning},
  issn         = {2640-3498},
  location     = {Virtual},
  pages        = {4082--4093},
  title        = {{A natural lottery ticket winner: Reinforcement learning with ordinary neural circuits}},
  year         = {2020},
}

@inproceedings{9415,
  abstract     = {Optimizing convolutional neural networks for fast inference has recently become an extremely active area of research. One of the go-to solutions in this context is weight pruning, which aims to reduce computational and memory footprint by removing large subsets of the connections in a neural network. Surprisingly, much less attention has been given to exploiting sparsity in the activation maps, which tend to be naturally sparse in many settings thanks to the structure of rectified linear (ReLU) activation functions. In this paper, we present an in-depth analysis of methods for maximizing the sparsity of the activations in a trained neural network, and show that, when coupled with an efficient sparse-input convolution algorithm, we can leverage this sparsity for significant performance gains. To induce highly sparse activation maps without accuracy loss, we introduce a new regularization technique, coupled with a new threshold-based sparsification method based on a parameterized activation function called Forced-Activation-Threshold Rectified Linear Unit (FATReLU). We examine the impact of our methods on popular image classification models, showing that most architectures can adapt to significantly sparser activation maps without any accuracy loss. Our second contribution is showing that these these compression gains can be translated into inference speedups: we provide a new algorithm to enable fast convolution operations over networks with sparse activations, and show that it can enable significant speedups for end-to-end inference on a range of popular models on the large-scale ImageNet image classification task on modern Intel CPUs, with little or no retraining cost. },
  author       = {Kurtz, Mark and Kopinsky, Justin and Gelashvili, Rati and Matveev, Alexander and Carr, John and Goin, Michael and Leiserson, William and Moore, Sage and Nell, Bill and Shavit, Nir and Alistarh, Dan-Adrian},
  booktitle    = {37th International Conference on Machine Learning, ICML 2020},
  issn         = {2640-3498},
  location     = {Online},
  pages        = {5533--5543},
  title        = {{Inducing and exploiting activation sparsity for fast neural network inference}},
  volume       = {119},
  year         = {2020},
}

@article{9526,
  abstract     = {DNA methylation and histone H1 mediate transcriptional silencing of genes and transposable elements, but how they interact is unclear. In plants and animals with mosaic genomic methylation, functionally mysterious methylation is also common within constitutively active housekeeping genes. Here, we show that H1 is enriched in methylated sequences, including genes, of Arabidopsis thaliana, yet this enrichment is independent of DNA methylation. Loss of H1 disperses heterochromatin, globally alters nucleosome organization, and activates H1-bound genes, but only weakly de-represses transposable elements. However, H1 loss strongly activates transposable elements hypomethylated through mutation of DNA methyltransferase MET1. Hypomethylation of genes also activates antisense transcription, which is modestly enhanced by H1 loss. Our results demonstrate that H1 and DNA methylation jointly maintain transcriptional homeostasis by silencing transposable elements and aberrant intragenic transcripts. Such functionality plausibly explains why DNA methylation, a well-known mutagen, has been maintained within coding sequences of crucial plant and animal genes.},
  author       = {Choi, Jaemyung and Lyons, David B. and Kim, M. Yvonne and Moore, Jonathan D. and Zilberman, Daniel},
  issn         = {1097-4164},
  journal      = {Molecular Cell},
  number       = {2},
  pages        = {310--323.e7},
  publisher    = {Elsevier},
  title        = {{DNA methylation and histone H1 jointly repress transposable elements and aberrant intragenic transcripts}},
  doi          = {10.1016/j.molcel.2019.10.011},
  volume       = {77},
  year         = {2020},
}

@article{9630,
  abstract     = {Various kinds of data are routinely represented as discrete probability distributions. Examples include text documents summarized by histograms of word occurrences and images represented as histograms of oriented gradients. Viewing a discrete probability distribution as a point in the standard simplex of the appropriate dimension, we can understand collections of such objects in geometric and topological terms.  Importantly, instead of using the standard Euclidean distance, we look into dissimilarity measures with information-theoretic justification, and we develop the theory needed for applying topological data analysis in this setting. In doing so, we emphasize constructions that enable the usage of existing computational topology software in this context.},
  author       = {Edelsbrunner, Herbert and Virk, Ziga and Wagner, Hubert},
  issn         = {1920180X},
  journal      = {Journal of Computational Geometry},
  number       = {2},
  pages        = {162--182},
  publisher    = {Carleton University},
  title        = {{Topological data analysis in information space}},
  doi          = {10.20382/jocg.v11i2a7},
  volume       = {11},
  year         = {2020},
}

@inproceedings{9631,
  abstract     = {The ability to leverage large-scale hardware parallelism has been one of the key enablers of the accelerated recent progress in machine learning. Consequently, there has been considerable effort invested into developing efficient parallel variants of classic machine learning algorithms. However, despite the wealth of knowledge on parallelization, some classic machine learning algorithms often prove hard to parallelize efficiently while maintaining convergence. In this paper, we focus on efficient parallel algorithms for the key machine learning task of inference on graphical models, in particular on the fundamental belief propagation algorithm. We address the challenge of efficiently parallelizing this classic paradigm by showing how to leverage scalable relaxed schedulers in this context. We present an extensive empirical study, showing that our approach outperforms previous parallel belief propagation implementations both in terms of scalability and in terms of wall-clock convergence time, on a range of practical applications.},
  author       = {Aksenov, Vitaly and Alistarh, Dan-Adrian and Korhonen, Janne},
  booktitle    = {Advances in Neural Information Processing Systems},
  isbn         = {9781713829546},
  issn         = {10495258},
  location     = {Vancouver, Canada},
  pages        = {22361--22372},
  publisher    = {Curran Associates},
  title        = {{Scalable belief propagation via relaxed scheduling}},
  volume       = {33},
  year         = {2020},
}

