[{"department":[{"_id":"DaAl"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_published":"2021-12-01T00:00:00Z","external_id":{"arxiv":["2110.14391"]},"scopus_import":"1","date_created":"2022-06-19T22:01:58Z","intvolume":"         4","main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/1680e9fa7b4dd5d62ece800239bb53bd-Paper.pdf","open_access":"1"}],"publisher":"Neural Information Processing Systems Foundation","quality_controlled":"1","type":"conference","language":[{"iso":"eng"}],"oa":1,"year":"2021","publication":"Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems","date_updated":"2022-06-20T08:31:52Z","page":"2823-2834","day":"01","citation":{"chicago":"Alimisis, Foivos, Peter Davies, Bart Vandereycken, and Dan-Adrian Alistarh. “Distributed Principal Component Analysis with Limited Communication.” In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, 4:2823–34. Neural Information Processing Systems Foundation, 2021.","mla":"Alimisis, Foivos, et al. “Distributed Principal Component Analysis with Limited Communication.” <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, vol. 4, Neural Information Processing Systems Foundation, 2021, pp. 2823–34.","ieee":"F. Alimisis, P. Davies, B. Vandereycken, and D.-A. Alistarh, “Distributed principal component analysis with limited communication,” in <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 4, pp. 2823–2834.","apa":"Alimisis, F., Davies, P., Vandereycken, B., &#38; Alistarh, D.-A. (2021). Distributed principal component analysis with limited communication. In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i> (Vol. 4, pp. 2823–2834). Virtual, Online: Neural Information Processing Systems Foundation.","ista":"Alimisis F, Davies P, Vandereycken B, Alistarh D-A. 2021. Distributed principal component analysis with limited communication. Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 4, 2823–2834.","ama":"Alimisis F, Davies P, Vandereycken B, Alistarh D-A. Distributed principal component analysis with limited communication. In: <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>. Vol 4. Neural Information Processing Systems Foundation; 2021:2823-2834.","short":"F. Alimisis, P. Davies, B. Vandereycken, D.-A. Alistarh, in:, Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 
2823–2834."},"month":"12","conference":{"end_date":"2021-12-14","start_date":"2021-12-06","location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems"},"oa_version":"Published Version","arxiv":1,"volume":4,"article_processing_charge":"No","author":[{"first_name":"Foivos","full_name":"Alimisis, Foivos","last_name":"Alimisis"},{"orcid":"0000-0002-5646-9524","full_name":"Davies, Peter","last_name":"Davies","id":"11396234-BB50-11E9-B24C-90FCE5697425","first_name":"Peter"},{"first_name":"Bart","full_name":"Vandereycken, Bart","last_name":"Vandereycken"},{"first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X"}],"title":"Distributed principal component analysis with limited communication","_id":"11452","abstract":[{"text":"We study efficient distributed algorithms for the fundamental problem of principal component analysis and leading eigenvector computation on the sphere, when the data are randomly distributed among a set of computational nodes. We propose a new quantized variant of Riemannian gradient descent to solve this problem, and prove that the algorithm converges with high probability under a set of necessary spherical-convexity properties. We give bounds on the number of bits transmitted by the algorithm under common initialization schemes, and investigate the dependency on the problem dimension in each case.","lang":"eng"}],"publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"project":[{"call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223","_id":"268A44D6-B435-11E9-9278-68D0E5697425"},{"name":"ISTplus - Postdoctoral Fellowships","call_identifier":"H2020","grant_number":"754411","_id":"260C2330-B435-11E9-9278-68D0E5697425"}],"acknowledgement":"We would like to thank the anonymous reviewers for helpful comments and suggestions. We also thank Aurelien Lucchi and Antonio Orvieto for fruitful discussions at an early stage of this work. FA is partially supported by the SNSF under research project No. 192363 and conducted part of this work while at IST Austria under the European Union’s Horizon 2020 research and innovation programme (grant agreement No. 805223 ScaleML). PD partly conducted this work while at IST Austria and was supported by the European Union’s Horizon 2020 programme under the Marie Skłodowska-Curie grant agreement No. 754411.","publication_status":"published","status":"public","ec_funded":1},{"acknowledgement":"We would like to thank Professor Dr. Henning Sprekeler for his valuable suggestions and Dr. Andrew Saxe, Milan Klöwer and Anna Wallis for their constructive feedback on the manuscript. Lukas Braun was supported by the Network of European Neuroscience Schools through their NENS Exchange Grant program, by the European Union through their European Community Action Scheme for the Mobility of University Students, the Woodward Scholarship awarded by Wadham College, Oxford and the Medical Research Council [MR/N013468/1]. Tim P. Vogels was supported by a Wellcome Trust Senior Research Fellowship [214316/Z/18/Z].","status":"public","publication_status":"published","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"project":[{"_id":"c084a126-5a5b-11eb-8a69-d75314a70a87","grant_number":"214316/Z/18/Z","name":"What’s in a memory? 
Spatiotemporal dynamics in strongly coupled recurrent neuronal networks."}],"abstract":[{"lang":"eng","text":"Neuronal computations depend on synaptic connectivity and intrinsic electrophysiological properties. Synaptic connectivity determines which inputs from presynaptic neurons are integrated, while cellular properties determine how inputs are filtered over time. Unlike their biological counterparts, most computational approaches to learning in simulated neural networks are limited to changes in synaptic connectivity. However, if intrinsic parameters change, neural computations are altered drastically. Here, we include the parameters that determine the intrinsic properties,\r\ne.g., time constants and reset potential, into the learning paradigm. Using sparse feedback signals that indicate target spike times, and gradient-based parameter updates, we show that the intrinsic parameters can be learned along with the synaptic weights to produce specific input-output functions. Specifically, we use a teacher-student paradigm in which a randomly initialised leaky integrate-and-fire or resonate-and-fire neuron must recover the parameters of a teacher neuron. We show that complex temporal functions can be learned online and without backpropagation through time, relying on event-based updates only. Our results are a step towards online learning of neural computations from ungraded and unsigned sparse feedback signals with a biologically inspired learning mechanism."}],"title":"Online learning of neural computations from sparse temporal feedback","author":[{"last_name":"Braun","full_name":"Braun, Lukas","first_name":"Lukas"},{"orcid":"0000-0003-3295-6181","id":"CB6FF8D2-008F-11EA-8E08-2637E6697425","first_name":"Tim P","full_name":"Vogels, Tim P","last_name":"Vogels"}],"_id":"11453","volume":20,"article_processing_charge":"No","oa_version":"Published Version","conference":{"end_date":"2021-12-14","location":"Virtual, Online","start_date":"2021-12-06","name":"NeurIPS: Neural Information Processing Systems"},"citation":{"short":"L. Braun, T.P. Vogels, in:, Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 16437–16450.","ama":"Braun L, Vogels TP. Online learning of neural computations from sparse temporal feedback. In: <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>. Vol 20. Neural Information Processing Systems Foundation; 2021:16437-16450.","ista":"Braun L, Vogels TP. 2021. Online learning of neural computations from sparse temporal feedback. Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 20, 16437–16450.","ieee":"L. Braun and T. P. Vogels, “Online learning of neural computations from sparse temporal feedback,” in <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 20, pp. 16437–16450.","apa":"Braun, L., &#38; Vogels, T. P. (2021). Online learning of neural computations from sparse temporal feedback. In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i> (Vol. 20, pp. 16437–16450). Virtual, Online: Neural Information Processing Systems Foundation.","chicago":"Braun, Lukas, and Tim P Vogels. 
“Online Learning of Neural Computations from Sparse Temporal Feedback.” In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, 20:16437–50. Neural Information Processing Systems Foundation, 2021.","mla":"Braun, Lukas, and Tim P. Vogels. “Online Learning of Neural Computations from Sparse Temporal Feedback.” <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, vol. 20, Neural Information Processing Systems Foundation, 2021, pp. 16437–50."},"day":"01","month":"12","page":"16437-16450","publication":"Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems","date_updated":"2022-06-20T07:12:58Z","year":"2021","oa":1,"language":[{"iso":"eng"}],"quality_controlled":"1","type":"conference","publisher":"Neural Information Processing Systems Foundation","intvolume":"        20","main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/88e1ce84f9feef5a08d0df0334c53468-Paper.pdf"}],"date_published":"2021-12-01T00:00:00Z","date_created":"2022-06-19T22:01:59Z","scopus_import":"1","department":[{"_id":"TiVo"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"year":"2021","oa":1,"related_material":{"record":[{"id":"13074","relation":"dissertation_contains","status":"public"}]},"page":"8557-8570","publication":"35th Conference on Neural Information Processing Systems","date_updated":"2023-06-01T12:54:45Z","external_id":{"arxiv":["2106.12379"]},"date_published":"2021-12-06T00:00:00Z","date_created":"2022-06-20T12:11:53Z","acknowledged_ssus":[{"_id":"ScienComp"}],"scopus_import":"1","department":[{"_id":"GradSch"},{"_id":"DaAl"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","language":[{"iso":"eng"}],"quality_controlled":"1","type":"conference","publisher":"Curran Associates","intvolume":"        34","main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/48000647b315f6f00f913caa757a70b3-Paper.pdf","open_access":"1"}],"abstract":[{"lang":"eng","text":"The increasing computational requirements of deep neural networks (DNNs) have led to significant interest in obtaining DNN models that are sparse, yet accurate. Recent work has investigated the even harder case of sparse training, where the DNN weights are, for as much as possible, already sparse to reduce computational costs during training. Existing sparse training methods are often empirical and can have lower accuracy relative to the dense baseline. In this paper, we present a general approach called Alternating Compressed/DeCompressed (AC/DC) training of DNNs, demonstrate convergence for a variant of the algorithm, and show that AC/DC outperforms existing sparse training methods in accuracy at similar computational budgets; at high sparsity levels, AC/DC even outperforms existing methods that rely on accurate pre-trained dense models. An important property of AC/DC is that it allows co-training of dense and sparse models, yielding accurate sparse–dense model pairs at the end of the training process. This is useful in practice, where compressed variants may be desirable for deployment in resource-constrained settings without re-doing the entire training flow, and also provides us with insights into the accuracy gap between dense and compressed models. 
The code is available at: https://github.com/IST-DASLab/ACDC."}],"title":"AC/DC: Alternating Compressed/DeCompressed training of deep neural networks","author":[{"id":"32D78294-F248-11E8-B48F-1D18A9856A87","first_name":"Elena-Alexandra","full_name":"Peste, Elena-Alexandra","last_name":"Peste"},{"orcid":"0000-0002-7778-3221","last_name":"Iofinova","full_name":"Iofinova, Eugenia B","first_name":"Eugenia B","id":"f9a17499-f6e0-11ea-865d-fdf9a3f77117"},{"first_name":"Adrian","full_name":"Vladu, Adrian","last_name":"Vladu"},{"orcid":"0000-0003-3650-940X","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"_id":"11458","publication_status":"published","status":"public","acknowledgement":"This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), and a CNRS PEPS grant. This research was supported by the Scientific Service Units (SSU) of IST Austria through resources provided by Scientific Computing (SciComp). We would also like to thank Christoph Lampert for his feedback on an earlier version of this work, as well as for providing hardware for the Transformer-XL experiments.","ec_funded":1,"publication_identifier":{"issn":["1049-5258"],"isbn":["9781713845393"]},"project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning"}],"oa_version":"Published Version","conference":{"name":"NeurIPS: Neural Information Processing Systems","start_date":"2021-12-06","location":"Virtual, Online","end_date":"2021-12-14"},"citation":{"chicago":"Peste, Elena-Alexandra, Eugenia B Iofinova, Adrian Vladu, and Dan-Adrian Alistarh. “AC/DC: Alternating Compressed/DeCompressed Training of Deep Neural Networks.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:8557–70. Curran Associates, 2021.","mla":"Peste, Elena-Alexandra, et al. “AC/DC: Alternating Compressed/DeCompressed Training of Deep Neural Networks.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Curran Associates, 2021, pp. 8557–70.","apa":"Peste, E.-A., Iofinova, E. B., Vladu, A., &#38; Alistarh, D.-A. (2021). AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 8557–8570). Virtual, Online: Curran Associates.","ieee":"E.-A. Peste, E. B. Iofinova, A. Vladu, and D.-A. Alistarh, “AC/DC: Alternating Compressed/DeCompressed training of deep neural networks,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 8557–8570.","ista":"Peste E-A, Iofinova EB, Vladu A, Alistarh D-A. 2021. AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 8557–8570.","ama":"Peste E-A, Iofinova EB, Vladu A, Alistarh D-A. AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Curran Associates; 2021:8557-8570.","short":"E.-A. Peste, E.B. Iofinova, A. Vladu, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Curran Associates, 2021, pp. 
8557–8570."},"day":"6","month":"12","article_processing_charge":"No","volume":34,"arxiv":1},{"project":[{"call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223","_id":"268A44D6-B435-11E9-9278-68D0E5697425"}],"publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"ec_funded":1,"acknowledgement":"We gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), as well as computational support from Amazon Web Services (AWS) EC2.","publication_status":"published","status":"public","_id":"11463","title":"M-FAC: Efficient matrix-free approximations of second-order information","author":[{"id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f","first_name":"Elias","full_name":"Frantar, Elias","last_name":"Frantar"},{"first_name":"Eldar","id":"47beb3a5-07b5-11eb-9b87-b108ec578218","last_name":"Kurtic","full_name":"Kurtic, Eldar"},{"full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X"}],"abstract":[{"text":"Efficiently approximating local curvature information of the loss function is a key tool for optimization and compression of deep neural networks. Yet, most existing methods to approximate second-order information have high computational\r\nor storage costs, which limits their practicality. In this work, we investigate matrix-free, linear-time approaches for estimating Inverse-Hessian Vector Products (IHVPs) for the case when the Hessian can be approximated as a sum of rank-one matrices, as in the classic approximation of the Hessian by the empirical Fisher matrix. We propose two new algorithms: the first is tailored towards network compression and can compute the IHVP for dimension d, if the Hessian is given as a sum of m rank-one matrices, using O(dm²) precomputation, O(dm) cost for computing the IHVP, and query cost O(m) for any single element of the inverse Hessian. The second algorithm targets an optimization setting, where we wish to compute the product between the inverse Hessian, estimated over a sliding window of optimization steps, and a given gradient direction, as required for preconditioned SGD. We give an algorithm with cost O(dm + m²) for computing the IHVP and O(dm + m³) for adding or removing any gradient from the sliding window. These\r\ntwo algorithms yield state-of-the-art results for network pruning and optimization with lower computational overhead relative to existing second-order methods. Implementations are available at [9] and [17].","lang":"eng"}],"arxiv":1,"article_processing_charge":"No","volume":34,"month":"12","citation":{"chicago":"Frantar, Elias, Eldar Kurtic, and Dan-Adrian Alistarh. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:14873–86. Curran Associates, 2021.","mla":"Frantar, Elias, et al. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Curran Associates, 2021, pp. 14873–86.","ama":"Frantar E, Kurtic E, Alistarh D-A. M-FAC: Efficient matrix-free approximations of second-order information. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Curran Associates; 2021:14873-14886.","ista":"Frantar E, Kurtic E, Alistarh D-A. 2021. 
M-FAC: Efficient matrix-free approximations of second-order information. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 14873–14886.","short":"E. Frantar, E. Kurtic, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Curran Associates, 2021, pp. 14873–14886.","ieee":"E. Frantar, E. Kurtic, and D.-A. Alistarh, “M-FAC: Efficient matrix-free approximations of second-order information,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 14873–14886.","apa":"Frantar, E., Kurtic, E., &#38; Alistarh, D.-A. (2021). M-FAC: Efficient matrix-free approximations of second-order information. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 14873–14886). Virtual, Online: Curran Associates."},"day":"06","oa_version":"Published Version","conference":{"end_date":"2021-12-14","start_date":"2021-12-06","location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems"},"date_updated":"2022-06-27T07:05:12Z","publication":"35th Conference on Neural Information Processing Systems","page":"14873-14886","oa":1,"year":"2021","publisher":"Curran Associates","intvolume":"        34","main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/7cfd5df443b4eb0d69886a583b33de4c-Paper.pdf"}],"quality_controlled":"1","type":"conference","language":[{"iso":"eng"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","department":[{"_id":"DaAl"}],"date_created":"2022-06-26T22:01:35Z","scopus_import":"1","external_id":{"arxiv":["2010.08222"]},"date_published":"2021-12-06T00:00:00Z"},{"oa":1,"year":"2021","date_updated":"2022-06-27T06:54:31Z","publication":"35th Conference on Neural Information Processing Systems","page":"7254-7266","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","department":[{"_id":"DaAl"}],"scopus_import":"1","date_created":"2022-06-26T22:01:35Z","date_published":"2021-12-06T00:00:00Z","external_id":{"arxiv":["2010.08222"]},"intvolume":"        34","main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/3b92d18aa7a6176dd37d372bc2f1eb71-Paper.pdf","open_access":"1"}],"publisher":"Curran Associates","language":[{"iso":"eng"}],"quality_controlled":"1","type":"conference","_id":"11464","author":[{"orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian"},{"last_name":"Korhonen","full_name":"Korhonen, Janne","first_name":"Janne","id":"C5402D42-15BC-11E9-A202-CA2BE6697425"}],"title":"Towards tight communication lower bounds for distributed optimisation","abstract":[{"text":"We consider a standard distributed optimisation setting where N machines, each holding a d-dimensional function\r\nf_i, aim to jointly minimise the sum of the functions ∑_{i=1}^N f_i(x). This problem arises naturally in large-scale distributed optimisation, where a standard solution is to apply variants of (stochastic) gradient descent. We focus on the communication complexity of this problem: our main result provides the first fully unconditional bounds on total number of bits which need to be sent and received by the N machines to solve this problem under point-to-point communication, within a given error-tolerance. Specifically, we show that Ω(Nd log d / Nε) total bits need to be communicated between the machines to find an additive ϵ-approximation to the minimum of ∑_{i=1}^N f_i(x). 
The result holds for both deterministic and randomised algorithms, and, importantly, requires no assumptions on the algorithm structure. The lower bound is tight under certain restrictions on parameter values, and is matched within constant factors for quadratic objectives by a new variant of quantised gradient descent, which we describe and analyse. Our results bring over tools from communication complexity to distributed optimisation, which has potential for further applications.","lang":"eng"}],"project":[{"call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223"}],"publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"ec_funded":1,"publication_status":"published","status":"public","acknowledgement":"We thank the NeurIPS reviewers for insightful comments that helped us improve the positioning of our results, as well as for pointing out the subsampling approach for complementing the randomised lower bound. We also thank Foivos Alimisis and Peter Davies for useful discussions. This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","month":"12","day":"06","citation":{"mla":"Alistarh, Dan-Adrian, and Janne Korhonen. “Towards Tight Communication Lower Bounds for Distributed Optimisation.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Curran Associates, 2021, pp. 7254–66.","chicago":"Alistarh, Dan-Adrian, and Janne Korhonen. “Towards Tight Communication Lower Bounds for Distributed Optimisation.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:7254–66. Curran Associates, 2021.","apa":"Alistarh, D.-A., &#38; Korhonen, J. (2021). Towards tight communication lower bounds for distributed optimisation. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 7254–7266). Virtual, Online: Curran Associates.","ieee":"D.-A. Alistarh and J. Korhonen, “Towards tight communication lower bounds for distributed optimisation,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 7254–7266.","ama":"Alistarh D-A, Korhonen J. Towards tight communication lower bounds for distributed optimisation. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Curran Associates; 2021:7254-7266.","short":"D.-A. Alistarh, J. Korhonen, in:, 35th Conference on Neural Information Processing Systems, Curran Associates, 2021, pp. 7254–7266.","ista":"Alistarh D-A, Korhonen J. 2021. Towards tight communication lower bounds for distributed optimisation. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 
34, 7254–7266."},"conference":{"start_date":"2021-12-06","location":"Virtual, Online","end_date":"2021-12-14","name":"NeurIPS: Neural Information Processing Systems"},"oa_version":"Published Version","arxiv":1,"article_processing_charge":"No","volume":34},{"publisher":"Neural Information Processing Systems Foundation","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2106.02356"}],"intvolume":"        35","type":"conference","quality_controlled":"1","language":[{"iso":"eng"}],"department":[{"_id":"MaMo"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","external_id":{"arxiv":["2106.02356"]},"date_published":"2021-12-01T00:00:00Z","date_created":"2022-01-03T10:50:02Z","scopus_import":"1","publication":"35th Conference on Neural Information Processing Systems","date_updated":"2024-09-10T13:03:19Z","page":"29616-29629","oa":1,"year":"2021","arxiv":1,"volume":35,"article_processing_charge":"No","citation":{"ama":"Mondelli M, Venkataramanan R. PCA initialization for approximate message passing in rotationally invariant models. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 35. Neural Information Processing Systems Foundation; 2021:29616-29629.","short":"M. Mondelli, R. Venkataramanan, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 29616–29629.","ista":"Mondelli M, Venkataramanan R. 2021. PCA initialization for approximate message passing in rotationally invariant models. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 35, 29616–29629.","ieee":"M. Mondelli and R. Venkataramanan, “PCA initialization for approximate message passing in rotationally invariant models,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, 2021, vol. 35, pp. 29616–29629.","apa":"Mondelli, M., &#38; Venkataramanan, R. (2021). PCA initialization for approximate message passing in rotationally invariant models. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 35, pp. 29616–29629). Virtual: Neural Information Processing Systems Foundation.","mla":"Mondelli, Marco, and Ramji Venkataramanan. “PCA Initialization for Approximate Message Passing in Rotationally Invariant Models.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 35, Neural Information Processing Systems Foundation, 2021, pp. 29616–29.","chicago":"Mondelli, Marco, and Ramji Venkataramanan. “PCA Initialization for Approximate Message Passing in Rotationally Invariant Models.” In <i>35th Conference on Neural Information Processing Systems</i>, 35:29616–29. Neural Information Processing Systems Foundation, 2021."},"day":"01","month":"12","oa_version":"Preprint","conference":{"start_date":"2021-12-06","location":"Virtual","end_date":"2021-12-14","name":"NeurIPS: Neural Information Processing Systems"},"publication_identifier":{"issn":["1049-5258"],"isbn":["9781713845393"]},"project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"acknowledgement":"M. Mondelli would like to thank László Erdős for helpful discussions. M. Mondelli was partially supported by the 2019 Lopez-Loreta Prize. R. 
Venkataramanan was partially supported by the Alan Turing Institute under the EPSRC grant EP/N510129/1.\r\n","status":"public","publication_status":"published","title":"PCA initialization for approximate message passing in rotationally invariant models","author":[{"orcid":"0000-0002-3242-7020","full_name":"Mondelli, Marco","last_name":"Mondelli","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco"},{"first_name":"Ramji","last_name":"Venkataramanan","full_name":"Venkataramanan, Ramji"}],"_id":"10593","abstract":[{"text":"We study the problem of estimating a rank-1 signal in the presence of rotationally invariant noise – a class of perturbations more general than Gaussian noise. Principal Component Analysis (PCA) provides a natural estimator, and sharp results on its performance have been obtained in the high-dimensional regime. Recently, an Approximate Message Passing (AMP) algorithm has been proposed as an alternative estimator with the potential to improve the accuracy of PCA. However, the existing analysis of AMP requires an initialization that is both correlated with the signal and independent of the noise, which is often unrealistic in practice. In this work, we combine the two methods, and propose to initialize AMP with PCA. Our main result is a rigorous asymptotic characterization of the performance of this estimator. Both the AMP algorithm and its analysis differ from those previously derived in the Gaussian setting: at every iteration, our AMP algorithm requires a specific term to account for PCA initialization, while in the Gaussian case, PCA initialization affects only the first iteration of AMP. The proof is based on a two-phase artificial AMP that first approximates the PCA estimator and then mimics the true AMP. Our numerical simulations show an excellent agreement between AMP results and theoretical predictions, and suggest an interesting open direction on achieving Bayes-optimal performance.","lang":"eng"}]},{"article_processing_charge":"No","volume":35,"arxiv":1,"oa_version":"Preprint","conference":{"end_date":"2021-12-14","start_date":"2021-12-06","location":"Virtual","name":"35th Conference on Neural Information Processing Systems"},"citation":{"chicago":"Nguyen, Quynh, Pierre Bréchet, and Marco Mondelli. “When Are Solutions Connected in Deep Networks?” In <i>35th Conference on Neural Information Processing Systems</i>, Vol. 35. Neural Information Processing Systems Foundation, 2021.","mla":"Nguyen, Quynh, et al. “When Are Solutions Connected in Deep Networks?” <i>35th Conference on Neural Information Processing Systems</i>, vol. 35, Neural Information Processing Systems Foundation, 2021.","apa":"Nguyen, Q., Bréchet, P., &#38; Mondelli, M. (2021). When are solutions connected in deep networks? In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 35). Virtual: Neural Information Processing Systems Foundation.","ieee":"Q. Nguyen, P. Bréchet, and M. Mondelli, “When are solutions connected in deep networks?,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, 2021, vol. 35.","ama":"Nguyen Q, Bréchet P, Mondelli M. When are solutions connected in deep networks? In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 35. Neural Information Processing Systems Foundation; 2021.","short":"Q. Nguyen, P. Bréchet, M. Mondelli, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021.","ista":"Nguyen Q, Bréchet P, Mondelli M. 2021. 
When are solutions connected in deep networks? 35th Conference on Neural Information Processing Systems. 35th Conference on Neural Information Processing Systems vol. 35."},"day":"01","month":"12","acknowledgement":"MM was partially supported by the 2019 Lopez-Loreta Prize. QN and PB acknowledge support from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement no 757983).","publication_status":"published","status":"public","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"abstract":[{"text":"The question of how and why the phenomenon of mode connectivity occurs in training deep neural networks has gained remarkable attention in the research community. From a theoretical perspective, two possible explanations have been proposed: (i) the loss function has connected sublevel sets, and (ii) the solutions found by stochastic gradient descent are dropout stable. While these explanations provide insights into the phenomenon, their assumptions are not always satisfied in practice. In particular, the first approach requires the network to have one layer with order of N neurons (N being the number of training samples), while the second one requires the loss to be almost invariant after removing half of the neurons at each layer (up to some rescaling of the remaining ones). In this work, we improve both conditions by exploiting the quality of the features at every intermediate layer together with a milder over-parameterization condition. More specifically, we show that: (i) under generic assumptions on the features of intermediate layers, it suffices that the last two hidden layers have order of √N neurons, and (ii) if subsets of features at each layer are linearly separable, then no over-parameterization is needed to show the connectivity. Our experiments confirm that the proposed condition ensures the connectivity of solutions found by stochastic gradient descent, even in settings where the previous requirements do not hold.","lang":"eng"}],"title":"When are solutions connected in deep networks?","author":[{"last_name":"Nguyen","full_name":"Nguyen, Quynh","first_name":"Quynh"},{"full_name":"Bréchet, Pierre","last_name":"Bréchet","first_name":"Pierre"},{"id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","full_name":"Mondelli, Marco","last_name":"Mondelli","orcid":"0000-0002-3242-7020"}],"_id":"10594","type":"conference","quality_controlled":"1","language":[{"iso":"eng"}],"publisher":"Neural Information Processing Systems Foundation","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2102.09671"}],"intvolume":"        35","date_published":"2021-12-01T00:00:00Z","external_id":{"arxiv":["2102.09671"]},"date_created":"2022-01-03T10:56:20Z","department":[{"_id":"MaMo"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publication":"35th Conference on Neural Information Processing Systems","date_updated":"2024-09-10T13:03:19Z","year":"2021","oa":1},{"volume":33,"article_processing_charge":"No","day":"06","citation":{"chicago":"Confavreux, Basile J, Friedemann Zenke, Everton J. Agnes, Timothy Lillicrap, and Tim P Vogels. “A Meta-Learning Approach to (Re)Discover Plasticity Rules That Carve a Desired Function into a Neural Network.” In <i>Advances in Neural Information Processing Systems</i>, 33:16398–408, 2020.","mla":"Confavreux, Basile J., et al. 
“A Meta-Learning Approach to (Re)Discover Plasticity Rules That Carve a Desired Function into a Neural Network.” <i>Advances in Neural Information Processing Systems</i>, vol. 33, 2020, pp. 16398–408.","short":"B.J. Confavreux, F. Zenke, E.J. Agnes, T. Lillicrap, T.P. Vogels, in:, Advances in Neural Information Processing Systems, 2020, pp. 16398–16408.","ama":"Confavreux BJ, Zenke F, Agnes EJ, Lillicrap T, Vogels TP. A meta-learning approach to (re)discover plasticity rules that carve a desired function into a neural network. In: <i>Advances in Neural Information Processing Systems</i>. Vol 33. ; 2020:16398-16408.","ista":"Confavreux BJ, Zenke F, Agnes EJ, Lillicrap T, Vogels TP. 2020. A meta-learning approach to (re)discover plasticity rules that carve a desired function into a neural network. Advances in Neural Information Processing Systems. NeurIPS: Conference on Neural Information Processing Systems vol. 33, 16398–16408.","apa":"Confavreux, B. J., Zenke, F., Agnes, E. J., Lillicrap, T., &#38; Vogels, T. P. (2020). A meta-learning approach to (re)discover plasticity rules that carve a desired function into a neural network. In <i>Advances in Neural Information Processing Systems</i> (Vol. 33, pp. 16398–16408). Vancouver, Canada.","ieee":"B. J. Confavreux, F. Zenke, E. J. Agnes, T. Lillicrap, and T. P. Vogels, “A meta-learning approach to (re)discover plasticity rules that carve a desired function into a neural network,” in <i>Advances in Neural Information Processing Systems</i>, Vancouver, Canada, 2020, vol. 33, pp. 16398–16408."},"month":"12","conference":{"end_date":"2020-12-12","location":"Vancouver, Canada","start_date":"2020-12-06","name":"NeurIPS: Conference on Neural Information Processing Systems"},"oa_version":"Published Version","publication_identifier":{"issn":["1049-5258"]},"project":[{"grant_number":"214316/Z/18/Z","_id":"c084a126-5a5b-11eb-8a69-d75314a70a87","name":"What’s in a memory? Spatiotemporal dynamics in strongly coupled recurrent neuronal networks."},{"_id":"0aacfa84-070f-11eb-9043-d7eb2c709234","grant_number":"819603","call_identifier":"H2020","name":"Learning the shape of synaptic plasticity rules for neuronal architectures and function through machine learning."}],"publication_status":"published","status":"public","acknowledgement":"We would like to thank Chaitanya Chintaluri, Georgia Christodoulou, Bill Podlaski and Merima Šabanovic for useful discussions and comments. This work was supported by a Wellcome Trust Senior Research Fellowship (214316/Z/18/Z), a BBSRC grant (BB/N019512/1), an ERC consolidator Grant (SYNAPSEEK), a Leverhulme Trust Project Grant (RPG-2016-446), and funding from École Polytechnique, Paris.","ec_funded":1,"author":[{"id":"C7610134-B532-11EA-BD9F-F5753DDC885E","first_name":"Basile J","full_name":"Confavreux, Basile J","last_name":"Confavreux"},{"full_name":"Zenke, Friedemann","last_name":"Zenke","first_name":"Friedemann"},{"first_name":"Everton J.","last_name":"Agnes","full_name":"Agnes, Everton J."},{"first_name":"Timothy","full_name":"Lillicrap, Timothy","last_name":"Lillicrap"},{"orcid":"0000-0003-3295-6181","full_name":"Vogels, Tim P","last_name":"Vogels","id":"CB6FF8D2-008F-11EA-8E08-2637E6697425","first_name":"Tim P"}],"title":"A meta-learning approach to (re)discover plasticity rules that carve a desired function into a neural network","_id":"9633","abstract":[{"lang":"eng","text":"The search for biologically faithful synaptic plasticity rules has resulted in a large body of models. 
They are usually inspired by – and fitted to – experimental data, but they rarely produce neural dynamics that serve complex functions. These failures suggest that current plasticity models are still under-constrained by existing data. Here, we present an alternative approach that uses meta-learning to discover plausible synaptic plasticity rules. Instead of experimental data, the rules are constrained by the functions they implement and the structure they are meant to produce. Briefly, we parameterize synaptic plasticity rules by a Volterra expansion and then use supervised learning methods (gradient descent or evolutionary strategies) to minimize a problem-dependent loss function that quantifies how effectively a candidate plasticity rule transforms an initially random network into one with the desired function. We first validate our approach by re-discovering previously described plasticity rules, starting at the single-neuron level and “Oja’s rule”, a simple Hebbian plasticity rule that captures the direction of most variability of inputs to a neuron (i.e., the first principal component). We expand the problem to the network level and ask the framework to find Oja’s rule together with an anti-Hebbian rule such that an initially random two-layer firing-rate network will recover several principal components of the input space after learning. Next, we move to networks of integrate-and-fire neurons with plastic inhibitory afferents. We train for rules that achieve a target firing rate by countering tuned excitation. Our algorithm discovers a specific subset of the manifold of rules that can solve this task. Our work is a proof of principle of an automated and unbiased approach to unveil synaptic plasticity rules that obey biological constraints and can solve complex functions."}],"main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2020/hash/bdbd5ebfde4934142c8a88e7a3796cd5-Abstract.html","open_access":"1"}],"intvolume":"        33","quality_controlled":"1","type":"conference","language":[{"iso":"eng"}],"department":[{"_id":"TiVo"}],"user_id":"6785fbc1-c503-11eb-8a32-93094b40e1cf","date_published":"2020-12-06T00:00:00Z","scopus_import":"1","date_created":"2021-07-04T22:01:27Z","publication":"Advances in Neural Information Processing Systems","date_updated":"2023-10-18T09:20:55Z","related_material":{"link":[{"url":"https://doi.org/10.1101/2020.10.24.353409","relation":"is_continued_by"}],"record":[{"id":"14422","status":"public","relation":"dissertation_contains"}]},"page":"16398-16408","oa":1,"year":"2020"},{"year":"2019","oa":1,"page":"927-938","date_updated":"2023-09-08T11:13:52Z","external_id":{"isi":["000534424300084"],"arxiv":["1909.02253"]},"date_published":"2019-12-01T00:00:00Z","date_created":"2020-02-28T10:03:24Z","department":[{"_id":"DaAl"}],"user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","isi":1,"quality_controlled":"1","language":[{"iso":"eng"}],"type":"conference","main_file_link":[{"open_access":"1","url":"http://papers.nips.cc/paper/8379-powerset-convolutional-neural-networks"}],"intvolume":"        32","publisher":"Neural Information Processing Systems Foundation","abstract":[{"text":"We present a novel class of convolutional neural networks (CNNs) for set functions, i.e., data indexed with the powerset of a finite set. 
The convolutions are derived as linear, shift-equivariant functions for various notions of shifts on set functions. The framework is fundamentally different from graph convolutions based on the Laplacian, as it provides not one but several basic shifts, one for each element in the ground set. Prototypical experiments with several set function classification tasks on synthetic datasets and on datasets derived from real-world hypergraphs demonstrate the potential of our new powerset CNNs.","lang":"eng"}],"author":[{"full_name":"Wendler, Chris","last_name":"Wendler","first_name":"Chris"},{"orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian"},{"last_name":"Püschel","full_name":"Püschel, Markus","first_name":"Markus"}],"title":"Powerset convolutional neural networks","_id":"7542","status":"public","publication_status":"published","ec_funded":1,"publication_identifier":{"issn":["1049-5258"]},"project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020"}],"conference":{"name":"NIPS: Conference on Neural Information Processing Systems","location":"Vancouver, Canada","start_date":"2019-12-08","end_date":"2019-12-14"},"oa_version":"Published Version","day":"01","citation":{"ama":"Wendler C, Alistarh D-A, Püschel M. Powerset convolutional neural networks. In: Vol 32. Neural Information Processing Systems Foundation; 2019:927-938.","short":"C. Wendler, D.-A. Alistarh, M. Püschel, in:, Neural Information Processing Systems Foundation, 2019, pp. 927–938.","ista":"Wendler C, Alistarh D-A, Püschel M. 2019. Powerset convolutional neural networks. NIPS: Conference on Neural Information Processing Systems vol. 32, 927–938.","ieee":"C. Wendler, D.-A. Alistarh, and M. Püschel, “Powerset convolutional neural networks,” presented at the NIPS: Conference on Neural Information Processing Systems, Vancouver, Canada, 2019, vol. 32, pp. 927–938.","apa":"Wendler, C., Alistarh, D.-A., &#38; Püschel, M. (2019). Powerset convolutional neural networks (Vol. 32, pp. 927–938). Presented at the NIPS: Conference on Neural Information Processing Systems, Vancouver, Canada: Neural Information Processing Systems Foundation.","chicago":"Wendler, Chris, Dan-Adrian Alistarh, and Markus Püschel. “Powerset Convolutional Neural Networks,” 32:927–38. Neural Information Processing Systems Foundation, 2019.","mla":"Wendler, Chris, et al. <i>Powerset Convolutional Neural Networks</i>. Vol. 32, Neural Information Processing Systems Foundation, 2019, pp. 927–38."},"month":"12","article_processing_charge":"No","volume":32,"arxiv":1}]
