[{"department":[{"_id":"DaAl"}],"conference":{"start_date":"2020-12-06","end_date":"2020-12-12","location":"Vancouver, Canada","name":"NeurIPS: Conference on Neural Information Processing Systems"},"date_created":"2021-07-04T22:01:26Z","month":"12","date_published":"2020-12-06T00:00:00Z","scopus_import":"1","publisher":"Curran Associates","language":[{"iso":"eng"}],"publication":"Advances in Neural Information Processing Systems","page":"22361-22372","day":"06","type":"conference","intvolume":"        33","status":"public","main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2020/hash/fdb2c3bab9d0701c4a050a4d8d782c7f-Abstract.html","open_access":"1"}],"year":"2020","ec_funded":1,"external_id":{"arxiv":["2002.11505"]},"title":"Scalable belief propagation via relaxed scheduling","arxiv":1,"article_processing_charge":"No","volume":33,"oa":1,"date_updated":"2023-02-23T14:03:03Z","publication_identifier":{"issn":["10495258"],"isbn":["9781713829546"]},"_id":"9631","oa_version":"Published Version","quality_controlled":"1","project":[{"call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223"}],"user_id":"6785fbc1-c503-11eb-8a32-93094b40e1cf","acknowledgement":"We thank Marco Mondelli for discussions related to LDPC decoding, and Giorgi Nadiradze for discussions on analysis of relaxed schedulers. This project has received funding from the European Research Council (ERC) under the European\r\nUnion’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","citation":{"ieee":"V. Aksenov, D.-A. Alistarh, and J. Korhonen, “Scalable belief propagation via relaxed scheduling,” in <i>Advances in Neural Information Processing Systems</i>, Vancouver, Canada, 2020, vol. 33, pp. 22361–22372.","apa":"Aksenov, V., Alistarh, D.-A., &#38; Korhonen, J. (2020). Scalable belief propagation via relaxed scheduling. In <i>Advances in Neural Information Processing Systems</i> (Vol. 33, pp. 22361–22372). Vancouver, Canada: Curran Associates.","chicago":"Aksenov, Vitaly, Dan-Adrian Alistarh, and Janne Korhonen. “Scalable Belief Propagation via Relaxed Scheduling.” In <i>Advances in Neural Information Processing Systems</i>, 33:22361–72. Curran Associates, 2020.","ama":"Aksenov V, Alistarh D-A, Korhonen J. Scalable belief propagation via relaxed scheduling. In: <i>Advances in Neural Information Processing Systems</i>. Vol 33. Curran Associates; 2020:22361-22372.","mla":"Aksenov, Vitaly, et al. “Scalable Belief Propagation via Relaxed Scheduling.” <i>Advances in Neural Information Processing Systems</i>, vol. 33, Curran Associates, 2020, pp. 22361–72.","short":"V. Aksenov, D.-A. Alistarh, J. Korhonen, in:, Advances in Neural Information Processing Systems, Curran Associates, 2020, pp. 22361–22372.","ista":"Aksenov V, Alistarh D-A, Korhonen J. 2020. Scalable belief propagation via relaxed scheduling. Advances in Neural Information Processing Systems. NeurIPS: Conference on Neural Information Processing Systems vol. 33, 22361–22372."},"publication_status":"published","abstract":[{"lang":"eng","text":"The ability to leverage large-scale hardware parallelism has been one of the key enablers of the accelerated recent progress in machine learning. Consequently, there has been considerable effort invested into developing efficient parallel variants of classic machine learning algorithms. However, despite the wealth of knowledge on parallelization, some classic machine learning algorithms often prove hard to parallelize efficiently while maintaining convergence. In this paper, we focus on efficient parallel algorithms for the key machine learning task of inference on graphical models, in particular on the fundamental belief propagation algorithm. We address the challenge of efficiently parallelizing this classic paradigm by showing how to leverage scalable relaxed schedulers in this context. We present an extensive empirical study, showing that our approach outperforms previous parallel belief propagation implementations both in terms of scalability and in terms of wall-clock convergence time, on a range of practical applications."}],"author":[{"first_name":"Vitaly","last_name":"Aksenov","full_name":"Aksenov, Vitaly"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian"},{"last_name":"Korhonen","full_name":"Korhonen, Janne","first_name":"Janne","id":"C5402D42-15BC-11E9-A202-CA2BE6697425"}]},
{"external_id":{"arxiv":["2004.14340"]},"title":"WoodFisher: Efficient second-order approximation for neural network compression","year":"2020","ec_funded":1,"main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2020/hash/d1ff1ec86b62cd5f3903ff19c3a326b2-Abstract.html","open_access":"1"}],"abstract":[{"text":"Second-order information, in the form of Hessian- or Inverse-Hessian-vector products, is a fundamental tool for solving optimization problems. Recently, there has been significant interest in utilizing this information in the context of deep\r\nneural networks; however, relatively little is known about the quality of existing approximations in this context. Our work examines this question, identifies issues with existing approaches, and proposes a method called WoodFisher to compute a faithful and efficient estimate of the inverse Hessian. Our main application is to neural network compression, where we build on the classic Optimal Brain Damage/Surgeon framework. We demonstrate that WoodFisher significantly outperforms popular state-of-the-art methods for oneshot pruning. Further, even when iterative, gradual pruning is allowed, our method results in a gain in test accuracy over the state-of-the-art approaches, for standard image classification datasets such as ImageNet ILSVRC. We examine how our method can be extended to take into account first-order information, as well as\r\nillustrate its ability to automatically set layer-wise pruning thresholds and perform compression in the limited-data regime. The code is available at the following link, https://github.com/IST-DASLab/WoodFisher.","lang":"eng"}],"author":[{"first_name":"Sidak Pal","last_name":"Singh","full_name":"Singh, Sidak Pal","id":"DD138E24-D89D-11E9-9DC0-DEF6E5697425"},{"first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"citation":{"ieee":"S. P. Singh and D.-A. Alistarh, “WoodFisher: Efficient second-order approximation for neural network compression,” in <i>Advances in Neural Information Processing Systems</i>, Vancouver, Canada, 2020, vol. 33, pp. 18098–18109.","apa":"Singh, S. P., &#38; Alistarh, D.-A. (2020). WoodFisher: Efficient second-order approximation for neural network compression. In <i>Advances in Neural Information Processing Systems</i> (Vol. 33, pp. 18098–18109). Vancouver, Canada: Curran Associates.","chicago":"Singh, Sidak Pal, and Dan-Adrian Alistarh. “WoodFisher: Efficient Second-Order Approximation for Neural Network Compression.” In <i>Advances in Neural Information Processing Systems</i>, 33:18098–109. Curran Associates, 2020.","mla":"Singh, Sidak Pal, and Dan-Adrian Alistarh. “WoodFisher: Efficient Second-Order Approximation for Neural Network Compression.” <i>Advances in Neural Information Processing Systems</i>, vol. 33, Curran Associates, 2020, pp. 18098–109.","ama":"Singh SP, Alistarh D-A. WoodFisher: Efficient second-order approximation for neural network compression. In: <i>Advances in Neural Information Processing Systems</i>. Vol 33. Curran Associates; 2020:18098-18109.","ista":"Singh SP, Alistarh D-A. 2020. WoodFisher: Efficient second-order approximation for neural network compression. Advances in Neural Information Processing Systems. NeurIPS: Conference on Neural Information Processing Systems vol. 33, 18098–18109.","short":"S.P. Singh, D.-A. Alistarh, in:, Advances in Neural Information Processing Systems, Curran Associates, 2020, pp. 18098–18109."},"publication_status":"published","publication_identifier":{"isbn":["9781713829546"],"issn":["10495258"]},"_id":"9632","quality_controlled":"1","project":[{"call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223"}],"oa_version":"Published Version","user_id":"6785fbc1-c503-11eb-8a32-93094b40e1cf","acknowledgement":"This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). Also, we would like to thank Alexander Shevchenko, Alexandra Peste, and other members of the group for fruitful discussions.","arxiv":1,"article_processing_charge":"No","date_updated":"2023-02-23T14:03:06Z","oa":1,"volume":33,"scopus_import":"1","publisher":"Curran Associates","language":[{"iso":"eng"}],"month":"12","date_published":"2020-12-06T00:00:00Z","conference":{"end_date":"2020-12-12","name":"NeurIPS: Conference on Neural Information Processing Systems","location":"Vancouver, Canada","start_date":"2020-12-06"},"date_created":"2021-07-04T22:01:26Z","department":[{"_id":"DaAl"},{"_id":"ToHe"}],"intvolume":"        33","status":"public","day":"06","type":"conference","publication":"Advances in Neural Information Processing Systems","page":"18098-18109"},
{"language":[{"iso":"eng"}],"publisher":"Neural Information Processing Systems Foundation","date_published":"2017-12-01T00:00:00Z","month":"12","date_created":"2020-07-16T19:13:10Z","conference":{"location":"Long Beach, CA, United States","end_date":"2017-12-09","name":"NIPS: Neural Information Processing System","start_date":"2017-12-04"},"status":"public","intvolume":"        30","type":"conference","day":"01","page":"272-283","publication":"Advances in Neural Information Processing Systems","external_id":{"arxiv":["1711.02448"]},"title":"Cortical microcircuits as gated-recurrent neural networks","year":"2017","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1711.02448"}],"author":[{"last_name":"Costa","full_name":"Costa, Rui Ponte","first_name":"Rui Ponte"},{"first_name":"Yannis M.","full_name":"Assael, Yannis M.","last_name":"Assael"},{"first_name":"Brendan","full_name":"Shillingford, Brendan","last_name":"Shillingford"},{"full_name":"Freitas, Nando de","last_name":"Freitas","first_name":"Nando de"},{"id":"CB6FF8D2-008F-11EA-8E08-2637E6697425","full_name":"Vogels, Tim P","last_name":"Vogels","orcid":"0000-0003-3295-6181","first_name":"Tim P"}],"abstract":[{"lang":"eng","text":"Cortical circuits exhibit intricate recurrent architectures that are remarkably similar across different brain areas. Such stereotyped structure suggests the existence of common computational principles. However, such principles have remained largely elusive. Inspired by gated-memory networks, namely long short-term memory networks (LSTMs), we introduce a recurrent neural network in which information is gated through inhibitory cells that are subtractive (subLSTM). We propose a natural mapping of subLSTMs onto known canonical excitatory-inhibitory cortical microcircuits. Our empirical evaluation across sequential image classification and language modelling tasks shows that subLSTM units can achieve similar performance to LSTM units. These results suggest that cortical circuits can be optimised to solve complex contextual problems and proposes a novel view on their computational function.\r\nOverall our work provides a step towards unifying recurrent networks as used in machine learning with their biological counterparts."}],"publication_status":"published","citation":{"ieee":"R. P. Costa, Y. M. Assael, B. Shillingford, N. de Freitas, and T. P. Vogels, “Cortical microcircuits as gated-recurrent neural networks,” in <i>Advances in Neural Information Processing Systems</i>, Long Beach, CA, United States, 2017, vol. 30, pp. 272–283.","apa":"Costa, R. P., Assael, Y. M., Shillingford, B., Freitas, N. de, &#38; Vogels, T. P. (2017). Cortical microcircuits as gated-recurrent neural networks. In <i>Advances in Neural Information Processing Systems</i> (Vol. 30, pp. 272–283). Long Beach, CA, United States: Neural Information Processing Systems Foundation.","chicago":"Costa, Rui Ponte, Yannis M. Assael, Brendan Shillingford, Nando de Freitas, and Tim P Vogels. “Cortical Microcircuits as Gated-Recurrent Neural Networks.” In <i>Advances in Neural Information Processing Systems</i>, 30:272–83. Neural Information Processing Systems Foundation, 2017.","mla":"Costa, Rui Ponte, et al. “Cortical Microcircuits as Gated-Recurrent Neural Networks.” <i>Advances in Neural Information Processing Systems</i>, vol. 30, Neural Information Processing Systems Foundation, 2017, pp. 272–83.","ama":"Costa RP, Assael YM, Shillingford B, Freitas N de, Vogels TP. Cortical microcircuits as gated-recurrent neural networks. In: <i>Advances in Neural Information Processing Systems</i>. Vol 30. Neural Information Processing Systems Foundation; 2017:272-283.","ista":"Costa RP, Assael YM, Shillingford B, Freitas N de, Vogels TP. 2017. Cortical microcircuits as gated-recurrent neural networks. Advances in Neural Information Processing Systems. NIPS: Neural Information Processing System vol. 30, 272–283.","short":"R.P. Costa, Y.M. Assael, B. Shillingford, N. de Freitas, T.P. Vogels, in:, Advances in Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2017, pp. 272–283."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","quality_controlled":"1","oa_version":"Preprint","_id":"8129","publication_identifier":{"issn":["10495258"]},"extern":"1","oa":1,"date_updated":"2021-01-12T08:17:03Z","volume":30,"article_processing_charge":"No","arxiv":1},
{"type":"conference","day":"01","status":"public","intvolume":"      2017","page":"1710-1721","date_published":"2017-01-01T00:00:00Z","month":"01","language":[{"iso":"eng"}],"publisher":"Neural Information Processing Systems Foundation","department":[{"_id":"DaAl"}],"date_created":"2018-12-11T11:46:26Z","conference":{"end_date":"2017-12-09","location":"Long Beach, CA, United States","name":"NIPS: Neural Information Processing System","start_date":"2017-12-04"},"citation":{"ista":"Alistarh D-A, Grubic D, Li J, Tomioka R, Vojnović M. 2017. QSGD: Communication-efficient SGD via gradient quantization and encoding. NIPS: Neural Information Processing System, Advances in Neural Information Processing Systems, vol. 2017, 1710–1721.","short":"D.-A. Alistarh, D. Grubic, J. Li, R. Tomioka, M. Vojnović, in:, Neural Information Processing Systems Foundation, 2017, pp. 1710–1721.","ama":"Alistarh D-A, Grubic D, Li J, Tomioka R, Vojnović M. QSGD: Communication-efficient SGD via gradient quantization and encoding. In: Vol 2017. Neural Information Processing Systems Foundation; 2017:1710-1721.","mla":"Alistarh, Dan-Adrian, et al. <i>QSGD: Communication-Efficient SGD via Gradient Quantization and Encoding</i>. Vol. 2017, Neural Information Processing Systems Foundation, 2017, pp. 1710–21.","chicago":"Alistarh, Dan-Adrian, Demjan Grubic, Jerry Li, Ryota Tomioka, and Milan Vojnović. “QSGD: Communication-Efficient SGD via Gradient Quantization and Encoding,” 2017:1710–21. Neural Information Processing Systems Foundation, 2017.","ieee":"D.-A. Alistarh, D. Grubic, J. Li, R. Tomioka, and M. Vojnović, “QSGD: Communication-efficient SGD via gradient quantization and encoding,” presented at the NIPS: Neural Information Processing System, Long Beach, CA, United States, 2017, vol. 2017, pp. 1710–1721.","apa":"Alistarh, D.-A., Grubic, D., Li, J., Tomioka, R., &#38; Vojnović, M. (2017). QSGD: Communication-efficient SGD via gradient quantization and encoding (Vol. 2017, pp. 1710–1721). Presented at the NIPS: Neural Information Processing System, Long Beach, CA, United States: Neural Information Processing Systems Foundation."},"publication_status":"published","author":[{"last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Grubic, Demjan","last_name":"Grubic","first_name":"Demjan"},{"first_name":"Jerry","last_name":"Li","full_name":"Li, Jerry"},{"first_name":"Ryota","last_name":"Tomioka","full_name":"Tomioka, Ryota"},{"first_name":"Milan","last_name":"Vojnović","full_name":"Vojnović, Milan"}],"abstract":[{"lang":"eng","text":"Parallel implementations of stochastic gradient descent (SGD) have received significant research attention, thanks to its excellent scalability properties. A fundamental barrier when parallelizing SGD is the high bandwidth cost of communicating gradient updates between nodes; consequently, several lossy compresion heuristics have been proposed, by which nodes only communicate quantized gradients. Although effective in practice, these heuristics do not always converge. In this paper, we propose Quantized SGD (QSGD), a family of compression schemes with convergence guarantees and good practical performance. QSGD allows the user to smoothly trade off communication bandwidth and convergence time: nodes can adjust the number of bits sent per iteration, at the cost of possibly higher variance. We show that this trade-off is inherent, in the sense that improving it past some threshold would violate information-theoretic lower bounds. QSGD guarantees convergence for convex and non-convex objectives, under asynchrony, and can be extended to stochastic variance-reduced techniques. When applied to training deep neural networks for image classification and automated speech recognition, QSGD leads to significant reductions in end-to-end training time. For instance, on 16GPUs, we can train the ResNet-152 network to full accuracy on ImageNet 1.8 × faster than the full-precision variant. "}],"article_processing_charge":"No","publist_id":"7392","volume":2017,"date_updated":"2023-10-17T11:48:03Z","oa":1,"arxiv":1,"oa_version":"Submitted Version","quality_controlled":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publication_identifier":{"issn":["10495258"]},"_id":"431","year":"2017","external_id":{"arxiv":["1610.02132"]},"title":"QSGD: Communication-efficient SGD via gradient quantization and encoding","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1610.02132"}],"alternative_title":["Advances in Neural Information Processing Systems"]}]
