[{"page":"8439-8457","extern":"1","month":"04","date_created":"2023-08-21T09:27:43Z","publisher":"ML Research Press","department":[{"_id":"FrLo"}],"quality_controlled":"1","publication":"Proceedings of the 25th International Conference on Artificial Intelligence and Statistics","status":"public","intvolume":"       151","citation":{"chicago":"Dresdner, Gideon, Maria-Luiza Vladarean, Gunnar Rätsch, Francesco Locatello, Volkan Cevher, and Alp Yurtsever. “ Faster One-Sample Stochastic Conditional Gradient Method for Composite Convex Minimization.” In <i>Proceedings of the 25th International Conference on Artificial Intelligence and Statistics</i>, 151:8439–57. ML Research Press, 2022.","ieee":"G. Dresdner, M.-L. Vladarean, G. Rätsch, F. Locatello, V. Cevher, and A. Yurtsever, “ Faster one-sample stochastic conditional gradient method for composite convex minimization,” in <i>Proceedings of the 25th International Conference on Artificial Intelligence and Statistics</i>, Virtual, 2022, vol. 151, pp. 8439–8457.","ista":"Dresdner G, Vladarean M-L, Rätsch G, Locatello F, Cevher V, Yurtsever A. 2022.  Faster one-sample stochastic conditional gradient method for composite convex minimization. Proceedings of the 25th International Conference on Artificial Intelligence and Statistics. AISTATS: Conference on Artificial Intelligence and Statistics, PMLR, vol. 151, 8439–8457.","mla":"Dresdner, Gideon, et al. “ Faster One-Sample Stochastic Conditional Gradient Method for Composite Convex Minimization.” <i>Proceedings of the 25th International Conference on Artificial Intelligence and Statistics</i>, vol. 151, ML Research Press, 2022, pp. 8439–57.","short":"G. Dresdner, M.-L. Vladarean, G. Rätsch, F. Locatello, V. Cevher, A. Yurtsever, in:, Proceedings of the 25th International Conference on Artificial Intelligence and Statistics, ML Research Press, 2022, pp. 8439–8457.","ama":"Dresdner G, Vladarean M-L, Rätsch G, Locatello F, Cevher V, Yurtsever A.  Faster one-sample stochastic conditional gradient method for composite convex minimization. In: <i>Proceedings of the 25th International Conference on Artificial Intelligence and Statistics</i>. Vol 151. ML Research Press; 2022:8439-8457.","apa":"Dresdner, G., Vladarean, M.-L., Rätsch, G., Locatello, F., Cevher, V., &#38; Yurtsever, A. (2022).  Faster one-sample stochastic conditional gradient method for composite convex minimization. In <i>Proceedings of the 25th International Conference on Artificial Intelligence and Statistics</i> (Vol. 151, pp. 8439–8457). 
Virtual: ML Research Press."},"conference":{"end_date":"2022-03-30","start_date":"2022-03-28","name":"AISTATS: Conference on Artificial Intelligence and Statistics","location":"Virtual"},"title":"Faster one-sample stochastic conditional gradient method for composite convex minimization","day":"01","alternative_title":["PMLR"],"type":"conference","author":[{"full_name":"Dresdner, Gideon","first_name":"Gideon","last_name":"Dresdner"},{"last_name":"Vladarean","first_name":"Maria-Luiza","full_name":"Vladarean, Maria-Luiza"},{"last_name":"Rätsch","first_name":"Gunnar","full_name":"Rätsch, Gunnar"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","last_name":"Locatello","full_name":"Locatello, Francesco","first_name":"Francesco"},{"first_name":"Volkan","full_name":"Cevher, Volkan","last_name":"Cevher"},{"first_name":"Alp","full_name":"Yurtsever, Alp","last_name":"Yurtsever"}],"language":[{"iso":"eng"}],"arxiv":1,"article_processing_charge":"No","date_published":"2022-04-01T00:00:00Z","_id":"14093","abstract":[{"lang":"eng","text":"We propose a stochastic conditional gradient method (CGM) for minimizing convex finite-sum objectives formed as a sum of smooth and non-smooth terms. Existing CGM variants for this template either suffer from slow convergence rates, or require carefully increasing the batch size over the course of the algorithm’s execution, which leads to computing full gradients. In contrast, the proposed method, equipped with a stochastic average gradient (SAG) estimator, requires only one sample per iteration. Nevertheless, it guarantees fast convergence rates on par with more sophisticated variance reduction techniques. In applications we put special emphasis on problems with a large number of separable constraints. Such problems are prevalent among semidefinite programming (SDP) formulations arising in machine learning and theoretical computer science. We provide numerical experiments on matrix completion, unsupervised clustering, and sparsest-cut SDPs."}],"publication_status":"published","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2202.13212"}],"oa":1,"volume":151,"oa_version":"Preprint","year":"2022","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_updated":"2023-09-06T10:28:17Z","scopus_import":"1","external_id":{"arxiv":["2202.13212"]},"publication_identifier":{"issn":["2640-3498"]}},{"day":"01","type":"conference","author":[{"last_name":"Babaiee","full_name":"Babaiee, Zahra","first_name":"Zahra"},{"first_name":"Ramin","full_name":"Hasani, Ramin","last_name":"Hasani"},{"full_name":"Lechner, Mathias","first_name":"Mathias","last_name":"Lechner","id":"3DC22916-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Rus","first_name":"Daniela","full_name":"Rus, Daniela"},{"first_name":"Radu","full_name":"Grosu, Radu","last_name":"Grosu"}],"alternative_title":["PMLR"],"citation":{"short":"Z. Babaiee, R. Hasani, M. Lechner, D. Rus, R. Grosu, in:, Proceedings of the 38th International Conference on Machine Learning, ML Research Press, 2021, pp. 478–489.","apa":"Babaiee, Z., Hasani, R., Lechner, M., Rus, D., &#38; Grosu, R. (2021). On-off center-surround receptive fields for accurate and robust image classification. In <i>Proceedings of the 38th International Conference on Machine Learning</i> (Vol. 139, pp. 478–489). Virtual: ML Research Press.","ama":"Babaiee Z, Hasani R, Lechner M, Rus D, Grosu R. On-off center-surround receptive fields for accurate and robust image classification. 
In: <i>Proceedings of the 38th International Conference on Machine Learning</i>. Vol 139. ML Research Press; 2021:478-489.","ieee":"Z. Babaiee, R. Hasani, M. Lechner, D. Rus, and R. Grosu, “On-off center-surround receptive fields for accurate and robust image classification,” in <i>Proceedings of the 38th International Conference on Machine Learning</i>, Virtual, 2021, vol. 139, pp. 478–489.","ista":"Babaiee Z, Hasani R, Lechner M, Rus D, Grosu R. 2021. On-off center-surround receptive fields for accurate and robust image classification. Proceedings of the 38th International Conference on Machine Learning. ML: Machine Learning, PMLR, vol. 139, 478–489.","chicago":"Babaiee, Zahra, Ramin Hasani, Mathias Lechner, Daniela Rus, and Radu Grosu. “On-off Center-Surround Receptive Fields for Accurate and Robust Image Classification.” In <i>Proceedings of the 38th International Conference on Machine Learning</i>, 139:478–89. ML Research Press, 2021.","mla":"Babaiee, Zahra, et al. “On-off Center-Surround Receptive Fields for Accurate and Robust Image Classification.” <i>Proceedings of the 38th International Conference on Machine Learning</i>, vol. 139, ML Research Press, 2021, pp. 478–89."},"title":"On-off center-surround receptive fields for accurate and robust image classification","conference":{"name":"ML: Machine Learning","end_date":"2021-07-24","start_date":"2021-07-18","location":"Virtual"},"language":[{"iso":"eng"}],"ddc":["000"],"project":[{"_id":"25F42A32-B435-11E9-9278-68D0E5697425","call_identifier":"FWF","name":"The Wittgenstein Prize","grant_number":"Z211"}],"acknowledgement":"Z.B. is supported by the Doctoral College Resilient Embedded Systems, which is run jointly by the TU Wien’s Faculty of Informatics and the UAS Technikum Wien. R.G. is partially supported by the Horizon 2020 Era-Permed project Persorad, and ECSEL Project grant no. 783163 (iDev40). R.H and D.R were partially supported by Boeing and MIT. M.L. is supported in part by the Austrian Science Fund (FWF) under grant Z211-N23 (Wittgenstein Award).","month":"07","date_created":"2022-01-25T15:46:33Z","page":"478-489","publication":"Proceedings of the 38th International Conference on Machine Learning","department":[{"_id":"GradSch"},{"_id":"ToHe"}],"quality_controlled":"1","intvolume":"       139","status":"public","publisher":"ML Research Press","year":"2021","has_accepted_license":"1","oa_version":"Published Version","user_id":"2EBD1598-F248-11E8-B48F-1D18A9856A87","date_updated":"2022-05-04T15:02:27Z","publication_identifier":{"issn":["2640-3498"]},"file":[{"success":1,"date_created":"2022-01-26T07:38:32Z","file_id":"10681","relation":"main_file","content_type":"application/pdf","access_level":"open_access","date_updated":"2022-01-26T07:38:32Z","checksum":"d30eae62561bb517d9f978437d7677db","creator":"mlechner","file_size":4246561,"file_name":"babaiee21a.pdf"}],"_id":"10668","abstract":[{"text":"Robustness to variations in lighting conditions is a key objective for any deep vision system. To this end, our paper extends the receptive field of convolutional neural networks with two residual components, ubiquitous in the visual processing system of vertebrates: On-center and off-center pathways, with an excitatory center and inhibitory surround; OOCS for short. The On-center pathway is excited by the presence of a light stimulus in its center, but not in its surround, whereas the Off-center pathway is excited by the absence of a light stimulus in its center, but not in its surround. 
We design OOCS pathways via a difference of Gaussians, with their variance computed analytically from the size of the receptive fields. OOCS pathways complement each other in their response to light stimuli, ensuring this way a strong edge-detection capability, and as a result an accurate and robust inference under challenging lighting conditions. We provide extensive empirical evidence showing that networks supplied with OOCS pathways gain accuracy and illumination-robustness from the novel edge representation, compared to other baselines.","lang":"eng"}],"date_published":"2021-07-01T00:00:00Z","article_processing_charge":"No","file_date_updated":"2022-01-26T07:38:32Z","tmp":{"name":"Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported (CC BY-NC-ND 3.0)","image":"/images/cc_by_nc_nd.png","legal_code_url":"https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode","short":"CC BY-NC-ND (3.0)"},"volume":139,"license":"https://creativecommons.org/licenses/by-nc-nd/3.0/","publication_status":"published","main_file_link":[{"url":"https://proceedings.mlr.press/v139/babaiee21a","open_access":"1"}],"oa":1},{"publisher":"ML Research Press","publication":"Proceedings of The 24th International Conference on Artificial Intelligence and Statistics","quality_controlled":"1","department":[{"_id":"MaMo"}],"intvolume":"       130","status":"public","page":"397-405","month":"04","date_created":"2022-01-03T11:34:22Z","related_material":{"record":[{"relation":"later_version","id":"12480","status":"public"}]},"project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"}],"acknowledgement":"The authors would like to thank Andrea Montanari for helpful discussions. M. Mondelli was partially supported by the 2019 Lopez-Loreta Prize. R. Venkataramanan was partially supported by the Alan Turing Institute under the EPSRC grant EP/N510129/1.","language":[{"iso":"eng"}],"citation":{"mla":"Mondelli, Marco, and Ramji Venkataramanan. “Approximate Message Passing with Spectral Initialization for Generalized Linear Models.” <i>Proceedings of The 24th International Conference on Artificial Intelligence and Statistics</i>, edited by Arindam Banerjee and Kenji Fukumizu, vol. 130, ML Research Press, 2021, pp. 397–405.","ieee":"M. Mondelli and R. Venkataramanan, “Approximate message passing with spectral initialization for generalized linear models,” in <i>Proceedings of The 24th International Conference on Artificial Intelligence and Statistics</i>, Virtual, San Diego, CA, United States, 2021, vol. 130, pp. 397–405.","ista":"Mondelli M, Venkataramanan R. 2021. Approximate message passing with spectral initialization for generalized linear models. Proceedings of The 24th International Conference on Artificial Intelligence and Statistics. AISTATS: Artificial Intelligence and Statistics, Proceedings of Machine Learning Research, vol. 130, 397–405.","chicago":"Mondelli, Marco, and Ramji Venkataramanan. “Approximate Message Passing with Spectral Initialization for Generalized Linear Models.” In <i>Proceedings of The 24th International Conference on Artificial Intelligence and Statistics</i>, edited by Arindam Banerjee and Kenji Fukumizu, 130:397–405. ML Research Press, 2021.","apa":"Mondelli, M., &#38; Venkataramanan, R. (2021). Approximate message passing with spectral initialization for generalized linear models. In A. Banerjee &#38; K. Fukumizu (Eds.), <i>Proceedings of The 24th International Conference on Artificial Intelligence and Statistics</i> (Vol. 130, pp. 
397–405). Virtual, San Diego, CA, United States: ML Research Press.","ama":"Mondelli M, Venkataramanan R. Approximate message passing with spectral initialization for generalized linear models. In: Banerjee A, Fukumizu K, eds. <i>Proceedings of The 24th International Conference on Artificial Intelligence and Statistics</i>. Vol 130. ML Research Press; 2021:397-405.","short":"M. Mondelli, R. Venkataramanan, in:, A. Banerjee, K. Fukumizu (Eds.), Proceedings of The 24th International Conference on Artificial Intelligence and Statistics, ML Research Press, 2021, pp. 397–405."},"title":"Approximate message passing with spectral initialization for generalized linear models","conference":{"end_date":"2021-04-15","start_date":"2021-04-13","name":"AISTATS: Artificial Intelligence and Statistics","location":"Virtual, San Diego, CA, United States"},"day":"01","author":[{"orcid":"0000-0002-3242-7020","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","full_name":"Mondelli, Marco","last_name":"Mondelli"},{"last_name":"Venkataramanan","full_name":"Venkataramanan, Ramji","first_name":"Ramji"}],"type":"conference","alternative_title":["Proceedings of Machine Learning Research"],"publication_status":"published","oa":1,"main_file_link":[{"open_access":"1","url":"https://proceedings.mlr.press/v130/mondelli21a.html"}],"volume":130,"arxiv":1,"article_processing_charge":"Yes (via OA deal)","abstract":[{"lang":"eng","text":" We consider the problem of estimating a signal from measurements obtained via a generalized linear model. We focus on estimators based on approximate message passing (AMP), a family of iterative algorithms with many appealing features: the performance of AMP in the high-dimensional limit can be succinctly characterized under suitable model assumptions; AMP can also be tailored to the empirical distribution of the signal entries, and for a wide class of estimation problems, AMP is conjectured to be optimal among all polynomial-time algorithms. However, a major issue of AMP is that in many models (such as phase retrieval), it requires an initialization correlated with the ground-truth signal and independent from the measurement matrix. Assuming that such an initialization is available is typically not realistic. In this paper, we solve this problem by proposing an AMP algorithm initialized with a spectral estimator. With such an initialization, the standard AMP analysis fails since the spectral estimator depends in a complicated way on the design matrix. Our main contribution is a rigorous characterization of the performance of AMP with spectral initialization in the high-dimensional limit. The key technical idea is to define and analyze a two-phase artificial AMP algorithm that first produces the spectral estimator, and then closely approximates the iterates of the true AMP. We also provide numerical results that demonstrate the validity of the proposed approach. 
"}],"_id":"10598","date_published":"2021-04-01T00:00:00Z","external_id":{"arxiv":["2010.03460"]},"scopus_import":"1","date_updated":"2024-03-07T10:36:53Z","user_id":"3E5EF7F0-F248-11E8-B48F-1D18A9856A87","publication_identifier":{"issn":["2640-3498"]},"editor":[{"last_name":"Banerjee","full_name":"Banerjee, Arindam","first_name":"Arindam"},{"last_name":"Fukumizu","full_name":"Fukumizu, Kenji","first_name":"Kenji"}],"year":"2021","oa_version":"Preprint"},{"tmp":{"name":"Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported (CC BY-NC-ND 3.0)","image":"/images/cc_by_nc_nd.png","legal_code_url":"https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode","short":"CC BY-NC-ND (3.0)"},"file_date_updated":"2022-01-26T11:08:51Z","main_file_link":[{"open_access":"1","url":"http://proceedings.mlr.press/v119/hasani20a.html"}],"oa":1,"publication_status":"published","_id":"10673","date_published":"2020-01-01T00:00:00Z","abstract":[{"lang":"eng","text":"We propose a neural information processing system obtained by re-purposing the function of a biological neural circuit model to govern simulated and real-world control tasks. Inspired by the structure of the nervous system of the soil-worm, C. elegans, we introduce ordinary neural circuits (ONCs), defined as the model of biological neural circuits reparameterized for the control of alternative tasks. We first demonstrate that ONCs realize networks with higher maximum flow compared to arbitrary wired networks. We then learn instances of ONCs to control a series of robotic tasks, including the autonomous parking of a real-world rover robot. For reconfiguration of the purpose of the neural circuit, we adopt a search-based optimization algorithm. Ordinary neural circuits perform on par and, in some cases, significantly surpass the performance of contemporary deep learning models. ONC networks are compact, 77% sparser than their counterpart neural controllers, and their neural dynamics are fully interpretable at the cell-level."}],"file":[{"date_updated":"2022-01-26T11:08:51Z","access_level":"open_access","checksum":"c9a4a29161777fc1a89ef451c040e3b1","file_name":"2020_PMLR_Hasani.pdf","file_size":2329798,"creator":"cchlebak","success":1,"date_created":"2022-01-26T11:08:51Z","content_type":"application/pdf","relation":"main_file","file_id":"10691"}],"article_processing_charge":"No","publication_identifier":{"issn":["2640-3498"]},"date_updated":"2022-01-26T11:14:27Z","user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","scopus_import":"1","has_accepted_license":"1","year":"2020","oa_version":"Published Version","status":"public","department":[{"_id":"GradSch"},{"_id":"ToHe"}],"quality_controlled":"1","publication":"Proceedings of the 37th International Conference on Machine Learning","series_title":"PMLR","date_created":"2022-01-25T15:50:34Z","page":"4082-4093","ddc":["000"],"language":[{"iso":"eng"}],"acknowledgement":"RH and RG are partially supported by Horizon-2020 ECSEL Project grant No. 783163 (iDev40), Productive 4.0, and ATBMBFW CPS-IoT Ecosystem. ML was supported in part by the Austrian Science Fund (FWF) under grant Z211-N23\r\n(Wittgenstein Award). AA is supported by the National Science Foundation (NSF) Graduate Research Fellowship\r\nProgram. RH and DR are partially supported by The Boeing Company and JP Morgan Chase. 
This research work is partially drawn from the PhD dissertation of RH.","project":[{"grant_number":"Z211","name":"The Wittgenstein Prize","_id":"25F42A32-B435-11E9-9278-68D0E5697425","call_identifier":"FWF"}],"alternative_title":["PMLR"],"type":"conference","author":[{"full_name":"Hasani, Ramin","first_name":"Ramin","last_name":"Hasani"},{"id":"3DC22916-F248-11E8-B48F-1D18A9856A87","first_name":"Mathias","full_name":"Lechner, Mathias","last_name":"Lechner"},{"last_name":"Amini","full_name":"Amini, Alexander","first_name":"Alexander"},{"last_name":"Rus","full_name":"Rus, Daniela","first_name":"Daniela"},{"first_name":"Radu","full_name":"Grosu, Radu","last_name":"Grosu"}],"conference":{"location":"Virtual","name":"ML: Machine Learning","start_date":"2020-07-12","end_date":"2020-07-18"},"title":"A natural lottery ticket winner: Reinforcement learning with ordinary neural circuits","citation":{"mla":"Hasani, Ramin, et al. “A Natural Lottery Ticket Winner: Reinforcement Learning with Ordinary Neural Circuits.” <i>Proceedings of the 37th International Conference on Machine Learning</i>, 2020, pp. 4082–93.","ieee":"R. Hasani, M. Lechner, A. Amini, D. Rus, and R. Grosu, “A natural lottery ticket winner: Reinforcement learning with ordinary neural circuits,” in <i>Proceedings of the 37th International Conference on Machine Learning</i>, Virtual, 2020, pp. 4082–4093.","ista":"Hasani R, Lechner M, Amini A, Rus D, Grosu R. 2020. A natural lottery ticket winner: Reinforcement learning with ordinary neural circuits. Proceedings of the 37th International Conference on Machine Learning. ML: Machine Learning, PMLR, 4082–4093.","chicago":"Hasani, Ramin, Mathias Lechner, Alexander Amini, Daniela Rus, and Radu Grosu. “A Natural Lottery Ticket Winner: Reinforcement Learning with Ordinary Neural Circuits.” In <i>Proceedings of the 37th International Conference on Machine Learning</i>, 4082–93. PMLR, 2020.","apa":"Hasani, R., Lechner, M., Amini, A., Rus, D., &#38; Grosu, R. (2020). A natural lottery ticket winner: Reinforcement learning with ordinary neural circuits. In <i>Proceedings of the 37th International Conference on Machine Learning</i> (pp. 4082–4093). Virtual.","ama":"Hasani R, Lechner M, Amini A, Rus D, Grosu R. A natural lottery ticket winner: Reinforcement learning with ordinary neural circuits. In: <i>Proceedings of the 37th International Conference on Machine Learning</i>. PMLR; 2020:4082-4093.","short":"R. Hasani, M. Lechner, A. Amini, D. Rus, R. Grosu, in:, Proceedings of the 37th International Conference on Machine Learning, 2020, pp. 4082–4093."}},{"conference":{"location":"Online","end_date":"2020-07-18","start_date":"2020-07-12","name":"ICML: International Conference on Machine Learning"},"title":"On the sample complexity of adversarial multi-source PAC learning","ec_funded":1,"citation":{"ama":"Konstantinov NH, Frantar E, Alistarh D-A, Lampert C. On the sample complexity of adversarial multi-source PAC learning. In: <i>Proceedings of the 37th International Conference on Machine Learning</i>. Vol 119. ML Research Press; 2020:5416-5425.","apa":"Konstantinov, N. H., Frantar, E., Alistarh, D.-A., &#38; Lampert, C. (2020). On the sample complexity of adversarial multi-source PAC learning. In <i>Proceedings of the 37th International Conference on Machine Learning</i> (Vol. 119, pp. 5416–5425). Online: ML Research Press.","short":"N.H. Konstantinov, E. Frantar, D.-A. Alistarh, C. 
Lampert, in:, Proceedings of the 37th International Conference on Machine Learning, ML Research Press, 2020, pp. 5416–5425.","mla":"Konstantinov, Nikola H., et al. “On the Sample Complexity of Adversarial Multi-Source PAC Learning.” <i>Proceedings of the 37th International Conference on Machine Learning</i>, vol. 119, ML Research Press, 2020, pp. 5416–25.","chicago":"Konstantinov, Nikola H, Elias Frantar, Dan-Adrian Alistarh, and Christoph Lampert. “On the Sample Complexity of Adversarial Multi-Source PAC Learning.” In <i>Proceedings of the 37th International Conference on Machine Learning</i>, 119:5416–25. ML Research Press, 2020.","ieee":"N. H. Konstantinov, E. Frantar, D.-A. Alistarh, and C. Lampert, “On the sample complexity of adversarial multi-source PAC learning,” in <i>Proceedings of the 37th International Conference on Machine Learning</i>, Online, 2020, vol. 119, pp. 5416–5425.","ista":"Konstantinov NH, Frantar E, Alistarh D-A, Lampert C. 2020. On the sample complexity of adversarial multi-source PAC learning. Proceedings of the 37th International Conference on Machine Learning. ICML: International Conference on Machine Learning vol. 119, 5416–5425."},"author":[{"full_name":"Konstantinov, Nikola H","first_name":"Nikola H","last_name":"Konstantinov","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Frantar","first_name":"Elias","full_name":"Frantar, Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f"},{"last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph","full_name":"Lampert, Christoph"}],"type":"conference","day":"12","acknowledgement":"Dan Alistarh is supported in part by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). This research was supported by the Scientific Service Units (SSU) of IST Austria through resources provided by Scientific Computing (SciComp).","related_material":{"link":[{"relation":"supplementary_material","url":"http://proceedings.mlr.press/v119/konstantinov20a/konstantinov20a-supp.pdf"}],"record":[{"id":"10799","relation":"dissertation_contains","status":"public"}]},"project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223"}],"ddc":["000"],"language":[{"iso":"eng"}],"page":"5416-5425","date_created":"2020-11-05T15:25:58Z","month":"07","publisher":"ML Research Press","intvolume":"       119","status":"public","quality_controlled":"1","department":[{"_id":"DaAl"},{"_id":"ChLa"}],"publication":"Proceedings of the 37th International Conference on Machine Learning","year":"2020","has_accepted_license":"1","oa_version":"Published Version","acknowledged_ssus":[{"_id":"ScienComp"}],"publication_identifier":{"issn":["2640-3498"]},"date_updated":"2023-09-07T13:42:08Z","user_id":"3E5EF7F0-F248-11E8-B48F-1D18A9856A87","external_id":{"arxiv":["2002.10384"]},"scopus_import":"1","article_processing_charge":"No","arxiv":1,"date_published":"2020-07-12T00:00:00Z","_id":"8724","abstract":[{"text":"We study the problem of learning from multiple untrusted data sources, a scenario of increasing practical relevance given the recent emergence of crowdsourcing and collaborative learning paradigms. 
Specifically, we analyze the situation in which a learning system obtains datasets from multiple sources, some of which might be biased or even adversarially perturbed. It is known that in the single-source case, an adversary with the power to corrupt a fixed fraction of the training data can prevent PAC-learnability, that is, even in the limit of infinitely much training data, no learning system can approach the optimal test error. In this work we show that, surprisingly, the same is not true in the multi-source setting, where the adversary can arbitrarily corrupt a fixed fraction of the data sources. Our main results are a generalization bound that provides finite-sample guarantees for this learning setting, as well as corresponding lower bounds. Besides establishing PAC-learnability, our results also show that in a cooperative learning setting sharing data with other parties has provable benefits, even if some participants are malicious.","lang":"eng"}],"file":[{"relation":"main_file","file_id":"9120","content_type":"application/pdf","success":1,"date_created":"2021-02-15T09:00:01Z","file_size":281286,"creator":"dernst","file_name":"2020_PMLR_Konstantinov.pdf","access_level":"open_access","date_updated":"2021-02-15T09:00:01Z","checksum":"cc755d0054bc4b2be778ea7aa7884d2f"}],"oa":1,"publication_status":"published","volume":119,"file_date_updated":"2021-02-15T09:00:01Z"},{"has_accepted_license":"1","year":"2020","oa_version":"Published Version","publication_identifier":{"issn":["2640-3498"]},"scopus_import":"1","date_updated":"2023-02-23T13:57:24Z","user_id":"3E5EF7F0-F248-11E8-B48F-1D18A9856A87","_id":"9415","date_published":"2020-07-12T00:00:00Z","abstract":[{"text":"Optimizing convolutional neural networks for fast inference has recently become an extremely active area of research. One of the go-to solutions in this context is weight pruning, which aims to reduce computational and memory footprint by removing large subsets of the connections in a neural network. Surprisingly, much less attention has been given to exploiting sparsity in the activation maps, which tend to be naturally sparse in many settings thanks to the structure of rectified linear (ReLU) activation functions. In this paper, we present an in-depth analysis of methods for maximizing the sparsity of the activations in a trained neural network, and show that, when coupled with an efficient sparse-input convolution algorithm, we can leverage this sparsity for significant performance gains. To induce highly sparse activation maps without accuracy loss, we introduce a new regularization technique, coupled with a new threshold-based sparsification method based on a parameterized activation function called Forced-Activation-Threshold Rectified Linear Unit (FATReLU). We examine the impact of our methods on popular image classification models, showing that most architectures can adapt to significantly sparser activation maps without any accuracy loss. Our second contribution is showing that these compression gains can be translated into inference speedups: we provide a new algorithm to enable fast convolution operations over networks with sparse activations, and show that it can enable significant speedups for end-to-end inference on a range of popular models on the large-scale ImageNet image classification task on modern Intel CPUs, with little or no retraining cost. 
","lang":"eng"}],"file":[{"date_created":"2021-05-25T09:51:36Z","success":1,"content_type":"application/pdf","file_id":"9421","relation":"main_file","checksum":"2aaaa7d7226e49161311d91627cf783b","date_updated":"2021-05-25T09:51:36Z","access_level":"open_access","file_name":"2020_PMLR_Kurtz.pdf","creator":"kschuh","file_size":741899}],"article_processing_charge":"No","volume":119,"file_date_updated":"2021-05-25T09:51:36Z","oa":1,"author":[{"first_name":"Mark","full_name":"Kurtz, Mark","last_name":"Kurtz"},{"last_name":"Kopinsky","first_name":"Justin","full_name":"Kopinsky, Justin"},{"full_name":"Gelashvili, Rati","first_name":"Rati","last_name":"Gelashvili"},{"first_name":"Alexander","full_name":"Matveev, Alexander","last_name":"Matveev"},{"full_name":"Carr, John","first_name":"John","last_name":"Carr"},{"last_name":"Goin","full_name":"Goin, Michael","first_name":"Michael"},{"last_name":"Leiserson","full_name":"Leiserson, William","first_name":"William"},{"last_name":"Moore","first_name":"Sage","full_name":"Moore, Sage"},{"first_name":"Bill","full_name":"Nell, Bill","last_name":"Nell"},{"last_name":"Shavit","full_name":"Shavit, Nir","first_name":"Nir"},{"last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"type":"conference","day":"12","title":"Inducing and exploiting activation sparsity for fast neural network inference","conference":{"location":"Online","end_date":"2020-07-18","start_date":"2020-07-12","name":"ICML: International Conference on Machine Learning"},"citation":{"ista":"Kurtz M, Kopinsky J, Gelashvili R, Matveev A, Carr J, Goin M, Leiserson W, Moore S, Nell B, Shavit N, Alistarh D-A. 2020. Inducing and exploiting activation sparsity for fast neural network inference. 37th International Conference on Machine Learning, ICML 2020. ICML: International Conference on Machine Learning vol. 119, 5533–5543.","ieee":"M. Kurtz <i>et al.</i>, “Inducing and exploiting activation sparsity for fast neural network inference,” in <i>37th International Conference on Machine Learning, ICML 2020</i>, Online, 2020, vol. 119, pp. 5533–5543.","chicago":"Kurtz, Mark, Justin Kopinsky, Rati Gelashvili, Alexander Matveev, John Carr, Michael Goin, William Leiserson, et al. “Inducing and Exploiting Activation Sparsity for Fast Neural Network Inference.” In <i>37th International Conference on Machine Learning, ICML 2020</i>, 119:5533–43, 2020.","mla":"Kurtz, Mark, et al. “Inducing and Exploiting Activation Sparsity for Fast Neural Network Inference.” <i>37th International Conference on Machine Learning, ICML 2020</i>, vol. 119, 2020, pp. 5533–43.","short":"M. Kurtz, J. Kopinsky, R. Gelashvili, A. Matveev, J. Carr, M. Goin, W. Leiserson, S. Moore, B. Nell, N. Shavit, D.-A. Alistarh, in:, 37th International Conference on Machine Learning, ICML 2020, 2020, pp. 5533–5543.","apa":"Kurtz, M., Kopinsky, J., Gelashvili, R., Matveev, A., Carr, J., Goin, M., … Alistarh, D.-A. (2020). Inducing and exploiting activation sparsity for fast neural network inference. In <i>37th International Conference on Machine Learning, ICML 2020</i> (Vol. 119, pp. 5533–5543). Online.","ama":"Kurtz M, Kopinsky J, Gelashvili R, et al. Inducing and exploiting activation sparsity for fast neural network inference. In: <i>37th International Conference on Machine Learning, ICML 2020</i>. Vol 119. 
; 2020:5533-5543."},"ddc":["000"],"language":[{"iso":"eng"}],"date_created":"2021-05-23T22:01:45Z","month":"07","page":"5533-5543","intvolume":"       119","status":"public","publication":"37th International Conference on Machine Learning, ICML 2020","department":[{"_id":"DaAl"}],"quality_controlled":"1"}]
