[{"language":[{"iso":"eng"}],"conference":{"start_date":"2023-07-23","name":"ICML: International Conference on Machine Learning","location":"Honolulu, Hawaii, HI, United States","end_date":"2023-07-29"},"publication":"Proceedings of the 40th International Conference on Machine Learning","acknowledged_ssus":[{"_id":"ScienComp"}],"oa_version":"Preprint","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning"}],"month":"07","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2301.00774"}],"status":"public","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_published":"2023-07-30T00:00:00Z","type":"conference","publication_identifier":{"eissn":["2640-3498"]},"oa":1,"page":"10323-10337","ec_funded":1,"quality_controlled":"1","publisher":"ML Research Press","_id":"14458","scopus_import":"1","author":[{"last_name":"Frantar","first_name":"Elias","full_name":"Frantar, Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","last_name":"Alistarh"}],"publication_status":"published","article_processing_charge":"No","department":[{"_id":"DaAl"}],"date_created":"2023-10-29T23:01:16Z","title":"SparseGPT: Massive language models can be accurately pruned in one-shot","alternative_title":["PMLR"],"intvolume":"       202","acknowledgement":"The authors gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 programme (grant agreement No. 805223 ScaleML), as well as experimental support from Eldar Kurtic, and from the IST Austria IT department, in particular Stefano Elefante, Andrei Hornoiu, and Alois Schloegl.","volume":202,"date_updated":"2023-10-31T09:59:42Z","year":"2023","citation":{"mla":"Frantar, Elias, and Dan-Adrian Alistarh. “SparseGPT: Massive Language Models Can Be Accurately Pruned in One-Shot.” <i>Proceedings of the 40th International Conference on Machine Learning</i>, vol. 202, ML Research Press, 2023, pp. 10323–37.","short":"E. Frantar, D.-A. Alistarh, in:, Proceedings of the 40th International Conference on Machine Learning, ML Research Press, 2023, pp. 10323–10337.","ista":"Frantar E, Alistarh D-A. 2023. SparseGPT: Massive language models can be accurately pruned in one-shot. Proceedings of the 40th International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 202, 10323–10337.","apa":"Frantar, E., &#38; Alistarh, D.-A. (2023). SparseGPT: Massive language models can be accurately pruned in one-shot. In <i>Proceedings of the 40th International Conference on Machine Learning</i> (Vol. 202, pp. 10323–10337). Honolulu, Hawaii, HI, United States: ML Research Press.","ama":"Frantar E, Alistarh D-A. SparseGPT: Massive language models can be accurately pruned in one-shot. In: <i>Proceedings of the 40th International Conference on Machine Learning</i>. Vol 202. ML Research Press; 2023:10323-10337.","chicago":"Frantar, Elias, and Dan-Adrian Alistarh. “SparseGPT: Massive Language Models Can Be Accurately Pruned in One-Shot.” In <i>Proceedings of the 40th International Conference on Machine Learning</i>, 202:10323–37. ML Research Press, 2023.","ieee":"E. Frantar and D.-A. 
Alistarh, “SparseGPT: Massive language models can be accurately pruned in one-shot,” in <i>Proceedings of the 40th International Conference on Machine Learning</i>, Honolulu, Hawaii, HI, United States, 2023, vol. 202, pp. 10323–10337."},"external_id":{"arxiv":["2301.00774"]},"arxiv":1,"day":"30","abstract":[{"lang":"eng","text":"We show for the first time that large-scale generative pretrained transformer (GPT) family models can be pruned to at least 50% sparsity in one-shot, without any retraining, at minimal loss of accuracy. This is achieved via a new pruning method called SparseGPT, specifically designed to work efficiently and accurately on massive GPT-family models. We can execute SparseGPT on the largest available open-source models, OPT-175B and BLOOM-176B, in under 4.5 hours, and can reach 60% unstructured sparsity with negligible increase in perplexity: remarkably, more than 100 billion weights from these models can be ignored at inference time. SparseGPT generalizes to semi-structured (2:4 and 4:8) patterns, and is compatible with weight quantization approaches. The code is available at: https://github.com/IST-DASLab/sparsegpt."}]},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","status":"public","main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/7cfd5df443b4eb0d69886a583b33de4c-Paper.pdf"}],"date_published":"2021-12-06T00:00:00Z","type":"conference","oa":1,"publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"language":[{"iso":"eng"}],"conference":{"end_date":"2021-12-14","location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems","start_date":"2021-12-06"},"publication":"35th Conference on Neural Information Processing Systems","month":"12","oa_version":"Published Version","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223"}],"volume":34,"acknowledgement":"We gratefully acknowledge funding the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), as well as computational support from Amazon Web Services (AWS) EC2.","external_id":{"arxiv":["2010.08222"]},"date_updated":"2022-06-27T07:05:12Z","year":"2021","citation":{"short":"E. Frantar, E. Kurtic, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Curran Associates, 2021, pp. 14873–14886.","mla":"Frantar, Elias, et al. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Curran Associates, 2021, pp. 14873–86.","ista":"Frantar E, Kurtic E, Alistarh D-A. 2021. M-FAC: Efficient matrix-free approximations of second-order information. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 14873–14886.","apa":"Frantar, E., Kurtic, E., &#38; Alistarh, D.-A. (2021). M-FAC: Efficient matrix-free approximations of second-order information. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 14873–14886). Virtual, Online: Curran Associates.","ama":"Frantar E, Kurtic E, Alistarh D-A. M-FAC: Efficient matrix-free approximations of second-order information. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Curran Associates; 2021:14873-14886.","ieee":"E. Frantar, E. Kurtic, and D.-A. 
{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","status":"public","main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/7cfd5df443b4eb0d69886a583b33de4c-Paper.pdf"}],"date_published":"2021-12-06T00:00:00Z","type":"conference","oa":1,"publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"language":[{"iso":"eng"}],"conference":{"end_date":"2021-12-14","location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems","start_date":"2021-12-06"},"publication":"35th Conference on Neural Information Processing Systems","month":"12","oa_version":"Published Version","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223"}],"volume":34,"acknowledgement":"We gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), as well as computational support from Amazon Web Services (AWS) EC2.","external_id":{"arxiv":["2010.08222"]},"date_updated":"2022-06-27T07:05:12Z","year":"2021","citation":{"short":"E. Frantar, E. Kurtic, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Curran Associates, 2021, pp. 14873–14886.","mla":"Frantar, Elias, et al. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Curran Associates, 2021, pp. 14873–86.","ista":"Frantar E, Kurtic E, Alistarh D-A. 2021. M-FAC: Efficient matrix-free approximations of second-order information. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 14873–14886.","apa":"Frantar, E., Kurtic, E., &#38; Alistarh, D.-A. (2021). M-FAC: Efficient matrix-free approximations of second-order information. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 14873–14886). Virtual, Online: Curran Associates.","ama":"Frantar E, Kurtic E, Alistarh D-A. M-FAC: Efficient matrix-free approximations of second-order information. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Curran Associates; 2021:14873-14886.","ieee":"E. Frantar, E. Kurtic, and D.-A. Alistarh, “M-FAC: Efficient matrix-free approximations of second-order information,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 14873–14886.","chicago":"Frantar, Elias, Eldar Kurtic, and Dan-Adrian Alistarh. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:14873–86. Curran Associates, 2021."},"abstract":[{"lang":"eng","text":"Efficiently approximating local curvature information of the loss function is a key tool for optimization and compression of deep neural networks. Yet, most existing methods to approximate second-order information have high computational or storage costs, which limits their practicality. In this work, we investigate matrix-free, linear-time approaches for estimating Inverse-Hessian Vector Products (IHVPs) for the case when the Hessian can be approximated as a sum of rank-one matrices, as in the classic approximation of the Hessian by the empirical Fisher matrix. We propose two new algorithms: the first is tailored towards network compression and can compute the IHVP for dimension d, if the Hessian is given as a sum of m rank-one matrices, using O(dm^2) precomputation, O(dm) cost for computing the IHVP, and query cost O(m) for any single element of the inverse Hessian. The second algorithm targets an optimization setting, where we wish to compute the product between the inverse Hessian, estimated over a sliding window of optimization steps, and a given gradient direction, as required for preconditioned SGD. We give an algorithm with cost O(dm + m^2) for computing the IHVP and O(dm + m^3) for adding or removing any gradient from the sliding window. These two algorithms yield state-of-the-art results for network pruning and optimization with lower computational overhead relative to existing second-order methods. Implementations are available at [9] and [17]."}],"arxiv":1,"day":"06","page":"14873-14886","quality_controlled":"1","ec_funded":1,"publisher":"Curran Associates","author":[{"full_name":"Frantar, Elias","first_name":"Elias","last_name":"Frantar","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f"},{"id":"47beb3a5-07b5-11eb-9b87-b108ec578218","full_name":"Kurtic, Eldar","last_name":"Kurtic","first_name":"Eldar"},{"last_name":"Alistarh","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"_id":"11463","scopus_import":"1","title":"M-FAC: Efficient matrix-free approximations of second-order information","intvolume":"34","publication_status":"published","date_created":"2022-06-26T22:01:35Z","department":[{"_id":"DaAl"}],"article_processing_charge":"No"},
5416–25.","chicago":"Konstantinov, Nikola H, Elias Frantar, Dan-Adrian Alistarh, and Christoph Lampert. “On the Sample Complexity of Adversarial Multi-Source PAC Learning.” In <i>Proceedings of the 37th International Conference on Machine Learning</i>, 119:5416–25. ML Research Press, 2020.","ieee":"N. H. Konstantinov, E. Frantar, D.-A. Alistarh, and C. Lampert, “On the sample complexity of adversarial multi-source PAC learning,” in <i>Proceedings of the 37th International Conference on Machine Learning</i>, Online, 2020, vol. 119, pp. 5416–5425.","apa":"Konstantinov, N. H., Frantar, E., Alistarh, D.-A., &#38; Lampert, C. (2020). On the sample complexity of adversarial multi-source PAC learning. In <i>Proceedings of the 37th International Conference on Machine Learning</i> (Vol. 119, pp. 5416–5425). Online: ML Research Press.","ama":"Konstantinov NH, Frantar E, Alistarh D-A, Lampert C. On the sample complexity of adversarial multi-source PAC learning. In: <i>Proceedings of the 37th International Conference on Machine Learning</i>. Vol 119. ML Research Press; 2020:5416-5425."},"date_updated":"2023-09-07T13:42:08Z","abstract":[{"lang":"eng","text":"We study the problem of learning from multiple untrusted data sources, a scenario of increasing practical relevance given the recent emergence of crowdsourcing and collaborative learning paradigms. Specifically, we analyze the situation in which a learning system obtains datasets from multiple sources, some of which might be biased or even adversarially perturbed. It is\r\nknown that in the single-source case, an adversary with the power to corrupt a fixed fraction of the training data can prevent PAC-learnability, that is, even in the limit of infinitely much training data, no learning system can approach the optimal test error. In this work we show that, surprisingly, the same is not true in the multi-source setting, where the adversary can arbitrarily\r\ncorrupt a fixed fraction of the data sources. Our main results are a generalization bound that provides finite-sample guarantees for this learning setting, as well as corresponding lower bounds. Besides establishing PAC-learnability our results also show that in a cooperative learning setting sharing data with other parties has provable benefits, even if some\r\nparticipants are malicious. "}],"day":"12","arxiv":1,"ddc":["000"],"acknowledgement":"Dan Alistarh is supported in part by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). 
This research was supported by the Scientific Service Units (SSU) of IST Austria through resources provided by Scientific Computing (SciComp).","volume":119,"author":[{"full_name":"Konstantinov, Nikola H","last_name":"Konstantinov","first_name":"Nikola H","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87"},{"id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f","first_name":"Elias","last_name":"Frantar","full_name":"Frantar, Elias"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh","first_name":"Dan-Adrian"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887","last_name":"Lampert","first_name":"Christoph"}],"scopus_import":"1","_id":"8724","intvolume":"       119","title":"On the sample complexity of adversarial multi-source PAC learning","article_processing_charge":"No","department":[{"_id":"DaAl"},{"_id":"ChLa"}],"date_created":"2020-11-05T15:25:58Z","publication_status":"published","file_date_updated":"2021-02-15T09:00:01Z","ec_funded":1,"quality_controlled":"1","page":"5416-5425","publisher":"ML Research Press","type":"conference","date_published":"2020-07-12T00:00:00Z","oa":1,"publication_identifier":{"issn":["2640-3498"]},"user_id":"3E5EF7F0-F248-11E8-B48F-1D18A9856A87","status":"public","related_material":{"link":[{"url":"http://proceedings.mlr.press/v119/konstantinov20a/konstantinov20a-supp.pdf","relation":"supplementary_material"}],"record":[{"status":"public","id":"10799","relation":"dissertation_contains"}]},"file":[{"success":1,"access_level":"open_access","relation":"main_file","creator":"dernst","file_id":"9120","file_size":281286,"checksum":"cc755d0054bc4b2be778ea7aa7884d2f","date_created":"2021-02-15T09:00:01Z","content_type":"application/pdf","file_name":"2020_PMLR_Konstantinov.pdf","date_updated":"2021-02-15T09:00:01Z"}],"has_accepted_license":"1","publication":"Proceedings of the 37th International Conference on Machine Learning","month":"07","project":[{"grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning","_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020"}],"oa_version":"Published Version","acknowledged_ssus":[{"_id":"ScienComp"}],"language":[{"iso":"eng"}],"conference":{"name":"ICML: International Conference on Machine Learning","start_date":"2020-07-12","end_date":"2020-07-18","location":"Online"}}]
