[{"language":[{"iso":"eng"}],"title":"Deep neural collapse is provably optimal for the deep unconstrained features model","external_id":{"arxiv":["2305.13165"]},"date_published":"2023-12-15T00:00:00Z","month":"12","year":"2023","date_created":"2024-02-02T11:17:41Z","conference":{"start_date":"2023-12-10","end_date":"2023-12-16","name":"NeurIPS: Neural Information Processing Systems","location":"New Orleans, LA, United States"},"department":[{"_id":"MaMo"},{"_id":"ChLa"}],"main_file_link":[{"open_access":"1","url":" https://doi.org/10.48550/arXiv.2305.13165"}],"alternative_title":["NeurIPS"],"status":"public","author":[{"id":"d64d6a8d-eb8e-11eb-b029-96fd216dec3c","first_name":"Peter","full_name":"Súkeník, Peter","last_name":"Súkeník"},{"first_name":"Marco","full_name":"Mondelli, Marco","last_name":"Mondelli","orcid":"0000-0002-3242-7020","id":"27EB676C-8706-11E9-9510-7717E6697425"},{"first_name":"Christoph","orcid":"0000-0001-8622-7887","last_name":"Lampert","full_name":"Lampert, Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}],"abstract":[{"text":"Neural collapse (NC) refers to the surprising structure of the last layer of deep neural networks in the terminal phase of gradient descent training. Recently, an increasing amount of experimental evidence has pointed to the propagation of NC to earlier layers of neural networks. However, while the NC in the last layer is well studied theoretically, much less is known about its multi-layered counterpart - deep neural collapse (DNC). In particular, existing work focuses either on linear layers or only on the last two layers at the price of an extra assumption. Our paper fills this gap by generalizing the established analytical framework for NC - the unconstrained features model - to multiple non-linear layers. Our key technical contribution is to show that, in a deep unconstrained features model, the unique global optimum for binary classification exhibits all the properties typical of DNC. This explains the existing experimental evidence of DNC. We also empirically show that (i) by optimizing deep unconstrained features models via gradient descent, the resulting solution agrees well with our theory, and (ii) trained networks recover the unconstrained features suitable for the occurrence of DNC, thus supporting the validity of this modeling principle.","lang":"eng"}],"type":"conference","publication_status":"inpress","citation":{"chicago":"Súkeník, Peter, Marco Mondelli, and Christoph Lampert. “Deep Neural Collapse Is Provably Optimal for the Deep Unconstrained Features Model.” In <i>37th Annual Conference on Neural Information Processing Systems</i>, n.d.","ieee":"P. Súkeník, M. Mondelli, and C. Lampert, “Deep neural collapse is provably optimal for the deep unconstrained features model,” in <i>37th Annual Conference on Neural Information Processing Systems</i>, New Orleans, LA, United States.","apa":"Súkeník, P., Mondelli, M., &#38; Lampert, C. (n.d.). Deep neural collapse is provably optimal for the deep unconstrained features model. In <i>37th Annual Conference on Neural Information Processing Systems</i>. New Orleans, LA, United States.","ista":"Súkeník P, Mondelli M, Lampert C. Deep neural collapse is provably optimal for the deep unconstrained features model. 37th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, NeurIPS, .","short":"P. Súkeník, M. Mondelli, C. Lampert, in:, 37th Annual Conference on Neural Information Processing Systems, n.d.","mla":"Súkeník, Peter, et al. 
“Deep Neural Collapse Is Provably Optimal for the Deep Unconstrained Features Model.” <i>37th Annual Conference on Neural Information Processing Systems</i>.","ama":"Súkeník P, Mondelli M, Lampert C. Deep neural collapse is provably optimal for the deep unconstrained features model. In: <i>37th Annual Conference on Neural Information Processing Systems</i>."},"day":"15","acknowledgement":"M. M. is partially supported by the 2019 Lopez-Loreta Prize. The authors would like to thank Eugenia Iofinova, Bernd Prach and Simone Bombari for valuable feedback on the manuscript.","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"quality_controlled":"1","oa_version":"Preprint","_id":"14921","date_updated":"2024-09-10T13:03:19Z","oa":1,"article_processing_charge":"No","arxiv":1,"publication":"37th Annual Conference on Neural Information Processing Systems"},{"has_accepted_license":"1","department":[{"_id":"ChLa"}],"article_number":"2208.13499","main_file_link":[{"open_access":"1","url":" https://doi.org/10.48550/arXiv.2208.13499"}],"date_created":"2023-02-20T08:23:06Z","ddc":["004"],"doi":"10.48550/arXiv.2208.13499","year":"2022","month":"08","date_published":"2022-08-29T00:00:00Z","external_id":{"arxiv":["2208.13499"]},"title":"Generalization in Multi-objective machine learning","language":[{"iso":"eng"}],"arxiv":1,"publication":"arXiv","oa":1,"date_updated":"2023-02-21T08:24:55Z","article_processing_charge":"No","_id":"12662","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Preprint","publication_status":"submitted","citation":{"ama":"Súkeník P, Lampert C. Generalization in Multi-objective machine learning. <i>arXiv</i>. doi:<a href=\"https://doi.org/10.48550/arXiv.2208.13499\">10.48550/arXiv.2208.13499</a>","mla":"Súkeník, Peter, and Christoph Lampert. “Generalization in Multi-Objective Machine Learning.” <i>ArXiv</i>, 2208.13499, doi:<a href=\"https://doi.org/10.48550/arXiv.2208.13499\">10.48550/arXiv.2208.13499</a>.","short":"P. Súkeník, C. Lampert, ArXiv (n.d.).","ista":"Súkeník P, Lampert C. Generalization in Multi-objective machine learning. arXiv, 2208.13499.","ieee":"P. Súkeník and C. Lampert, “Generalization in Multi-objective machine learning,” <i>arXiv</i>. .","apa":"Súkeník, P., &#38; Lampert, C. (n.d.). Generalization in Multi-objective machine learning. <i>arXiv</i>. <a href=\"https://doi.org/10.48550/arXiv.2208.13499\">https://doi.org/10.48550/arXiv.2208.13499</a>","chicago":"Súkeník, Peter, and Christoph Lampert. “Generalization in Multi-Objective Machine Learning.” <i>ArXiv</i>, n.d. <a href=\"https://doi.org/10.48550/arXiv.2208.13499\">https://doi.org/10.48550/arXiv.2208.13499</a>."},"day":"29","type":"preprint","abstract":[{"lang":"eng","text":"Modern machine learning tasks often require considering not just one but multiple objectives. For example, besides the prediction quality, this could be the efficiency, robustness or fairness of the learned models, or any of their combinations. Multi-objective learning offers a natural framework for handling such problems without having to commit to early trade-offs. Surprisingly, statistical learning theory so far offers almost no insight into the generalization properties of multi-objective learning. In this work, we make first steps to fill this gap: we establish foundational generalization bounds for the multi-objective setting as well as generalization and excess bounds for learning with scalarizations. 
We also provide the first theoretical analysis of the relation between the Pareto-optimal sets of the true objectives and the Pareto-optimal sets of their empirical approximations from training data. In particular, we show a surprising asymmetry: all Pareto-optimal solutions can be approximated by empirically Pareto-optimal ones, but not vice versa."}],"status":"public","author":[{"id":"d64d6a8d-eb8e-11eb-b029-96fd216dec3c","last_name":"Súkeník","full_name":"Súkeník, Peter","first_name":"Peter"},{"orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph","last_name":"Lampert","first_name":"Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}]},{"has_accepted_license":"1","conference":{"location":"Baltimore, MD, United States","name":"International Conference on Machine Learning","end_date":"2022-07-23","start_date":"2022-07-17"},"file":[{"success":1,"creator":"chl","file_id":"12665","content_type":"application/pdf","relation":"main_file","date_created":"2023-02-20T08:30:10Z","checksum":"ab8695b1e24fb4fef4f1f9cd63ca8238","file_name":"sukeni-k22a.pdf","file_size":8470811,"date_updated":"2023-02-20T08:30:10Z","access_level":"open_access"}],"date_created":"2023-02-20T08:30:21Z","month":"07","date_published":"2022-07-19T00:00:00Z","publisher":"ML Research Press","scopus_import":"1","language":[{"iso":"eng"}],"publication":"Proceedings of the 39th International Conference on Machine Learning","page":"20697-20743","file_date_updated":"2023-02-20T08:30:10Z","day":"19","type":"conference","intvolume":"       162","status":"public","ddc":["004"],"year":"2022","external_id":{"arxiv":["2110.05365"]},"title":"Intriguing properties of input-dependent randomized smoothing","arxiv":1,"date_updated":"2023-02-23T10:03:47Z","volume":162,"oa":1,"article_processing_charge":"No","_id":"12664","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Published Version","quality_controlled":"1","publication_status":"published","citation":{"ieee":"P. Súkeník, A. Kuvshinov, and S. Günnemann, “Intriguing properties of input-dependent randomized smoothing,” in <i>Proceedings of the 39th International Conference on Machine Learning</i>, Baltimore, MD, United States, 2022, vol. 162, pp. 20697–20743.","apa":"Súkeník, P., Kuvshinov, A., &#38; Günnemann, S. (2022). Intriguing properties of input-dependent randomized smoothing. In <i>Proceedings of the 39th International Conference on Machine Learning</i> (Vol. 162, pp. 20697–20743). Baltimore, MD, United States: ML Research Press.","chicago":"Súkeník, Peter, Aleksei Kuvshinov, and Stephan Günnemann. “Intriguing Properties of Input-Dependent Randomized Smoothing.” In <i>Proceedings of the 39th International Conference on Machine Learning</i>, 162:20697–743. ML Research Press, 2022.","mla":"Súkeník, Peter, et al. “Intriguing Properties of Input-Dependent Randomized Smoothing.” <i>Proceedings of the 39th International Conference on Machine Learning</i>, vol. 162, ML Research Press, 2022, pp. 20697–743.","ama":"Súkeník P, Kuvshinov A, Günnemann S. Intriguing properties of input-dependent randomized smoothing. In: <i>Proceedings of the 39th International Conference on Machine Learning</i>. Vol 162. ML Research Press; 2022:20697-20743.","short":"P. Súkeník, A. Kuvshinov, S. Günnemann, in:, Proceedings of the 39th International Conference on Machine Learning, ML Research Press, 2022, pp. 20697–20743.","ista":"Súkeník P, Kuvshinov A, Günnemann S. 2022. Intriguing properties of input-dependent randomized smoothing. 
Proceedings of the 39th International Conference on Machine Learning. International Conference on Machine Learning vol. 162, 20697–20743."},"abstract":[{"lang":"eng","text":"Randomized smoothing is currently considered the state-of-the-art method to obtain certifiably robust classifiers. Despite its remarkable performance, the method is associated with various serious problems such as “certified accuracy waterfalls”, certification vs. accuracy trade-off, or even fairness issues. Input-dependent smoothing approaches have been proposed with intention of overcoming these flaws. However, we demonstrate that these methods lack formal guarantees and so the resulting certificates are not justified. We show that in general, the input-dependent smoothing suffers from the curse of dimensionality, forcing the variance function to have low semi-elasticity. On the other hand, we provide a theoretical and practical framework that enables the usage of input-dependent smoothing even in the presence of the curse of dimensionality, under strict restrictions. We present one concrete design of the smoothing variance function and test it on CIFAR10 and MNIST. Our design mitigates some of the problems of classical smoothing and is formally underlined, yet further improvement of the design is still necessary."}],"author":[{"id":"d64d6a8d-eb8e-11eb-b029-96fd216dec3c","last_name":"Súkeník","full_name":"Súkeník, Peter","first_name":"Peter"},{"last_name":"Kuvshinov","full_name":"Kuvshinov, Aleksei","first_name":"Aleksei"},{"full_name":"Günnemann, Stephan","last_name":"Günnemann","first_name":"Stephan"}]}]
