[{"author":[{"full_name":"Barbier, Jean","last_name":"Barbier","first_name":"Jean"},{"first_name":"TianQi","full_name":"Hou, TianQi","last_name":"Hou"},{"first_name":"Marco","orcid":"0000-0002-3242-7020","last_name":"Mondelli","id":"27EB676C-8706-11E9-9510-7717E6697425","full_name":"Mondelli, Marco"},{"first_name":"Manuel","last_name":"Saenz","full_name":"Saenz, Manuel"}],"_id":"12536","external_id":{"arxiv":["2205.10009"]},"alternative_title":["NeurIPS"],"volume":35,"intvolume":"        35","article_processing_charge":"No","date_published":"2022-11-20T00:00:00Z","year":"2022","oa":1,"conference":{"location":"New Orleans, LA, United States","end_date":"2022-12-09","name":"NeurIPS: Neural Information Processing Systems","start_date":"2022-11-28"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","title":"The price of ignorance: How much does it cost to forget noise structure in low-rank matrix estimation?","date_updated":"2024-10-09T21:04:25Z","type":"conference","publication_identifier":{"isbn":["9781713871088"]},"date_created":"2023-02-10T13:45:41Z","publication":"36th Annual Conference on Neural Information Processing Systems","abstract":[{"text":"We consider the problem of estimating a rank-1 signal corrupted by structured rotationally invariant noise, and address the following question: how well do inference algorithms perform when the noise statistics is unknown and hence Gaussian noise is assumed? While the matched Bayes-optimal setting with unstructured noise is well understood, the analysis of this mismatched problem is only at its premises. In this paper, we make a step towards understanding the effect of the strong source of mismatch which is the noise statistics. Our main technical contribution is the rigorous analysis of a Bayes estimator and of an approximate message passing (AMP) algorithm, both of which incorrectly assume a Gaussian setup. 
The first result exploits the theory of spherical integrals and of low-rank matrix perturbations; the idea behind the second one is to design and analyze an artificial AMP which, by taking advantage of the flexibility in the denoisers, is able to \"correct\" the mismatch. Armed with these sharp asymptotic characterizations, we unveil a rich and often unexpected phenomenology. For example, despite AMP is in principle designed to efficiently compute the Bayes estimator, the former is outperformed by the latter in terms of mean-square error. We show that this performance gap is due to an incorrect estimation of the signal norm. In fact, when the SNR is large enough, the overlaps of the AMP and the Bayes estimator coincide, and they even match those of optimal estimators taking into account the structure of the noise.","lang":"eng"}],"quality_controlled":"1","oa_version":"Preprint","language":[{"iso":"eng"}],"arxiv":1,"day":"20","department":[{"_id":"MaMo"}],"publication_status":"published","corr_author":"1","month":"11","scopus_import":"1","acknowledgement":"M. Mondelli was partially supported by the 2019 Lopez-Loreta Prize. The authors acknowledge\r\ndiscussions with A. Krajenbrink, M. Robinson, A. Depope, N. Macris and F. Pourkamali.\r\n","citation":{"short":"J. Barbier, T. Hou, M. Mondelli, M. Saenz, in:, 36th Annual Conference on Neural Information Processing Systems, 2022.","mla":"Barbier, Jean, et al. “The Price of Ignorance: How Much Does It Cost to Forget Noise Structure in Low-Rank Matrix Estimation?” <i>36th Annual Conference on Neural Information Processing Systems</i>, vol. 35, 2022.","chicago":"Barbier, Jean, TianQi Hou, Marco Mondelli, and Manuel Saenz. “The Price of Ignorance: How Much Does It Cost to Forget Noise Structure in Low-Rank Matrix Estimation?” In <i>36th Annual Conference on Neural Information Processing Systems</i>, Vol. 35, 2022.","ista":"Barbier J, Hou T, Mondelli M, Saenz M. 2022. 
The price of ignorance: How much does it cost to forget noise structure in low-rank matrix estimation? 36th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, NeurIPS, vol. 35.","ieee":"J. Barbier, T. Hou, M. Mondelli, and M. Saenz, “The price of ignorance: How much does it cost to forget noise structure in low-rank matrix estimation?,” in <i>36th Annual Conference on Neural Information Processing Systems</i>, New Orleans, LA, United States, 2022, vol. 35.","apa":"Barbier, J., Hou, T., Mondelli, M., &#38; Saenz, M. (2022). The price of ignorance: How much does it cost to forget noise structure in low-rank matrix estimation? In <i>36th Annual Conference on Neural Information Processing Systems</i> (Vol. 35). New Orleans, LA, United States.","ama":"Barbier J, Hou T, Mondelli M, Saenz M. The price of ignorance: How much does it cost to forget noise structure in low-rank matrix estimation? In: <i>36th Annual Conference on Neural Information Processing Systems</i>. Vol 35. 
; 2022."},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2205.10009"}],"status":"public"},{"intvolume":"        35","volume":35,"alternative_title":["Advances in Neural Information Processing Systems"],"_id":"12537","external_id":{"arxiv":["2205.10217"]},"author":[{"last_name":"Bombari","id":"ca726dda-de17-11ea-bc14-f9da834f63aa","full_name":"Bombari, Simone","first_name":"Simone"},{"full_name":"Amani, Mohammad Hossein","last_name":"Amani","first_name":"Mohammad Hossein"},{"first_name":"Marco","orcid":"0000-0002-3242-7020","id":"27EB676C-8706-11E9-9510-7717E6697425","full_name":"Mondelli, Marco","last_name":"Mondelli"}],"project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"}],"publisher":"Neural Information Processing Systems Foundation","year":"2022","date_published":"2022-07-24T00:00:00Z","page":"7628-7640","article_processing_charge":"No","publication":"36th Conference on Neural Information Processing Systems","date_created":"2023-02-10T13:46:37Z","publication_identifier":{"isbn":["9781713871088"],"eissn":["1049-5258"]},"abstract":[{"lang":"eng","text":"The Neural Tangent Kernel (NTK) has emerged as a powerful tool to provide memorization, optimization and generalization guarantees in deep neural networks. A line of work has studied the NTK spectrum for two-layer and deep networks with at least a layer with Ω(N) neurons, N being the number of training samples. Furthermore, there is increasing evidence suggesting that deep networks with sub-linear layer widths are powerful memorizers and optimizers, as long as the number of parameters exceeds the number of samples. Thus, a natural open question is whether the NTK is well conditioned in such a challenging sub-linear setup. In this paper, we answer this question in the affirmative. 
Our key technical contribution is a lower bound on the smallest NTK eigenvalue for deep networks with the minimum possible over-parameterization: the number of parameters is roughly Ω(N) and, hence, the number of neurons is as little as Ω(√N). To showcase the applicability of our NTK bounds, we provide two results concerning memorization capacity and optimization guarantees for gradient descent training."}],"type":"conference","date_updated":"2025-05-14T11:28:22Z","title":"Memorization and optimization in deep neural networks with minimum over-parameterization","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","OA_type":"green","OA_place":"repository","oa":1,"conference":{"start_date":"2022-11-28","name":"NeurIPS: Neural Information Processing Systems","end_date":"2022-12-09","location":"New Orleans, LA, United States"},"citation":{"ista":"Bombari S, Amani MH, Mondelli M. 2022. Memorization and optimization in deep neural networks with minimum over-parameterization. 36th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 35, 7628–7640.","short":"S. Bombari, M.H. Amani, M. Mondelli, in:, 36th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2022, pp. 7628–7640.","chicago":"Bombari, Simone, Mohammad Hossein Amani, and Marco Mondelli. “Memorization and Optimization in Deep Neural Networks with Minimum Over-Parameterization.” In <i>36th Conference on Neural Information Processing Systems</i>, 35:7628–40. Neural Information Processing Systems Foundation, 2022.","mla":"Bombari, Simone, et al. “Memorization and Optimization in Deep Neural Networks with Minimum Over-Parameterization.” <i>36th Conference on Neural Information Processing Systems</i>, vol. 35, Neural Information Processing Systems Foundation, 2022, pp. 7628–40.","apa":"Bombari, S., Amani, M. H., &#38; Mondelli, M. (2022). 
Memorization and optimization in deep neural networks with minimum over-parameterization. In <i>36th Conference on Neural Information Processing Systems</i> (Vol. 35, pp. 7628–7640). New Orleans, LA, United States: Neural Information Processing Systems Foundation.","ama":"Bombari S, Amani MH, Mondelli M. Memorization and optimization in deep neural networks with minimum over-parameterization. In: <i>36th Conference on Neural Information Processing Systems</i>. Vol 35. Neural Information Processing Systems Foundation; 2022:7628-7640.","ieee":"S. Bombari, M. H. Amani, and M. Mondelli, “Memorization and optimization in deep neural networks with minimum over-parameterization,” in <i>36th Conference on Neural Information Processing Systems</i>, New Orleans, LA, United States, 2022, vol. 35, pp. 7628–7640."},"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2205.10217"}],"status":"public","acknowledgement":"The authors were partially supported by the 2019 Lopez-Loreta prize, and they would like to thank\r\nQuynh Nguyen, Mahdi Soltanolkotabi and Adel Javanmard for helpful discussions.\r\n","publication_status":"published","department":[{"_id":"MaMo"}],"day":"24","month":"07","corr_author":"1","oa_version":"Preprint","language":[{"iso":"eng"}],"quality_controlled":"1","arxiv":1},{"volume":35,"intvolume":"        35","author":[{"first_name":"Michael","last_name":"Lohaus","full_name":"Lohaus, Michael"},{"last_name":"Kleindessner","full_name":"Kleindessner, Matthäus","first_name":"Matthäus"},{"first_name":"Krishnaram","last_name":"Kenthapadi","full_name":"Kenthapadi, Krishnaram"},{"orcid":"0000-0002-4850-0683","first_name":"Francesco","full_name":"Locatello, Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello"},{"first_name":"Chris","full_name":"Russell, Chris","last_name":"Russell"}],"extern":"1","_id":"14106","external_id":{"arxiv":["2204.04440"]},"alternative_title":["Advances in Neural Information Processing 
Systems"],"year":"2022","publisher":"Neural Information Processing Systems Foundation","article_processing_charge":"No","page":"16548-16562","date_published":"2022-12-15T00:00:00Z","date_updated":"2024-10-14T12:27:01Z","type":"conference","abstract":[{"lang":"eng","text":"We show that deep networks trained to satisfy demographic parity often do so\r\nthrough a form of race or gender awareness, and that the more we force a network\r\nto be fair, the more accurately we can recover race or gender from the internal state\r\nof the network. Based on this observation, we investigate an alternative fairness\r\napproach: we add a second classification head to the network to explicitly predict\r\nthe protected attribute (such as race or gender) alongside the original task. After\r\ntraining the two-headed network, we enforce demographic parity by merging the\r\ntwo heads, creating a network with the same architecture as the original network.\r\nWe establish a close relationship between existing approaches and our approach\r\nby showing (1) that the decisions of a fair classifier are well-approximated by our\r\napproach, and (2) that an unfair and optimally accurate classifier can be recovered\r\nfrom a fair classifier and our second head predicting the protected attribute. We use\r\nour explicit formulation to argue that the existing fairness approaches, just as ours,\r\ndemonstrate disparate treatment and that they are likely to be unlawful in a wide\r\nrange of scenarios under US law."}],"publication_identifier":{"isbn":["9781713871088"]},"date_created":"2023-08-21T12:12:42Z","publication":"36th Conference on Neural Information Processing Systems","conference":{"start_date":"2022-11-28","name":"NeurIPS: Neural Information Processing Systems","end_date":"2022-12-09","location":"New Orleans, LA, United States"},"oa":1,"title":"Are two heads the same as one? 
Identifying disparate treatment in fair neural networks","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","scopus_import":"1","status":"public","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2204.04440"}],"citation":{"apa":"Lohaus, M., Kleindessner, M., Kenthapadi, K., Locatello, F., &#38; Russell, C. (2022). Are two heads the same as one? Identifying disparate treatment in fair neural networks. In <i>36th Conference on Neural Information Processing Systems</i> (Vol. 35, pp. 16548–16562). New Orleans, LA, United States: Neural Information Processing Systems Foundation.","ama":"Lohaus M, Kleindessner M, Kenthapadi K, Locatello F, Russell C. Are two heads the same as one? Identifying disparate treatment in fair neural networks. In: <i>36th Conference on Neural Information Processing Systems</i>. Vol 35. Neural Information Processing Systems Foundation; 2022:16548-16562.","ieee":"M. Lohaus, M. Kleindessner, K. Kenthapadi, F. Locatello, and C. Russell, “Are two heads the same as one? Identifying disparate treatment in fair neural networks,” in <i>36th Conference on Neural Information Processing Systems</i>, New Orleans, LA, United States, 2022, vol. 35, pp. 16548–16562.","ista":"Lohaus M, Kleindessner M, Kenthapadi K, Locatello F, Russell C. 2022. Are two heads the same as one? Identifying disparate treatment in fair neural networks. 36th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 35, 16548–16562.","mla":"Lohaus, Michael, et al. “Are Two Heads the Same as One? Identifying Disparate Treatment in Fair Neural Networks.” <i>36th Conference on Neural Information Processing Systems</i>, vol. 35, Neural Information Processing Systems Foundation, 2022, pp. 16548–62.","short":"M. Lohaus, M. Kleindessner, K. Kenthapadi, F. Locatello, C. 
Russell, in:, 36th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2022, pp. 16548–16562.","chicago":"Lohaus, Michael, Matthäus Kleindessner, Krishnaram Kenthapadi, Francesco Locatello, and Chris Russell. “Are Two Heads the Same as One? Identifying Disparate Treatment in Fair Neural Networks.” In <i>36th Conference on Neural Information Processing Systems</i>, 35:16548–62. Neural Information Processing Systems Foundation, 2022."},"arxiv":1,"language":[{"iso":"eng"}],"quality_controlled":"1","oa_version":"Preprint","month":"12","day":"15","publication_status":"published","department":[{"_id":"FrLo"}]},{"status":"public","main_file_link":[{"url":"https://arxiv.org/abs/2207.09239","open_access":"1"}],"citation":{"apa":"Wenzel, F., Dittadi, A., Gehler, P. V., Carl-Johann Simon-Gabriel, C.-J. S.-G., Horn, M., Zietlow, D., … Locatello, F. (2022). Assaying out-of-distribution generalization in transfer learning. In <i>36th Conference on Neural Information Processing Systems</i> (Vol. 35, pp. 7181–7198). New Orleans, LA, United States: Neural Information Processing Systems Foundation.","ama":"Wenzel F, Dittadi A, Gehler PV, et al. Assaying out-of-distribution generalization in transfer learning. In: <i>36th Conference on Neural Information Processing Systems</i>. Vol 35. Neural Information Processing Systems Foundation; 2022:7181-7198.","ieee":"F. Wenzel <i>et al.</i>, “Assaying out-of-distribution generalization in transfer learning,” in <i>36th Conference on Neural Information Processing Systems</i>, New Orleans, LA, United States, 2022, vol. 35, pp. 7181–7198.","ista":"Wenzel F, Dittadi A, Gehler PV, Carl-Johann Simon-Gabriel C-JS-G, Horn M, Zietlow D, Kernert D, Russell C, Brox T, Schiele B, Schölkopf B, Locatello F. 2022. Assaying out-of-distribution generalization in transfer learning. 36th Conference on Neural Information Processing Systems. 
NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 35, 7181–7198.","short":"F. Wenzel, A. Dittadi, P.V. Gehler, C.-J.S.-G. Carl-Johann Simon-Gabriel, M. Horn, D. Zietlow, D. Kernert, C. Russell, T. Brox, B. Schiele, B. Schölkopf, F. Locatello, in:, 36th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2022, pp. 7181–7198.","mla":"Wenzel, Florian, et al. “Assaying Out-of-Distribution Generalization in Transfer Learning.” <i>36th Conference on Neural Information Processing Systems</i>, vol. 35, Neural Information Processing Systems Foundation, 2022, pp. 7181–98.","chicago":"Wenzel, Florian, Andrea Dittadi, Peter Vincent Gehler, Carl-Johann Simon-Gabriel Carl-Johann Simon-Gabriel, Max Horn, Dominik Zietlow, David Kernert, et al. “Assaying Out-of-Distribution Generalization in Transfer Learning.” In <i>36th Conference on Neural Information Processing Systems</i>, 35:7181–98. Neural Information Processing Systems Foundation, 2022."},"scopus_import":"1","month":"12","day":"15","publication_status":"published","department":[{"_id":"FrLo"}],"arxiv":1,"language":[{"iso":"eng"}],"quality_controlled":"1","oa_version":"Preprint","abstract":[{"lang":"eng","text":"Since out-of-distribution generalization is a generally ill-posed problem, various proxy targets (e.g., calibration, adversarial robustness, algorithmic corruptions, invariance across shifts) were studied across different research programs resulting in different recommendations. While sharing the same aspirational goal, these approaches have never been tested under the same\r\nexperimental conditions on real data. In this paper, we take a unified view of previous work, highlighting message discrepancies that we address empirically, and providing recommendations on how to measure the robustness of a model and how to improve it. 
To this end, we collect 172 publicly available dataset pairs for training and out-of-distribution evaluation of accuracy, calibration error, adversarial attacks, environment invariance, and synthetic corruptions. We fine-tune over 31k networks, from nine different architectures in the many- and\r\nfew-shot setting. Our findings confirm that in- and out-of-distribution accuracies tend to increase jointly, but show that their relation is largely dataset-dependent, and in general more nuanced and more complex than posited by previous, smaller scale studies."}],"publication_identifier":{"isbn":["9781713871088"]},"publication":"36th Conference on Neural Information Processing Systems","date_created":"2023-08-22T14:01:13Z","date_updated":"2023-09-06T10:34:43Z","type":"conference","title":"Assaying out-of-distribution generalization in transfer learning","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","conference":{"start_date":"2022-11-28","name":"NeurIPS: Neural Information Processing Systems","end_date":"2022-12-09","location":"New Orleans, LA, United States"},"oa":1,"publisher":"Neural Information Processing Systems Foundation","year":"2022","date_published":"2022-12-15T00:00:00Z","article_processing_charge":"No","page":"7181-7198","intvolume":"        35","volume":35,"_id":"14173","external_id":{"arxiv":["2207.09239"]},"extern":"1","alternative_title":["Advances in Neural Information Processing Systems"],"author":[{"first_name":"Florian","last_name":"Wenzel","full_name":"Wenzel, Florian"},{"first_name":"Andrea","full_name":"Dittadi, Andrea","last_name":"Dittadi"},{"first_name":"Peter Vincent","last_name":"Gehler","full_name":"Gehler, Peter Vincent"},{"first_name":"Carl-Johann Simon-Gabriel","last_name":"Carl-Johann Simon-Gabriel","full_name":"Carl-Johann Simon-Gabriel, Carl-Johann Simon-Gabriel"},{"first_name":"Max","last_name":"Horn","full_name":"Horn, Max"},{"first_name":"Dominik","last_name":"Zietlow","full_name":"Zietlow, 
Dominik"},{"last_name":"Kernert","full_name":"Kernert, David","first_name":"David"},{"full_name":"Russell, Chris","last_name":"Russell","first_name":"Chris"},{"last_name":"Brox","full_name":"Brox, Thomas","first_name":"Thomas"},{"first_name":"Bernt","full_name":"Schiele, Bernt","last_name":"Schiele"},{"first_name":"Bernhard","full_name":"Schölkopf, Bernhard","last_name":"Schölkopf"},{"first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","last_name":"Locatello"}]},{"article_processing_charge":"No","date_published":"2022-12-01T00:00:00Z","year":"2022","ddc":["000"],"publisher":"ML Research Press","author":[{"last_name":"Zhang","id":"2ce5da42-b2ea-11eb-bba5-9f264e9d002c","full_name":"Zhang, Yihan","first_name":"Yihan","orcid":"0000-0002-6465-6258"},{"last_name":"Weinberger","full_name":"Weinberger, Nir","first_name":"Nir"}],"alternative_title":["NeurIPS"],"_id":"17086","external_id":{"arxiv":["2206.02455"]},"volume":35,"intvolume":"        35","quality_controlled":"1","language":[{"iso":"eng"}],"oa_version":"Published Version","arxiv":1,"department":[{"_id":"MaMo"}],"publication_status":"published","day":"01","corr_author":"1","month":"12","acknowledgement":"Part of this work was done when YZ was a postdoc at Technion where he received funding from\r\nthe European Union’s Horizon 2020 research and innovation programme under grant agreement No 682203-ERC-[Inf-Speed-Tradeoff]. The work of NW was supported in part by the Israel Science Foundation (ISF) under Grant 1782/22. NW is grateful to Guy Bresler for introducing him to this problem, for the initial ideas that led to this research, and for many helpful discussions on the topic.","scopus_import":"1","citation":{"ieee":"Y. Zhang and N. Weinberger, “Mean estimation in high-dimensional binary Markov Gaussian mixture models,” in <i>36th Conference on Neural Information Processing Systems</i>, New Orleans, LA, United States, 2022, vol. 
35.","apa":"Zhang, Y., &#38; Weinberger, N. (2022). Mean estimation in high-dimensional binary Markov Gaussian mixture models. In <i>36th Conference on Neural Information Processing Systems</i> (Vol. 35). New Orleans, LA, United States: ML Research Press.","ama":"Zhang Y, Weinberger N. Mean estimation in high-dimensional binary Markov Gaussian mixture models. In: <i>36th Conference on Neural Information Processing Systems</i>. Vol 35. ML Research Press; 2022.","mla":"Zhang, Yihan, and Nir Weinberger. “Mean Estimation in High-Dimensional Binary Markov Gaussian Mixture Models.” <i>36th Conference on Neural Information Processing Systems</i>, vol. 35, ML Research Press, 2022.","short":"Y. Zhang, N. Weinberger, in:, 36th Conference on Neural Information Processing Systems, ML Research Press, 2022.","chicago":"Zhang, Yihan, and Nir Weinberger. “Mean Estimation in High-Dimensional Binary Markov Gaussian Mixture Models.” In <i>36th Conference on Neural Information Processing Systems</i>, Vol. 35. ML Research Press, 2022.","ista":"Zhang Y, Weinberger N. 2022. Mean estimation in high-dimensional binary Markov Gaussian mixture models. 36th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, NeurIPS, vol. 
35."},"file":[{"success":1,"access_level":"open_access","content_type":"application/pdf","relation":"main_file","checksum":"05f6f9f8fc34e224e0cad045b9489030","file_id":"17392","date_created":"2024-08-05T09:44:49Z","file_name":"2022_NeurIPS_Zhang.pdf","date_updated":"2024-08-05T09:44:49Z","file_size":476307,"creator":"dernst"}],"status":"public","oa":1,"has_accepted_license":"1","conference":{"end_date":"2022-12-09","location":"New Orleans, LA, United States","start_date":"2022-11-28","name":"NeurIPS: Neural Information Processing Systems"},"title":"Mean estimation in high-dimensional binary Markov Gaussian mixture models","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference","date_updated":"2024-08-05T09:48:58Z","file_date_updated":"2024-08-05T09:44:49Z","date_created":"2024-05-29T06:37:16Z","publication":"36th Conference on Neural Information Processing Systems","publication_identifier":{"isbn":["9781713871088"]},"abstract":[{"text":"We consider a high-dimensional mean estimation problem over a binary hidden Markov model, which illuminates the interplay between memory in data, sample size, dimension, and signal strength in statistical inference. In this model, an estimator observes n samples of a d-dimensional parameter vector θ∗∈ℝ^d, multiplied by a random sign S_i (1≤i≤n), and corrupted by isotropic standard Gaussian noise. The sequence of signs {S_i}_{i∈[n]}∈{−1,1}^n is drawn from a stationary homogeneous Markov chain with flip probability δ∈[0,1/2]. As δ varies, this model smoothly interpolates two well-studied models: the Gaussian Location Model for which δ=0 and the Gaussian Mixture Model for which δ=1/2. Assuming that the estimator knows δ, we establish a nearly minimax optimal (up to logarithmic factors) estimation error rate, as a function of ∥θ∗∥,δ,d,n. We then provide an upper bound to the case of estimating δ, assuming a (possibly inaccurate) knowledge of θ∗. The bound is proved to be tight when θ∗ is an accurately known constant. 
These results are then combined to an algorithm which estimates θ∗ with δ unknown a priori, and theoretical guarantees on its error are stated.","lang":"eng"}]},{"intvolume":"        35","volume":35,"alternative_title":["NeurIPS"],"_id":"17087","external_id":{"arxiv":["2208.11580"]},"author":[{"first_name":"Elias","full_name":"Frantar, Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f","last_name":"Frantar"},{"last_name":"Singh","full_name":"Singh, Sidak Pal","id":"DD138E24-D89D-11E9-9DC0-DEF6E5697425","first_name":"Sidak Pal"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X"}],"ddc":["000"],"project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","call_identifier":"H2020","name":"Elastic Coordination for Scalable Machine Learning"}],"publisher":"ML Research Press","year":"2022","date_published":"2022-12-01T00:00:00Z","article_processing_charge":"No","publication":"36th Conference on Neural Information Processing Systems","date_created":"2024-05-29T06:38:26Z","publication_identifier":{"isbn":["9781713871088"]},"abstract":[{"text":"We consider the problem of model compression for deep neural networks (DNNs) in the challenging one-shot/post-training setting, in which we are given an accurate trained model, and must compress it without any retraining, based only on a small amount of calibration input data. This problem has become popular in view of the emerging software and hardware support for executing models compressed via pruning and/or quantization with speedup, and well-performing solutions have been proposed independently for both compression approaches. In this paper, we introduce a new compression framework which covers both weight pruning and quantization in a unified setting, is time- and space-efficient, and considerably improves upon the practical performance of existing post-training methods. 
At the technical level, our approach is based on an exact and efficient realization of the classical Optimal Brain Surgeon (OBS) framework of [LeCun, Denker, and Solla, 1990] extended to also cover weight quantization at the scale of modern DNNs. From the practical perspective, our experimental results show that it can improve significantly upon the compression-accuracy trade-offs of existing post-training methods, and that it can enable the accurate compound application of both pruning and quantization in a post-training setting.","lang":"eng"}],"type":"conference","date_updated":"2026-04-07T12:43:03Z","file_date_updated":"2024-08-05T09:25:39Z","title":"Optimal brain compression: A framework for accurate post-training quantization and pruning","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","has_accepted_license":"1","oa":1,"conference":{"start_date":"2022-11-28","name":"NeurIPS: Neural Information Processing Systems","end_date":"2022-12-09","location":"New Orleans, LA, United States"},"citation":{"apa":"Frantar, E., Singh, S. P., &#38; Alistarh, D.-A. (2022). Optimal brain compression: A framework for accurate post-training quantization and pruning. In <i>36th Conference on Neural Information Processing Systems</i> (Vol. 35). New Orleans, LA, United States: ML Research Press.","ama":"Frantar E, Singh SP, Alistarh D-A. Optimal brain compression: A framework for accurate post-training quantization and pruning. In: <i>36th Conference on Neural Information Processing Systems</i>. Vol 35. ML Research Press; 2022.","ieee":"E. Frantar, S. P. Singh, and D.-A. Alistarh, “Optimal brain compression: A framework for accurate post-training quantization and pruning,” in <i>36th Conference on Neural Information Processing Systems</i>, New Orleans, LA, United States, 2022, vol. 35.","ista":"Frantar E, Singh SP, Alistarh D-A. 2022. Optimal brain compression: A framework for accurate post-training quantization and pruning. 36th Conference on Neural Information Processing Systems. 
NeurIPS: Neural Information Processing Systems, NeurIPS, vol. 35.","mla":"Frantar, Elias, et al. “Optimal Brain Compression: A Framework for Accurate Post-Training Quantization and Pruning.” <i>36th Conference on Neural Information Processing Systems</i>, vol. 35, ML Research Press, 2022.","chicago":"Frantar, Elias, Sidak Pal Singh, and Dan-Adrian Alistarh. “Optimal Brain Compression: A Framework for Accurate Post-Training Quantization and Pruning.” In <i>36th Conference on Neural Information Processing Systems</i>, Vol. 35. ML Research Press, 2022.","short":"E. Frantar, S.P. Singh, D.-A. Alistarh, in:, 36th Conference on Neural Information Processing Systems, ML Research Press, 2022."},"file":[{"file_size":491843,"creator":"dernst","date_updated":"2024-08-05T09:25:39Z","file_name":"2022_NeurIPS_Frantar.pdf","date_created":"2024-08-05T09:25:39Z","file_id":"17391","relation":"main_file","checksum":"38e7d75f578e8d2e207c81895e09f211","content_type":"application/pdf","success":1,"access_level":"open_access"}],"status":"public","scopus_import":"1","acknowledgement":"We gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 programme (grant agreement No 805223 ScaleML), as well as computational support from AWS EC2. We thank Eldar Kurtic for providing us BERT code and pretrained models, and the Neural Magic Team, notably Michael Goin and Mark Kurtz, for support with their software. ","department":[{"_id":"DaAl"}],"publication_status":"published","ec_funded":1,"day":"01","related_material":{"record":[{"relation":"dissertation_contains","id":"17485","status":"public"}]},"month":"12","corr_author":"1","oa_version":"Submitted Version","language":[{"iso":"eng"}],"quality_controlled":"1","arxiv":1}]