[{"department":[{"_id":"ChLa"}],"acknowledgement":"The authors thank Eugenia Iofinova and Bernd Prach for providing feedback on early versions of this paper. This publication was made possible by an ETH AI Center postdoctoral fellowship to Nikola Konstantinov.","date_created":"2022-02-28T14:05:42Z","article_processing_charge":"No","publication_identifier":{"eissn":["1533-7928"],"issn":["1532-4435"]},"article_type":"original","external_id":{"arxiv":["2102.06004"]},"title":"Fairness-aware PAC learning from corrupted data","volume":23,"year":"2022","publication":"Journal of Machine Learning Research","page":"1-60","related_material":{"record":[{"id":"13241","status":"public","relation":"shorter_version"},{"status":"public","id":"10799","relation":"dissertation_contains"}]},"type":"journal_article","intvolume":"23","publisher":"ML Research Press","author":[{"last_name":"Konstantinov","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87","orcid":"0009-0009-5204-7621","first_name":"Nikola H","full_name":"Konstantinov, Nikola H"},{"full_name":"Lampert, Christoph","first_name":"Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","orcid":"0000-0002-4561-241X"}],"_id":"10802","file_date_updated":"2022-07-12T15:08:28Z","has_accepted_license":"1","quality_controlled":"1","scopus_import":"1","citation":{"chicago":"Konstantinov, Nikola H, and Christoph Lampert. “Fairness-Aware PAC Learning from Corrupted Data.” <i>Journal of Machine Learning Research</i>. ML Research Press, 2022.","apa":"Konstantinov, N. H., &#38; Lampert, C. (2022). Fairness-aware PAC learning from corrupted data. <i>Journal of Machine Learning Research</i>. ML Research Press.","short":"N.H. Konstantinov, C. Lampert, Journal of Machine Learning Research 23 (2022) 1–60.","ama":"Konstantinov NH, Lampert C. Fairness-aware PAC learning from corrupted data. <i>Journal of Machine Learning Research</i>. 2022;23:1-60.","ista":"Konstantinov NH, Lampert C. 2022. Fairness-aware PAC learning from corrupted data. Journal of Machine Learning Research. 23, 1–60.","ieee":"N. H. Konstantinov and C. Lampert, “Fairness-aware PAC learning from corrupted data,” <i>Journal of Machine Learning Research</i>, vol. 23. ML Research Press, pp. 1–60, 2022.","mla":"Konstantinov, Nikola H., and Christoph Lampert. “Fairness-Aware PAC Learning from Corrupted Data.” <i>Journal of Machine Learning Research</i>, vol. 23, ML Research Press, 2022, pp. 1–60."},"keyword":["Fairness","robustness","data poisoning","trustworthy machine learning","PAC learning"],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","tmp":{"short":"CC BY (4.0)","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"month":"05","date_published":"2022-05-01T00:00:00Z","status":"public","publication_status":"published","abstract":[{"text":"Addressing fairness concerns about machine learning models is a crucial step towards their long-term adoption in real-world automated systems. While many approaches have been developed for training fair models from data, little is known about the robustness of these methods to data corruption. In this work we consider fairness-aware learning under worst-case data manipulations. We show that an adversary can in some situations force any learner to return an overly biased classifier, regardless of the sample size and with or without degrading\r\naccuracy, and that the strength of the excess bias increases for learning problems with underrepresented protected groups in the data. We also prove that our hardness results are tight up to constant factors. To this end, we study two natural learning algorithms that optimize for both accuracy and fairness and show that these algorithms enjoy guarantees that are order-optimal in terms of the corruption ratio and the protected groups frequencies in the large data\r\nlimit.","lang":"eng"}],"ddc":["004"],"corr_author":"1","file":[{"access_level":"open_access","creator":"kschuh","date_updated":"2022-07-12T15:08:28Z","relation":"main_file","content_type":"application/pdf","checksum":"9cac897b54a0ddf3a553a2c33e88cfda","file_id":"11570","date_created":"2022-07-12T15:08:28Z","file_size":551862,"file_name":"2022_JournalMachineLearningResearch_Konstantinov.pdf","success":1}],"day":"01","language":[{"iso":"eng"}],"oa":1,"date_updated":"2026-04-07T14:19:48Z","oa_version":"Published Version","arxiv":1},{"publication_identifier":{"eissn":["1533-7928"],"issn":["1532-4435"]},"article_processing_charge":"No","date_created":"2022-05-29T22:01:54Z","acknowledgement":"We would like to thank Mert Pilanci for several exploratory discussions in the early stage\r\nof the project, Jan Maas for clarifications about Jordan et al. (1998), and Max Zimmer for\r\nsuggestive numerical experiments. A. Shevchenko and M. Mondelli are partially supported\r\nby the 2019 Lopez-Loreta Prize. V. Kungurtsev acknowledges support to the OP VVV\r\nproject CZ.02.1.01/0.0/0.0/16 019/0000765 Research Center for Informatics.\r\n","department":[{"_id":"MaMo"},{"_id":"DaAl"}],"related_material":{"record":[{"status":"public","id":"17465","relation":"dissertation_contains"}],"link":[{"relation":"other","url":"https://www.jmlr.org/papers/v23/21-1365.html"}]},"page":"1-55","project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"}],"publication":"Journal of Machine Learning Research","year":"2022","volume":23,"article_type":"original","title":"Mean-field analysis of piecewise linear solutions for wide ReLU networks","external_id":{"arxiv":["2111.02278"]},"publisher":"Journal of Machine Learning Research","intvolume":"23","type":"journal_article","citation":{"apa":"Shevchenko, A., Kungurtsev, V., &#38; Mondelli, M. (2022). Mean-field analysis of piecewise linear solutions for wide ReLU networks. <i>Journal of Machine Learning Research</i>. Journal of Machine Learning Research.","short":"A. Shevchenko, V. Kungurtsev, M. Mondelli, Journal of Machine Learning Research 23 (2022) 1–55.","chicago":"Shevchenko, Alexander, Vyacheslav Kungurtsev, and Marco Mondelli. “Mean-Field Analysis of Piecewise Linear Solutions for Wide ReLU Networks.” <i>Journal of Machine Learning Research</i>. Journal of Machine Learning Research, 2022.","mla":"Shevchenko, Alexander, et al. “Mean-Field Analysis of Piecewise Linear Solutions for Wide ReLU Networks.” <i>Journal of Machine Learning Research</i>, vol. 23, no. 130, Journal of Machine Learning Research, 2022, pp. 1–55.","ieee":"A. Shevchenko, V. Kungurtsev, and M. Mondelli, “Mean-field analysis of piecewise linear solutions for wide ReLU networks,” <i>Journal of Machine Learning Research</i>, vol. 23, no. 130. Journal of Machine Learning Research, pp. 1–55, 2022.","ista":"Shevchenko A, Kungurtsev V, Mondelli M. 2022. Mean-field analysis of piecewise linear solutions for wide ReLU networks. Journal of Machine Learning Research. 23(130), 1–55.","ama":"Shevchenko A, Kungurtsev V, Mondelli M. Mean-field analysis of piecewise linear solutions for wide ReLU networks. <i>Journal of Machine Learning Research</i>. 2022;23(130):1-55."},"scopus_import":"1","quality_controlled":"1","has_accepted_license":"1","file_date_updated":"2022-05-30T08:22:55Z","_id":"11420","author":[{"first_name":"Aleksandr","last_name":"Shevchenko","id":"F2B06EC2-C99E-11E9-89F0-752EE6697425","full_name":"Shevchenko, Aleksandr"},{"full_name":"Kungurtsev, Vyacheslav","last_name":"Kungurtsev","first_name":"Vyacheslav"},{"first_name":"Marco","last_name":"Mondelli","id":"27EB676C-8706-11E9-9510-7717E6697425","orcid":"0000-0002-3242-7020","full_name":"Mondelli, Marco"}],"month":"04","tmp":{"short":"CC BY (4.0)","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","ddc":["000"],"publication_status":"published","abstract":[{"lang":"eng","text":"Understanding the properties of neural networks trained via stochastic gradient descent (SGD) is at the heart of the theory of deep learning. In this work, we take a mean-field view, and consider a two-layer ReLU network trained via noisy-SGD for a univariate regularized regression problem. Our main result is that SGD with vanishingly small noise injected in the gradients is biased towards a simple solution: at convergence, the ReLU network implements a piecewise linear map of the inputs, and the number of “knot” points -- i.e., points where the tangent of the ReLU network estimator changes -- between two consecutive training inputs is at most three. In particular, as the number of neurons of the network grows, the SGD dynamics is captured by the solution of a gradient flow and, at convergence, the distribution of the weights approaches the unique minimizer of a related free energy, which has a Gibbs form. Our key technical contribution consists in the analysis of the estimator resulting from this minimizer: we show that its second derivative vanishes everywhere, except at some specific locations which represent the “knot” points. We also provide empirical evidence that knots at locations distinct from the data points might occur, as predicted by our theory."}],"status":"public","date_published":"2022-04-01T00:00:00Z","day":"01","file":[{"checksum":"d4ff5d1affb34848b5c5e4002483fc62","file_id":"11422","date_created":"2022-05-30T08:22:55Z","file_size":1521701,"file_name":"21-1365.pdf","success":1,"access_level":"open_access","creator":"cchlebak","date_updated":"2022-05-30T08:22:55Z","relation":"main_file","content_type":"application/pdf"}],"corr_author":"1","arxiv":1,"date_updated":"2026-05-02T22:30:05Z","oa_version":"Published Version","issue":"130","oa":1,"language":[{"iso":"eng"}]},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","tmp":{"short":"CC BY (4.0)","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"month":"09","main_file_link":[{"url":"https://www.jmlr.org/papers/v22/21-0366.html","open_access":"1"}],"date_published":"2021-09-01T00:00:00Z","status":"public","publication_status":"published","abstract":[{"text":"The growing energy and performance costs of deep learning have driven the community to reduce the size of neural networks by selectively pruning components. Similarly to their biological counterparts, sparse networks generalize just as well, sometimes even better than, the original dense networks. Sparsity promises to reduce the memory footprint of regular networks to fit mobile devices, as well as shorten training time for ever growing networks. In this paper, we survey prior work on sparsity in deep learning and provide an extensive tutorial of sparsification for both inference and training. We describe approaches to remove and add elements of neural networks, different training strategies to achieve model sparsity, and mechanisms to exploit sparsity in practice. Our work distills ideas from more than 300 research papers and provides guidance to practitioners who wish to utilize sparsity today, as well as to researchers whose goal is to push the frontier forward. We include the necessary background on mathematical methods in sparsification, describe phenomena such as early structure adaptation, the intricate relations between sparsity and the training process, and show techniques for achieving acceleration on real hardware. We also define a metric of pruned parameter efficiency that could serve as a baseline for comparison of different sparse networks. We close by speculating on how sparsity can improve future workloads and outline major open problems in the field.","lang":"eng"}],"ddc":["000"],"corr_author":"1","file":[{"access_level":"open_access","creator":"cziletti","date_updated":"2021-10-27T15:34:18Z","relation":"main_file","content_type":"application/pdf","checksum":"3389d9d01fc58f8fb4c1a53e14a8abbf","file_id":"10192","date_created":"2021-10-27T15:34:18Z","file_size":3527521,"file_name":"2021_JMachLearnRes_Hoefler.pdf","success":1}],"day":"01","oa":1,"language":[{"iso":"eng"}],"issue":"241","oa_version":"Published Version","date_updated":"2025-06-26T11:53:12Z","arxiv":1,"department":[{"_id":"DaAl"}],"date_created":"2021-10-24T22:01:34Z","acknowledgement":"We thank Doug Burger, Steve Scott, Marco Heddes, and the respective teams at Microsoft for inspiring discussions on the topic. We thank Angelika Steger for uplifting debates about the connections to biological brains, Sidak Pal Singh for his support regarding experimental results, and Utku Evci as well as Xin Wang for comments on previous versions of this\r\nwork. Special thanks go to Bernhard Schölkopf, our JMLR editor Samy Bengio, and the three anonymous reviewers who provided excellent comprehensive, pointed, and deep review comments that improved the quality of our manuscript significantly.","article_processing_charge":"No","publication_identifier":{"eissn":["1533-7928"],"issn":["1532-4435"]},"article_type":"original","external_id":{"arxiv":["2102.00554"]},"title":"Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks","volume":22,"year":"2021","publication":"Journal of Machine Learning Research","OA_place":"publisher","page":"1-124","type":"journal_article","intvolume":"22","publisher":"ML Research Press","author":[{"full_name":"Hoefler, Torsten","last_name":"Hoefler","first_name":"Torsten"},{"full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Ben-Nun","first_name":"Tal","full_name":"Ben-Nun, Tal"},{"first_name":"Nikoli","last_name":"Dryden","full_name":"Dryden, Nikoli"},{"full_name":"Peste, Elena-Alexandra","id":"32D78294-F248-11E8-B48F-1D18A9856A87","last_name":"Peste","first_name":"Elena-Alexandra"}],"file_date_updated":"2021-10-27T15:34:18Z","_id":"10180","has_accepted_license":"1","scopus_import":"1","quality_controlled":"1","citation":{"apa":"Hoefler, T., Alistarh, D.-A., Ben-Nun, T., Dryden, N., &#38; Krumes, A. (2021). Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks. <i>Journal of Machine Learning Research</i>. ML Research Press.","short":"T. Hoefler, D.-A. Alistarh, T. Ben-Nun, N. Dryden, A. Krumes, Journal of Machine Learning Research 22 (2021) 1–124.","chicago":"Hoefler, Torsten, Dan-Adrian Alistarh, Tal Ben-Nun, Nikoli Dryden, and Alexandra Krumes. “Sparsity in Deep Learning: Pruning and Growth for Efficient Inference and Training in Neural Networks.” <i>Journal of Machine Learning Research</i>. ML Research Press, 2021.","ieee":"T. Hoefler, D.-A. Alistarh, T. Ben-Nun, N. Dryden, and A. Krumes, “Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks,” <i>Journal of Machine Learning Research</i>, vol. 22, no. 241. ML Research Press, pp. 1–124, 2021.","mla":"Hoefler, Torsten, et al. “Sparsity in Deep Learning: Pruning and Growth for Efficient Inference and Training in Neural Networks.” <i>Journal of Machine Learning Research</i>, vol. 22, no. 241, ML Research Press, 2021, pp. 1–124.","ista":"Hoefler T, Alistarh D-A, Ben-Nun T, Dryden N, Krumes A. 2021. Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks. Journal of Machine Learning Research. 22(241), 1–124.","ama":"Hoefler T, Alistarh D-A, Ben-Nun T, Dryden N, Krumes A. Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks. <i>Journal of Machine Learning Research</i>. 2021;22(241):1-124."}},{"date_published":"2021-04-01T00:00:00Z","status":"public","abstract":[{"lang":"eng","text":"As the size and complexity of models and datasets grow, so does the need for communication-efficient variants of stochastic gradient descent that can be deployed to perform parallel model training. One popular communication-compression method for data-parallel SGD is QSGD (Alistarh et al., 2017), which quantizes and encodes gradients to reduce communication costs. The baseline variant of QSGD provides strong theoretical guarantees, however, for practical purposes, the authors proposed a heuristic variant which we call QSGDinf, which demonstrated impressive empirical gains for distributed training of large neural networks. In this paper, we build on this work to propose a new gradient quantization scheme, and show that it has both stronger theoretical guarantees than QSGD, and matches and exceeds the empirical performance of the QSGDinf heuristic and of other compression methods."}],"publication_status":"published","ddc":["000"],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","tmp":{"short":"CC BY (4.0)","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"month":"04","main_file_link":[{"url":"https://www.jmlr.org/papers/v22/20-255.html","open_access":"1"}],"language":[{"iso":"eng"}],"oa":1,"issue":"114","oa_version":"Published Version","date_updated":"2025-07-10T12:01:54Z","arxiv":1,"corr_author":"1","file":[{"access_level":"open_access","creator":"asandaue","date_updated":"2021-06-23T07:09:41Z","relation":"main_file","content_type":"application/pdf","checksum":"6428aa8bcb67768b6949c99b55d5281d","file_id":"9595","date_created":"2021-06-23T07:09:41Z","file_size":11237154,"file_name":"2021_JournalOfMachineLearningResearch_Ramezani-Kebrya.pdf","success":1}],"day":"01","external_id":{"arxiv":["1908.06077"]},"article_type":"original","title":"NUQSGD: Provably communication-efficient data-parallel SGD via nonuniform quantization","volume":22,"year":"2021","publication":"Journal of Machine Learning Research","page":"1-43","department":[{"_id":"DaAl"}],"date_created":"2021-06-20T22:01:33Z","article_processing_charge":"No","publication_identifier":{"eissn":["1533-7928"],"issn":["1532-4435"]},"author":[{"last_name":"Ramezani-Kebrya","first_name":"Ali","full_name":"Ramezani-Kebrya, Ali"},{"full_name":"Faghri, Fartash","last_name":"Faghri","first_name":"Fartash"},{"last_name":"Markov","first_name":"Ilya","full_name":"Markov, Ilya"},{"last_name":"Aksenov","id":"2980135A-F248-11E8-B48F-1D18A9856A87","first_name":"Vitalii","full_name":"Aksenov, Vitalii"},{"orcid":"0000-0003-3650-940X","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian"},{"last_name":"Roy","first_name":"Daniel M.","full_name":"Roy, Daniel M."}],"file_date_updated":"2021-06-23T07:09:41Z","_id":"9571","has_accepted_license":"1","quality_controlled":"1","scopus_import":"1","citation":{"mla":"Ramezani-Kebrya, Ali, et al. “NUQSGD: Provably Communication-Efficient Data-Parallel SGD via Nonuniform Quantization.” <i>Journal of Machine Learning Research</i>, vol. 22, no. 114, Journal of Machine Learning Research, 2021, p. 1–43.","ista":"Ramezani-Kebrya A, Faghri F, Markov I, Aksenov V, Alistarh D-A, Roy DM. 2021. NUQSGD: Provably communication-efficient data-parallel SGD via nonuniform quantization. Journal of Machine Learning Research. 22(114), 1–43.","ieee":"A. Ramezani-Kebrya, F. Faghri, I. Markov, V. Aksenov, D.-A. Alistarh, and D. M. Roy, “NUQSGD: Provably communication-efficient data-parallel SGD via nonuniform quantization,” <i>Journal of Machine Learning Research</i>, vol. 22, no. 114. Journal of Machine Learning Research, p. 1–43, 2021.","ama":"Ramezani-Kebrya A, Faghri F, Markov I, Aksenov V, Alistarh D-A, Roy DM. NUQSGD: Provably communication-efficient data-parallel SGD via nonuniform quantization. <i>Journal of Machine Learning Research</i>. 2021;22(114):1–43.","apa":"Ramezani-Kebrya, A., Faghri, F., Markov, I., Aksenov, V., Alistarh, D.-A., &#38; Roy, D. M. (2021). NUQSGD: Provably communication-efficient data-parallel SGD via nonuniform quantization. <i>Journal of Machine Learning Research</i>. Journal of Machine Learning Research.","short":"A. Ramezani-Kebrya, F. Faghri, I. Markov, V. Aksenov, D.-A. Alistarh, D.M. Roy, Journal of Machine Learning Research 22 (2021) 1–43.","chicago":"Ramezani-Kebrya, Ali, Fartash Faghri, Ilya Markov, Vitalii Aksenov, Dan-Adrian Alistarh, and Daniel M. Roy. “NUQSGD: Provably Communication-Efficient Data-Parallel SGD via Nonuniform Quantization.” <i>Journal of Machine Learning Research</i>. Journal of Machine Learning Research, 2021."},"type":"journal_article","intvolume":"22","publisher":"Journal of Machine Learning Research"}]
