[{"author":[{"first_name":"Aleksandr","id":"F2B06EC2-C99E-11E9-89F0-752EE6697425","full_name":"Shevchenko, Aleksandr","last_name":"Shevchenko"},{"last_name":"Kögler","id":"94ec913c-dc85-11ea-9058-e5051ab2428b","first_name":"Kevin","full_name":"Kögler, Kevin"},{"first_name":"Hamed","full_name":"Hassani, Hamed","last_name":"Hassani"},{"orcid":"0000-0002-3242-7020","full_name":"Mondelli, Marco","first_name":"Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","last_name":"Mondelli"}],"quality_controlled":"1","publication":"Proceedings of the 40th International Conference on Machine Learning","language":[{"iso":"eng"}],"year":"2023","publication_identifier":{"eissn":["2640-3498"]},"acknowledgement":"Aleksandr Shevchenko, Kevin Kogler and Marco Mondelli are supported by the 2019 Lopez-Loreta Prize. Hamed Hassani acknowledges the support by the NSF CIF award (1910056) and the NSF Institute for CORE Emerging Methods in Data Science (EnCORE).","status":"public","article_processing_charge":"No","date_created":"2023-10-29T23:01:17Z","date_published":"2023-07-30T00:00:00Z","external_id":{"arxiv":["2212.13468"]},"related_material":{"record":[{"relation":"dissertation_contains","status":"public","id":"17465"}]},"abstract":[{"lang":"eng","text":"Autoencoders are a popular model in many branches of machine learning and lossy data compression. However, their fundamental limits, the performance of gradient methods and the features learnt during optimization remain poorly understood, even in the two-layer setting. In fact, earlier work has considered either linear autoencoders or specific training regimes (leading to vanishing or diverging compression rates). Our paper addresses this gap by focusing on non-linear two-layer autoencoders trained in the challenging proportional regime in which the input dimension scales linearly with the size of the representation. Our results characterize the minimizers of the population risk, and show that such minimizers are achieved by gradient methods; their structure is also unveiled, thus leading to a concise description of the features obtained via training. For the special case of a sign activation function, our analysis establishes the fundamental limits for the lossy compression of Gaussian sources via (shallow) autoencoders. Finally, while the results are proved for Gaussian data, numerical simulations on standard datasets display the universality of the theoretical predictions."}],"page":"31151-31209","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference","date_updated":"2026-05-30T22:30:05Z","_id":"14459","month":"07","title":"Fundamental limits of two-layer autoencoders, and achieving them with gradient methods","alternative_title":["PMLR"],"corr_author":"1","oa":1,"citation":{"short":"A. Shevchenko, K. Kögler, H. Hassani, M. Mondelli, in:, Proceedings of the 40th International Conference on Machine Learning, ML Research Press, 2023, pp. 31151–31209.","ama":"Shevchenko A, Kögler K, Hassani H, Mondelli M. Fundamental limits of two-layer autoencoders, and achieving them with gradient methods. In: <i>Proceedings of the 40th International Conference on Machine Learning</i>. Vol 202. ML Research Press; 2023:31151-31209.","chicago":"Shevchenko, Alexander, Kevin Kögler, Hamed Hassani, and Marco Mondelli. “Fundamental Limits of Two-Layer Autoencoders, and Achieving Them with Gradient Methods.” In <i>Proceedings of the 40th International Conference on Machine Learning</i>, 202:31151–209. ML Research Press, 2023.","mla":"Shevchenko, Alexander, et al. “Fundamental Limits of Two-Layer Autoencoders, and Achieving Them with Gradient Methods.” <i>Proceedings of the 40th International Conference on Machine Learning</i>, vol. 202, ML Research Press, 2023, pp. 31151–209.","ieee":"A. Shevchenko, K. Kögler, H. Hassani, and M. Mondelli, “Fundamental limits of two-layer autoencoders, and achieving them with gradient methods,” in <i>Proceedings of the 40th International Conference on Machine Learning</i>, Honolulu, Hawaii, HI, United States, 2023, vol. 202, pp. 31151–31209.","apa":"Shevchenko, A., Kögler, K., Hassani, H., &#38; Mondelli, M. (2023). Fundamental limits of two-layer autoencoders, and achieving them with gradient methods. In <i>Proceedings of the 40th International Conference on Machine Learning</i> (Vol. 202, pp. 31151–31209). Honolulu, Hawaii, HI, United States: ML Research Press.","ista":"Shevchenko A, Kögler K, Hassani H, Mondelli M. 2023. Fundamental limits of two-layer autoencoders, and achieving them with gradient methods. Proceedings of the 40th International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 202, 31151–31209."},"publisher":"ML Research Press","publication_status":"published","arxiv":1,"department":[{"_id":"MaMo"},{"_id":"DaAl"}],"intvolume":"       202","scopus_import":"1","volume":202,"project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"conference":{"name":"ICML: International Conference on Machine Learning","location":"Honolulu, Hawaii, HI, United States","start_date":"2023-07-23","end_date":"2023-07-29"},"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2212.13468"}],"oa_version":"Preprint","day":"30"},{"acknowledgement":"This project has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie grant agreement No 840605. This work was supported in part by the Academy of Finland, Grants 314888 and 333837. The authors would also like to thank David Harris, Neven Villani, and the anonymous reviewers for their very helpful comments and feedback on previous versions of this work.","year":"2022","publication":"International Colloquium on Structural Information and Communication Complexity","language":[{"iso":"eng"}],"isi":1,"publication_identifier":{"issn":["0302-9743"],"eissn":["1611-3349"],"isbn":["9783031099922"]},"author":[{"last_name":"Balliu","first_name":"Alkida","full_name":"Balliu, Alkida"},{"full_name":"Hirvonen, Juho","first_name":"Juho","last_name":"Hirvonen"},{"last_name":"Melnyk","full_name":"Melnyk, Darya","first_name":"Darya"},{"last_name":"Olivetti","full_name":"Olivetti, Dennis","first_name":"Dennis"},{"last_name":"Rybicki","orcid":"0000-0002-6432-6646","full_name":"Rybicki, Joel","id":"334EFD2E-F248-11E8-B48F-1D18A9856A87","first_name":"Joel"},{"first_name":"Jukka","full_name":"Suomela, Jukka","last_name":"Suomela"}],"quality_controlled":"1","abstract":[{"text":"In this work we introduce the graph-theoretic notion of mendability: for each locally checkable graph problem we can define its mending radius, which captures the idea of how far one needs to modify a partial solution in order to “patch a hole.” We explore how mendability is connected to the existence of efficient algorithms, especially in distributed, parallel, and fault-tolerant settings. It is easy to see that O(1)-mendable problems are also solvable in O(log∗n) rounds in the LOCAL model of distributed computing. One of the surprises is that in paths and cycles, a converse also holds in the following sense: if a problem Π can be solved in O(log∗n), there is always a restriction Π′⊆Π that is still efficiently solvable but that is also O(1)-mendable. We also explore the structure of the landscape of mendability. For example, we show that in trees, the mending radius of any locally checkable problem is O(1), Θ(logn), or Θ(n), while in general graphs the structure is much more diverse.","lang":"eng"}],"page":"1-20","external_id":{"isi":["000876977400001"],"arxiv":["2102.08703"]},"editor":[{"full_name":"Parter, Merav","first_name":"Merav","last_name":"Parter"}],"status":"public","article_processing_charge":"No","date_created":"2022-07-31T22:01:49Z","date_published":"2022-06-25T00:00:00Z","title":"Local mending","date_updated":"2025-04-14T07:50:55Z","_id":"11707","month":"06","ec_funded":1,"user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","type":"conference","series_title":"LNCS","main_file_link":[{"url":"https://arxiv.org/abs/2102.08703","open_access":"1"}],"oa_version":"Preprint","day":"25","doi":"10.1007/978-3-031-09993-9_1","scopus_import":"1","volume":13298,"project":[{"name":"Coordination in constrained and natural distributed systems","grant_number":"840605","_id":"26A5D39A-B435-11E9-9278-68D0E5697425","call_identifier":"H2020"}],"conference":{"location":"Paderborn, Germany","name":"SIROCCO: Structural Information and Communication Complexity","end_date":"2022-06-29","start_date":"2022-06-27"},"publication_status":"published","arxiv":1,"department":[{"_id":"DaAl"}],"intvolume":"     13298","oa":1,"citation":{"chicago":"Balliu, Alkida, Juho Hirvonen, Darya Melnyk, Dennis Olivetti, Joel Rybicki, and Jukka Suomela. “Local Mending.” In <i>International Colloquium on Structural Information and Communication Complexity</i>, edited by Merav Parter, 13298:1–20. LNCS. Springer Nature, 2022. <a href=\"https://doi.org/10.1007/978-3-031-09993-9_1\">https://doi.org/10.1007/978-3-031-09993-9_1</a>.","ama":"Balliu A, Hirvonen J, Melnyk D, Olivetti D, Rybicki J, Suomela J. Local mending. In: Parter M, ed. <i>International Colloquium on Structural Information and Communication Complexity</i>. Vol 13298. LNCS. Springer Nature; 2022:1-20. doi:<a href=\"https://doi.org/10.1007/978-3-031-09993-9_1\">10.1007/978-3-031-09993-9_1</a>","short":"A. Balliu, J. Hirvonen, D. Melnyk, D. Olivetti, J. Rybicki, J. Suomela, in:, M. Parter (Ed.), International Colloquium on Structural Information and Communication Complexity, Springer Nature, 2022, pp. 1–20.","mla":"Balliu, Alkida, et al. “Local Mending.” <i>International Colloquium on Structural Information and Communication Complexity</i>, edited by Merav Parter, vol. 13298, Springer Nature, 2022, pp. 1–20, doi:<a href=\"https://doi.org/10.1007/978-3-031-09993-9_1\">10.1007/978-3-031-09993-9_1</a>.","apa":"Balliu, A., Hirvonen, J., Melnyk, D., Olivetti, D., Rybicki, J., &#38; Suomela, J. (2022). Local mending. In M. Parter (Ed.), <i>International Colloquium on Structural Information and Communication Complexity</i> (Vol. 13298, pp. 1–20). Paderborn, Germany: Springer Nature. <a href=\"https://doi.org/10.1007/978-3-031-09993-9_1\">https://doi.org/10.1007/978-3-031-09993-9_1</a>","ista":"Balliu A, Hirvonen J, Melnyk D, Olivetti D, Rybicki J, Suomela J. 2022. Local mending. International Colloquium on Structural Information and Communication Complexity. SIROCCO: Structural Information and Communication ComplexityLNCS vol. 13298, 1–20.","ieee":"A. Balliu, J. Hirvonen, D. Melnyk, D. Olivetti, J. Rybicki, and J. Suomela, “Local mending,” in <i>International Colloquium on Structural Information and Communication Complexity</i>, Paderborn, Germany, 2022, vol. 13298, pp. 1–20."},"publisher":"Springer Nature"},{"has_accepted_license":"1","scopus_import":"1","project":[{"name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223","_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020"}],"conference":{"name":"PODC: Symposium on Principles of Distributed Computing","location":"Salerno, Italy","start_date":"2022-07-25","end_date":"2022-07-29"},"day":"21","oa_version":"Published Version","doi":"10.1145/3519270.3538435","oa":1,"citation":{"chicago":"Alistarh, Dan-Adrian, Joel Rybicki, and Sasha Voitovych. “Near-Optimal Leader Election in Population Protocols on Graphs.” In <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i>, 246–56. Association for Computing Machinery, 2022. <a href=\"https://doi.org/10.1145/3519270.3538435\">https://doi.org/10.1145/3519270.3538435</a>.","short":"D.-A. Alistarh, J. Rybicki, S. Voitovych, in:, Proceedings of the Annual ACM Symposium on Principles of Distributed Computing, Association for Computing Machinery, 2022, pp. 246–256.","ama":"Alistarh D-A, Rybicki J, Voitovych S. Near-optimal leader election in population protocols on graphs. In: <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i>. Association for Computing Machinery; 2022:246-256. doi:<a href=\"https://doi.org/10.1145/3519270.3538435\">10.1145/3519270.3538435</a>","mla":"Alistarh, Dan-Adrian, et al. “Near-Optimal Leader Election in Population Protocols on Graphs.” <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i>, Association for Computing Machinery, 2022, pp. 246–56, doi:<a href=\"https://doi.org/10.1145/3519270.3538435\">10.1145/3519270.3538435</a>.","apa":"Alistarh, D.-A., Rybicki, J., &#38; Voitovych, S. (2022). Near-optimal leader election in population protocols on graphs. In <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i> (pp. 246–256). Salerno, Italy: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3519270.3538435\">https://doi.org/10.1145/3519270.3538435</a>","ista":"Alistarh D-A, Rybicki J, Voitovych S. 2022. Near-optimal leader election in population protocols on graphs. Proceedings of the Annual ACM Symposium on Principles of Distributed Computing. PODC: Symposium on Principles of Distributed Computing, 246–256.","ieee":"D.-A. Alistarh, J. Rybicki, and S. Voitovych, “Near-optimal leader election in population protocols on graphs,” in <i>Proceedings of the Annual ACM Symposium on Principles of Distributed Computing</i>, Salerno, Italy, 2022, pp. 246–256."},"publisher":"Association for Computing Machinery","publication_status":"published","arxiv":1,"department":[{"_id":"DaAl"}],"date_updated":"2025-12-30T09:04:17Z","_id":"11844","month":"07","title":"Near-optimal leader election in population protocols on graphs","ddc":["000"],"corr_author":"1","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"type":"conference","ec_funded":1,"abstract":[{"lang":"eng","text":"In the stochastic population protocol model, we are given a connected graph with n nodes, and in every time step, a scheduler samples an edge of the graph uniformly at random and the nodes connected by this edge interact. A fundamental task in this model is stable leader election, in which all nodes start in an identical state and the aim is to reach a configuration in which (1) exactly one node is elected as leader and (2) this node remains as the unique leader no matter what sequence of interactions follows. On cliques, the complexity of this problem has recently been settled: time-optimal protocols stabilize in Θ(n log n) expected steps using Θ(log log n) states, whereas protocols that use O(1) states require Θ(n2) expected steps.\r\n\r\nIn this work, we investigate the complexity of stable leader election on general graphs. We provide the first non-trivial time lower bounds for leader election on general graphs, showing that, when moving beyond cliques, the complexity landscape of leader election becomes very diverse: the time required to elect a leader can range from O(1) to Θ(n3) expected steps. On the upper bound side, we first observe that there exists a protocol that is time-optimal on many graph families, but uses polynomially-many states. In contrast, we give a near-time-optimal protocol that uses only O(log2n) states that is at most a factor log n slower. Finally, we show that the constant-state protocol of Beauquier et al. [OPODIS 2013] is at most a factor n log n slower than the fast polynomial-state protocol. Moreover, among constant-state protocols, this protocol has near-optimal average case complexity on dense random graphs."}],"page":"246-256","file":[{"content_type":"application/pdf","success":1,"creator":"cchlebak","access_level":"open_access","file_id":"11854","date_updated":"2022-08-16T08:05:15Z","file_size":1593474,"relation":"main_file","date_created":"2022-08-16T08:05:15Z","file_name":"2022_PODC_Alistarh.pdf","checksum":"4c6b29172b8e355b4fbc364a2e0827b2"}],"status":"public","article_processing_charge":"Yes (via OA deal)","date_created":"2022-08-14T22:01:46Z","date_published":"2022-07-21T00:00:00Z","external_id":{"arxiv":["2205.12597"],"isi":["001031439100030"]},"related_material":{"record":[{"relation":"later_version","id":"19969","status":"public"}]},"isi":1,"year":"2022","language":[{"iso":"eng"}],"publication":"Proceedings of the Annual ACM Symposium on Principles of Distributed Computing","publication_identifier":{"isbn":["9781450392624"]},"file_date_updated":"2022-08-16T08:05:15Z","acknowledgement":"We thank the anonymous reviewers for their helpful comments. We gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","author":[{"last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X"},{"full_name":"Rybicki, Joel","orcid":"0000-0002-6432-6646","id":"334EFD2E-F248-11E8-B48F-1D18A9856A87","first_name":"Joel","last_name":"Rybicki"},{"last_name":"Voitovych","full_name":"Voitovych, Sasha","first_name":"Sasha"}],"quality_controlled":"1"},{"author":[{"last_name":"Pacut","full_name":"Pacut, Maciej","first_name":"Maciej"},{"last_name":"Parham","full_name":"Parham, Mahmoud","first_name":"Mahmoud"},{"first_name":"Joel","id":"334EFD2E-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-6432-6646","full_name":"Rybicki, Joel","last_name":"Rybicki"},{"full_name":"Schmid, Stefan","first_name":"Stefan","last_name":"Schmid"},{"last_name":"Suomela","first_name":"Jukka","full_name":"Suomela, Jukka"},{"last_name":"Tereshchenko","full_name":"Tereshchenko, Aleksandr","first_name":"Aleksandr"}],"article_number":"52","quality_controlled":"1","publication":"36th International Symposium on Distributed Computing","year":"2022","language":[{"iso":"eng"}],"publication_identifier":{"eisbn":["9783959772556"],"eissn":["1868-8969"]},"acknowledgement":"This research has received funding from the German Research Foundation (DFG), grant\r\n470029389 (FlexNets), 2021-2024, and the Marie Skłodowska-Curie grant agreement No. 840605.","file_date_updated":"2023-01-27T06:58:02Z","status":"public","date_published":"2022-10-17T00:00:00Z","date_created":"2023-01-13T11:06:28Z","article_processing_charge":"No","abstract":[{"text":"Online algorithms make decisions based on past inputs, with the goal of being competitive against an algorithm that sees also future inputs. In this work, we introduce time-local online algorithms; these are online algorithms in which the output at any given time is a function of only T latest inputs. Our main observation is that time-local online algorithms are closely connected to local distributed graph algorithms: distributed algorithms make decisions based on the local information in the spatial dimension, while time-local online algorithms make decisions based on the local information in the temporal dimension. We formalize this connection, and show how we can directly use the tools developed to study distributed approximability of graph optimization problems to prove upper and lower bounds on the competitive ratio achieved with time-local online algorithms. Moreover, we show how to use computational techniques to synthesize optimal time-local algorithms.","lang":"eng"}],"file":[{"file_size":524804,"date_updated":"2023-01-27T06:58:02Z","file_id":"12409","creator":"dernst","access_level":"open_access","success":1,"content_type":"application/pdf","file_name":"2022_LIPICs_Pacut.pdf","checksum":"11bbb56f68a00f2cf6bcce6cc7f5c5f9","date_created":"2023-01-27T06:58:02Z","relation":"main_file"}],"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference","ec_funded":1,"_id":"12182","date_updated":"2025-04-14T07:50:55Z","month":"10","ddc":["000"],"title":"Brief announcement: Temporal locality in online algorithms","citation":{"ama":"Pacut M, Parham M, Rybicki J, Schmid S, Suomela J, Tereshchenko A. Brief announcement: Temporal locality in online algorithms. In: <i>36th International Symposium on Distributed Computing</i>. Vol 246. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2022. doi:<a href=\"https://doi.org/10.4230/LIPIcs.DISC.2022.52\">10.4230/LIPIcs.DISC.2022.52</a>","short":"M. Pacut, M. Parham, J. Rybicki, S. Schmid, J. Suomela, A. Tereshchenko, in:, 36th International Symposium on Distributed Computing, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022.","chicago":"Pacut, Maciej, Mahmoud Parham, Joel Rybicki, Stefan Schmid, Jukka Suomela, and Aleksandr Tereshchenko. “Brief Announcement: Temporal Locality in Online Algorithms.” In <i>36th International Symposium on Distributed Computing</i>, Vol. 246. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022. <a href=\"https://doi.org/10.4230/LIPIcs.DISC.2022.52\">https://doi.org/10.4230/LIPIcs.DISC.2022.52</a>.","mla":"Pacut, Maciej, et al. “Brief Announcement: Temporal Locality in Online Algorithms.” <i>36th International Symposium on Distributed Computing</i>, vol. 246, 52, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022, doi:<a href=\"https://doi.org/10.4230/LIPIcs.DISC.2022.52\">10.4230/LIPIcs.DISC.2022.52</a>.","ieee":"M. Pacut, M. Parham, J. Rybicki, S. Schmid, J. Suomela, and A. Tereshchenko, “Brief announcement: Temporal locality in online algorithms,” in <i>36th International Symposium on Distributed Computing</i>, Augusta, GA, United States, 2022, vol. 246.","apa":"Pacut, M., Parham, M., Rybicki, J., Schmid, S., Suomela, J., &#38; Tereshchenko, A. (2022). Brief announcement: Temporal locality in online algorithms. In <i>36th International Symposium on Distributed Computing</i> (Vol. 246). Augusta, GA, United States: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.DISC.2022.52\">https://doi.org/10.4230/LIPIcs.DISC.2022.52</a>","ista":"Pacut M, Parham M, Rybicki J, Schmid S, Suomela J, Tereshchenko A. 2022. Brief announcement: Temporal locality in online algorithms. 36th International Symposium on Distributed Computing. DISC: Symposium on Distributed Computing vol. 246, 52."},"oa":1,"publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik","department":[{"_id":"DaAl"}],"publication_status":"published","intvolume":"       246","scopus_import":"1","has_accepted_license":"1","volume":246,"conference":{"name":"DISC: Symposium on Distributed Computing","location":"Augusta, GA, United States","end_date":"2022-10-27","start_date":"2022-10-25"},"project":[{"name":"Coordination in constrained and natural distributed systems","grant_number":"840605","call_identifier":"H2020","_id":"26A5D39A-B435-11E9-9278-68D0E5697425"}],"doi":"10.4230/LIPIcs.DISC.2022.52","oa_version":"Published Version","day":"17"},{"abstract":[{"lang":"eng","text":"The source code for replicating experiments presented in the paper.\r\n\r\nThe implementation of the designed priority schedulers can be found in Galois-2.2.1/include/Galois/WorkList/:\r\nStealingMultiQueue.h is the StealingMultiQueue.\r\nMQOptimized/ contains MQ Optimized variants.\r\n\r\nWe provide images that contain all the dependencies and datasets. Images can be pulled from npostnikova/mq-based-schedulers repository, or downloaded from Zenodo. See readme for more detail."}],"doi":"10.5281/ZENODO.5733408","main_file_link":[{"url":"https://doi.org/10.5281/zenodo.5813846","open_access":"1"}],"oa_version":"Published Version","day":"03","date_published":"2022-01-03T00:00:00Z","date_created":"2023-05-23T17:05:40Z","article_processing_charge":"No","publisher":"Zenodo","citation":{"ama":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. Multi-queues can be state-of-the-art priority schedulers. 2022. doi:<a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>","short":"A. Postnikova, N. Koval, G. Nadiradze, D.-A. Alistarh, (2022).","chicago":"Postnikova, Anastasiia, Nikita Koval, Giorgi Nadiradze, and Dan-Adrian Alistarh. “Multi-Queues Can Be State-of-the-Art Priority Schedulers.” Zenodo, 2022. <a href=\"https://doi.org/10.5281/ZENODO.5733408\">https://doi.org/10.5281/ZENODO.5733408</a>.","ieee":"A. Postnikova, N. Koval, G. Nadiradze, and D.-A. Alistarh, “Multi-queues can be state-of-the-art priority schedulers.” Zenodo, 2022.","apa":"Postnikova, A., Koval, N., Nadiradze, G., &#38; Alistarh, D.-A. (2022). Multi-queues can be state-of-the-art priority schedulers. Zenodo. <a href=\"https://doi.org/10.5281/ZENODO.5733408\">https://doi.org/10.5281/ZENODO.5733408</a>","ista":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. 2022. Multi-queues can be state-of-the-art priority schedulers, Zenodo, <a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>.","mla":"Postnikova, Anastasiia, et al. <i>Multi-Queues Can Be State-of-the-Art Priority Schedulers</i>. Zenodo, 2022, doi:<a href=\"https://doi.org/10.5281/ZENODO.5733408\">10.5281/ZENODO.5733408</a>."},"status":"public","oa":1,"related_material":{"record":[{"relation":"used_in_publication","status":"public","id":"11180"}],"link":[{"relation":"software","url":"https://github.com/npostnikova/mq-based-schedulers/tree/v1.1"}]},"department":[{"_id":"DaAl"}],"month":"01","_id":"13076","year":"2022","date_updated":"2025-04-14T13:51:59Z","corr_author":"1","ddc":["510"],"title":"Multi-queues can be state-of-the-art priority schedulers","type":"research_data_reference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","author":[{"full_name":"Postnikova, Anastasiia","first_name":"Anastasiia","last_name":"Postnikova"},{"first_name":"Nikita","id":"2F4DB10C-F248-11E8-B48F-1D18A9856A87","full_name":"Koval, Nikita","last_name":"Koval"},{"last_name":"Nadiradze","orcid":"0000-0001-5634-0731","full_name":"Nadiradze, Giorgi","first_name":"Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh"}]},{"related_material":{"record":[{"id":"13076","status":"public","relation":"research_data"}]},"external_id":{"isi":["000883318200025"],"arxiv":["2109.00657"]},"date_published":"2022-04-02T00:00:00Z","date_created":"2022-04-17T22:01:46Z","article_processing_charge":"No","status":"public","page":"353-367","abstract":[{"lang":"eng","text":"Designing and implementing efficient parallel priority schedulers is an active research area. An intriguing proposed design is the Multi-Queue: given n threads and m ≥ n distinct priority queues, task insertions are performed uniformly at random, while, to delete, a thread picks two queues uniformly at random, and removes the observed task of higher priority. This approach scales well, and has probabilistic rank guarantees: roughly, the rank of each task removed, relative to remaining tasks in all other queues, is O (m) in expectation. Yet, the performance of this pattern is below that of well-engineered schedulers, which eschew theoretical guarantees for practical efficiency.\r\n\r\nWe investigate whether it is possible to design and implement a Multi-Queue-based task scheduler that is both highly-efficient and has analytical guarantees. We propose a new variant called the Stealing Multi-Queue (SMQ), a cache-efficient variant of the Multi-Queue, which leverages both queue affinity---each thread has a local queue, from which tasks are usually removed; but, with some probability, threads also attempt to steal higher-priority tasks from the other queues---and task batching, that is, the processing of several tasks in a single insert / remove step. These ideas are well-known for task scheduling without priorities; our theoretical contribution is showing that, despite relaxations, this design can still provide rank guarantees, which in turn implies bounds on total work performed. We provide a general SMQ implementation which can surpass state-of-the-art schedulers such as OBIM and PMOD in terms of performance on popular graph-processing benchmarks. Notably, the performance improvement comes mainly from the superior rank guarantees provided by our scheduler, confirming that analytically-reasoned approaches can still provide performance improvements for priority task scheduling."}],"quality_controlled":"1","author":[{"first_name":"Anastasiia","full_name":"Postnikova, Anastasiia","last_name":"Postnikova"},{"first_name":"Nikita","id":"2F4DB10C-F248-11E8-B48F-1D18A9856A87","full_name":"Koval, Nikita","last_name":"Koval"},{"last_name":"Nadiradze","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","first_name":"Giorgi","full_name":"Nadiradze, Giorgi","orcid":"0000-0001-5634-0731"},{"full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh"}],"acknowledgement":"We would like to thank the anonymous reviewers for their useful comments. This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","publication_identifier":{"isbn":["9781450392044"]},"language":[{"iso":"eng"}],"year":"2022","publication":"Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","isi":1,"department":[{"_id":"DaAl"}],"arxiv":1,"publication_status":"published","publisher":"Association for Computing Machinery","citation":{"chicago":"Postnikova, Anastasiia, Nikita Koval, Giorgi Nadiradze, and Dan-Adrian Alistarh. “Multi-Queues Can Be State-of-the-Art Priority Schedulers.” In <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, 353–67. Association for Computing Machinery, 2022. <a href=\"https://doi.org/10.1145/3503221.3508432\">https://doi.org/10.1145/3503221.3508432</a>.","short":"A. Postnikova, N. Koval, G. Nadiradze, D.-A. Alistarh, in:, Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, Association for Computing Machinery, 2022, pp. 353–367.","ama":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. Multi-queues can be state-of-the-art priority schedulers. In: <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>. Association for Computing Machinery; 2022:353-367. doi:<a href=\"https://doi.org/10.1145/3503221.3508432\">10.1145/3503221.3508432</a>","ieee":"A. Postnikova, N. Koval, G. Nadiradze, and D.-A. Alistarh, “Multi-queues can be state-of-the-art priority schedulers,” in <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, Seoul, Republic of Korea, 2022, pp. 353–367.","ista":"Postnikova A, Koval N, Nadiradze G, Alistarh D-A. 2022. Multi-queues can be state-of-the-art priority schedulers. Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming. PPoPP: Sympopsium on Principles and Practice of Parallel Programming, 353–367.","apa":"Postnikova, A., Koval, N., Nadiradze, G., &#38; Alistarh, D.-A. (2022). Multi-queues can be state-of-the-art priority schedulers. In <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i> (pp. 353–367). Seoul, Republic of Korea: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3503221.3508432\">https://doi.org/10.1145/3503221.3508432</a>","mla":"Postnikova, Anastasiia, et al. “Multi-Queues Can Be State-of-the-Art Priority Schedulers.” <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, Association for Computing Machinery, 2022, pp. 353–67, doi:<a href=\"https://doi.org/10.1145/3503221.3508432\">10.1145/3503221.3508432</a>."},"oa":1,"doi":"10.1145/3503221.3508432","oa_version":"Preprint","day":"02","main_file_link":[{"open_access":"1","url":" https://doi.org/10.48550/arXiv.2109.00657"}],"project":[{"call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223"}],"conference":{"name":"PPoPP: Sympopsium on Principles and Practice of Parallel Programming","location":"Seoul, Republic of Korea","end_date":"2022-04-06","start_date":"2022-04-02"},"scopus_import":"1","ec_funded":1,"type":"conference","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","corr_author":"1","title":"Multi-queues can be state-of-the-art priority schedulers","month":"04","_id":"11180","date_updated":"2025-04-14T07:49:13Z"},{"conference":{"end_date":"2022-04-06","start_date":"2022-04-02","name":"PPoPP: Sympopsium on Principles and Practice of Parallel Programming","location":"Seoul, Republic of Korea"},"has_accepted_license":"1","scopus_import":"1","doi":"10.1145/3503221.3508410","oa_version":"Published Version","day":"02","publisher":"Association for Computing Machinery","citation":{"chicago":"Brown, Trevor A, William Sigouin, and Dan-Adrian Alistarh. “PathCAS: An Efficient Middle Ground for Concurrent Search Data Structures.” In <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, 385–99. Association for Computing Machinery, 2022. <a href=\"https://doi.org/10.1145/3503221.3508410\">https://doi.org/10.1145/3503221.3508410</a>.","short":"T.A. Brown, W. Sigouin, D.-A. Alistarh, in:, Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, Association for Computing Machinery, 2022, pp. 385–399.","ama":"Brown TA, Sigouin W, Alistarh D-A. PathCAS: An efficient middle ground for concurrent search data structures. In: <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>. Association for Computing Machinery; 2022:385-399. doi:<a href=\"https://doi.org/10.1145/3503221.3508410\">10.1145/3503221.3508410</a>","mla":"Brown, Trevor A., et al. “PathCAS: An Efficient Middle Ground for Concurrent Search Data Structures.” <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, Association for Computing Machinery, 2022, pp. 385–99, doi:<a href=\"https://doi.org/10.1145/3503221.3508410\">10.1145/3503221.3508410</a>.","apa":"Brown, T. A., Sigouin, W., &#38; Alistarh, D.-A. (2022). PathCAS: An efficient middle ground for concurrent search data structures. In <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i> (pp. 385–399). Seoul, Republic of Korea: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3503221.3508410\">https://doi.org/10.1145/3503221.3508410</a>","ieee":"T. A. Brown, W. Sigouin, and D.-A. Alistarh, “PathCAS: An efficient middle ground for concurrent search data structures,” in <i>Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming</i>, Seoul, Republic of Korea, 2022, pp. 385–399.","ista":"Brown TA, Sigouin W, Alistarh D-A. 2022. PathCAS: An efficient middle ground for concurrent search data structures. Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming. PPoPP: Sympopsium on Principles and Practice of Parallel Programming, 385–399."},"oa":1,"department":[{"_id":"DaAl"}],"publication_status":"published","month":"04","_id":"11181","date_updated":"2024-10-09T21:02:23Z","corr_author":"1","title":"PathCAS: An efficient middle ground for concurrent search data structures","ddc":["000"],"type":"conference","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"file":[{"success":1,"creator":"dernst","access_level":"open_access","content_type":"application/pdf","date_updated":"2022-08-05T09:19:29Z","file_size":1128343,"file_id":"11731","relation":"main_file","file_name":"2022_PPoPP_Brown.pdf","checksum":"8ceea411fa133795cd4903529498eb6b","date_created":"2022-08-05T09:19:29Z"}],"page":"385-399","abstract":[{"lang":"eng","text":"To maximize the performance of concurrent data structures, researchers have often turned to highly complex fine-grained techniques, resulting in efficient and elegant algorithms, which can however be often difficult to understand and prove correct. While simpler techniques exist, such as transactional memory, they can have limited performance or portability relative to their fine-grained counterparts. Approaches at both ends of this complexity-performance spectrum have been extensively explored, but relatively less is known about the middle ground: approaches that are willing to sacrifice some performance for simplicity, while remaining competitive with state-of-the-art handcrafted designs. In this paper, we explore this middle ground, and present PathCAS, a primitive that combines ideas from multi-word CAS (KCAS) and transactional memory approaches, while carefully avoiding overhead. We show how PathCAS can be used to implement efficient search data structures relatively simply, using an internal binary search tree as an example, then extending this to an AVL tree. Our best implementations outperform many handcrafted search trees: in search-heavy workloads, it rivals the BCCO tree [5], the fastest known concurrent binary tree in terms of search performance [3]. Our results suggest that PathCAS can yield concurrent data structures that are relatively easy to build and prove correct, while offering surprisingly high performance."}],"date_published":"2022-04-02T00:00:00Z","date_created":"2022-04-17T22:01:46Z","article_processing_charge":"No","status":"public","external_id":{"isi":["000883318200027"]},"publication_identifier":{"isbn":["9781450392044"]},"year":"2022","isi":1,"language":[{"iso":"eng"}],"publication":"Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","acknowledgement":"This work was supported by: the Natural Sciences and Engineering Research Council of Canada (NSERC) Collaborative Research and Development grant: CRDPJ 539431-19, the\r\nCanada Foundation for Innovation John R. Evans Leaders Fund with equal support from the Ontario Research Fund CFI Leaders Opportunity Fund: 38512, Waterloo Huawei Joint Innovation Lab project “Scalable Infrastructure for Next Generation Data Management Systems”, NSERC Discovery Launch Supplement: DGECR-2019-00048, NSERC Discovery\r\nProgram under the grants: RGPIN-2019-04227 and RGPIN04512-2018, and the University of Waterloo. We would also like to thank the reviewers for their insightful comments.","file_date_updated":"2022-08-05T09:19:29Z","quality_controlled":"1","author":[{"last_name":"Brown","full_name":"Brown, Trevor A","first_name":"Trevor A","id":"3569F0A0-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Sigouin","first_name":"William","full_name":"Sigouin, William"},{"last_name":"Alistarh","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}]},{"quality_controlled":"1","article_number":"15","author":[{"first_name":"Amir","full_name":"Nikabadi, Amir","last_name":"Nikabadi"},{"last_name":"Korhonen","full_name":"Korhonen, Janne","first_name":"Janne","id":"C5402D42-15BC-11E9-A202-CA2BE6697425"}],"acknowledgement":"Amir Nikabadi: Supported by the LABEX MILYON (ANR-10-LABX-0070) of Université de Lyon, within the program “Investissements d’Avenir” (ANR-11-IDEX-0007) operated by the French National Research Agency (ANR). Janne H. Korhonen: Supported by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).\r\nWe thank François Le Gall and Masayuki Miyamoto for sharing their work on lower bounds for induced subgraph detection [36].","file_date_updated":"2022-05-02T07:53:00Z","publication_identifier":{"isbn":["9783959772198"],"issn":["1868-8969"]},"publication":"25th International Conference on Principles of Distributed Systems","year":"2022","language":[{"iso":"eng"}],"editor":[{"full_name":"Bramas, Quentin","first_name":"Quentin","last_name":"Bramas"},{"full_name":"Gramoli, Vincent","first_name":"Vincent","last_name":"Gramoli"},{"last_name":"Milani","full_name":"Milani, Alessia","first_name":"Alessia"}],"article_processing_charge":"No","date_published":"2022-02-01T00:00:00Z","date_created":"2022-04-17T22:01:47Z","status":"public","file":[{"file_id":"11345","file_size":790396,"date_updated":"2022-05-02T07:53:00Z","content_type":"application/pdf","creator":"dernst","access_level":"open_access","success":1,"date_created":"2022-05-02T07:53:00Z","file_name":"2022_LIPICs_Nikabadi.pdf","checksum":"626551c14de5d4091573200ed0535752","relation":"main_file"}],"abstract":[{"lang":"eng","text":"Subgraph detection has recently been one of the most studied problems in the CONGEST model of distributed computing. In this work, we study the distributed complexity of problems closely related to subgraph detection, mainly focusing on induced subgraph detection. The main line of this work presents lower bounds and parameterized algorithms w.r.t structural parameters of the input graph:\r\n- On general graphs, we give unconditional lower bounds for induced detection of cycles and patterns of treewidth 2 in CONGEST. Moreover, by adapting reductions from centralized parameterized complexity, we prove lower bounds in CONGEST for detecting patterns with a 4-clique, and for induced path detection conditional on the hardness of triangle detection in the congested clique.\r\n- On graphs of bounded degeneracy, we show that induced paths can be detected fast in CONGEST using techniques from parameterized algorithms, while detecting cycles and patterns of treewidth 2 is hard.\r\n- On graphs of bounded vertex cover number, we show that induced subgraph detection is easy in CONGEST for any pattern graph. More specifically, we adapt a centralized parameterized algorithm for a more general maximum common induced subgraph detection problem to the distributed setting. In addition to these induced subgraph detection results, we study various related problems in the CONGEST and congested clique models, including for multicolored versions of subgraph-detection-like problems."}],"ec_funded":1,"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"alternative_title":["LIPIcs"],"corr_author":"1","title":"Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters","ddc":["510"],"month":"02","date_updated":"2025-04-14T07:49:13Z","_id":"11183","intvolume":"       217","publication_status":"published","department":[{"_id":"DaAl"}],"publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik","oa":1,"citation":{"ista":"Nikabadi A, Korhonen J. 2022. Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters. 25th International Conference on Principles of Distributed Systems. OPODIS, LIPIcs, vol. 217, 15.","apa":"Nikabadi, A., &#38; Korhonen, J. (2022). Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters. In Q. Bramas, V. Gramoli, &#38; A. Milani (Eds.), <i>25th International Conference on Principles of Distributed Systems</i> (Vol. 217). Strasbourg, France: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.15\">https://doi.org/10.4230/LIPIcs.OPODIS.2021.15</a>","ieee":"A. Nikabadi and J. Korhonen, “Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters,” in <i>25th International Conference on Principles of Distributed Systems</i>, Strasbourg, France, 2022, vol. 217.","mla":"Nikabadi, Amir, and Janne Korhonen. “Beyond Distributed Subgraph Detection: Induced Subgraphs, Multicolored Problems and Graph Parameters.” <i>25th International Conference on Principles of Distributed Systems</i>, edited by Quentin Bramas et al., vol. 217, 15, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022, doi:<a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.15\">10.4230/LIPIcs.OPODIS.2021.15</a>.","chicago":"Nikabadi, Amir, and Janne Korhonen. “Beyond Distributed Subgraph Detection: Induced Subgraphs, Multicolored Problems and Graph Parameters.” In <i>25th International Conference on Principles of Distributed Systems</i>, edited by Quentin Bramas, Vincent Gramoli, and Alessia Milani, Vol. 217. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022. <a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.15\">https://doi.org/10.4230/LIPIcs.OPODIS.2021.15</a>.","short":"A. Nikabadi, J. Korhonen, in:, Q. Bramas, V. Gramoli, A. Milani (Eds.), 25th International Conference on Principles of Distributed Systems, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022.","ama":"Nikabadi A, Korhonen J. Beyond distributed subgraph detection: Induced subgraphs, multicolored problems and graph parameters. In: Bramas Q, Gramoli V, Milani A, eds. <i>25th International Conference on Principles of Distributed Systems</i>. Vol 217. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2022. doi:<a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.15\">10.4230/LIPIcs.OPODIS.2021.15</a>"},"day":"01","oa_version":"Published Version","doi":"10.4230/LIPIcs.OPODIS.2021.15","project":[{"call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning"}],"conference":{"name":"OPODIS","location":"Strasbourg, France","start_date":"2021-12-13","end_date":"2021-12-15"},"volume":217,"has_accepted_license":"1","scopus_import":"1"},{"corr_author":"1","alternative_title":["LIPIcs"],"ddc":["510"],"title":"Fast graphical population protocols","month":"02","_id":"11184","date_updated":"2025-04-14T07:49:13Z","ec_funded":1,"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"doi":"10.4230/LIPIcs.OPODIS.2021.14","oa_version":"Published Version","day":"01","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning"},{"call_identifier":"H2020","_id":"26A5D39A-B435-11E9-9278-68D0E5697425","grant_number":"840605","name":"Coordination in constrained and natural distributed systems"}],"conference":{"location":"Strasbourg, France","name":"OPODIS","end_date":"2021-12-15","start_date":"2021-12-13"},"has_accepted_license":"1","volume":217,"scopus_import":"1","intvolume":"       217","department":[{"_id":"DaAl"}],"publication_status":"published","arxiv":1,"publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik","citation":{"chicago":"Alistarh, Dan-Adrian, Rati Gelashvili, and Joel Rybicki. “Fast Graphical Population Protocols.” In <i>25th International Conference on Principles of Distributed Systems</i>, edited by Quentin Bramas, Vincent Gramoli, and Alessia Milani, Vol. 217. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022. <a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.14\">https://doi.org/10.4230/LIPIcs.OPODIS.2021.14</a>.","ama":"Alistarh D-A, Gelashvili R, Rybicki J. Fast graphical population protocols. In: Bramas Q, Gramoli V, Milani A, eds. <i>25th International Conference on Principles of Distributed Systems</i>. Vol 217. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2022. doi:<a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.14\">10.4230/LIPIcs.OPODIS.2021.14</a>","short":"D.-A. Alistarh, R. Gelashvili, J. Rybicki, in:, Q. Bramas, V. Gramoli, A. Milani (Eds.), 25th International Conference on Principles of Distributed Systems, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022.","mla":"Alistarh, Dan-Adrian, et al. “Fast Graphical Population Protocols.” <i>25th International Conference on Principles of Distributed Systems</i>, edited by Quentin Bramas et al., vol. 217, 14, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2022, doi:<a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.14\">10.4230/LIPIcs.OPODIS.2021.14</a>.","ieee":"D.-A. Alistarh, R. Gelashvili, and J. Rybicki, “Fast graphical population protocols,” in <i>25th International Conference on Principles of Distributed Systems</i>, Strasbourg, France, 2022, vol. 217.","apa":"Alistarh, D.-A., Gelashvili, R., &#38; Rybicki, J. (2022). Fast graphical population protocols. In Q. Bramas, V. Gramoli, &#38; A. Milani (Eds.), <i>25th International Conference on Principles of Distributed Systems</i> (Vol. 217). Strasbourg, France: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.OPODIS.2021.14\">https://doi.org/10.4230/LIPIcs.OPODIS.2021.14</a>","ista":"Alistarh D-A, Gelashvili R, Rybicki J. 2022. Fast graphical population protocols. 25th International Conference on Principles of Distributed Systems. OPODIS, LIPIcs, vol. 217, 14."},"oa":1,"acknowledgement":"Dan Alistarh: This project has received funding from the European Research Council (ERC)\r\nunder the European Union’s Horizon 2020 research and innovation programme (grant agreement No.805223 ScaleML).\r\nJoel Rybicki: This project has received from the European Union’s Horizon 2020 research and\r\ninnovation programme under the Marie Skłodowska-Curie grant agreement No. 840605.\r\nAcknowledgements We grateful to Giorgi Nadiradze for pointing out a generalisation of the phase clock construction to non-regular graphs. We also thank anonymous reviewers for their useful comments on earlier versions of this manuscript.","file_date_updated":"2022-05-02T08:06:33Z","publication_identifier":{"isbn":["9783959772198"],"issn":["1868-8969"]},"language":[{"iso":"eng"}],"year":"2022","publication":"25th International Conference on Principles of Distributed Systems","quality_controlled":"1","author":[{"first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh"},{"last_name":"Gelashvili","first_name":"Rati","full_name":"Gelashvili, Rati"},{"orcid":"0000-0002-6432-6646","full_name":"Rybicki, Joel","first_name":"Joel","id":"334EFD2E-F248-11E8-B48F-1D18A9856A87","last_name":"Rybicki"}],"article_number":"14","file":[{"relation":"main_file","file_name":"2022_LIPICs_Alistarh.pdf","checksum":"2c7c982174c6f98c4ca6e92539d15086","date_created":"2022-05-02T08:06:33Z","access_level":"open_access","creator":"dernst","success":1,"content_type":"application/pdf","file_size":959406,"date_updated":"2022-05-02T08:06:33Z","file_id":"11346"}],"abstract":[{"text":"Let G be a graph on n nodes. In the stochastic population protocol model, a collection of n indistinguishable, resource-limited nodes collectively solve tasks via pairwise interactions. In each interaction, two randomly chosen neighbors first read each other’s states, and then update their local states. A rich line of research has established tight upper and lower bounds on the complexity of fundamental tasks, such as majority and leader election, in this model, when G is a clique. Specifically, in the clique, these tasks can be solved fast, i.e., in n polylog n pairwise interactions, with high probability, using at most polylog n states per node.\r\nIn this work, we consider the more general setting where G is an arbitrary regular graph, and present a technique for simulating protocols designed for fully-connected networks in any connected regular graph. Our main result is a simulation that is efficient on many interesting graph families: roughly, the simulation overhead is polylogarithmic in the number of nodes, and quadratic in the conductance of the graph. As a sample application, we show that, in any regular graph with conductance φ, both leader election and exact majority can be solved in φ^{-2} ⋅ n polylog n pairwise interactions, with high probability, using at most φ^{-2} ⋅ polylog n states per node. This shows that there are fast and space-efficient population protocols for leader election and exact majority on graphs with good expansion properties. We believe our results will prove generally useful, as they allow efficient technology transfer between the well-mixed (clique) case, and the under-explored spatial setting.","lang":"eng"}],"editor":[{"last_name":"Bramas","full_name":"Bramas, Quentin","first_name":"Quentin"},{"full_name":"Gramoli, Vincent","first_name":"Vincent","last_name":"Gramoli"},{"full_name":"Milani, Alessia","first_name":"Alessia","last_name":"Milani"}],"external_id":{"arxiv":["2102.08808"]},"date_published":"2022-02-01T00:00:00Z","date_created":"2022-04-17T22:01:47Z","article_processing_charge":"No","status":"public"},{"abstract":[{"text":"The recent focus on the efficiency of deep neural networks (DNNs) has led to significant work on model compression approaches, of which weight pruning is one of the most popular. At the same time, there is rapidly-growing computational support for efficiently executing the unstructured-sparse models obtained via pruning. Yet, most existing pruning methods minimize just the number of remaining weights, i.e. the size of the model, rather than optimizing for inference time. We address this gap by introducing SPDY, a new compression method which automatically determines layer-wise sparsity targets achieving a desired inference speedup on a given system, while minimizing accuracy loss. SPDY is the composition of two new techniques. The first is an efficient and general dynamic programming algorithm for solving constrained layer-wise compression problems, given a set of layer-wise error scores. The second technique is a local search procedure for automatically determining such scores in an accurate and robust manner. Experiments across popular vision and language models show that SPDY guarantees speedups while recovering higher accuracy relative to existing strategies, both for one-shot and gradual pruning scenarios, and is compatible with most existing pruning approaches. We also extend our approach to the recently-proposed task of pruning with very little data, where we achieve the best known accuracy recovery when pruning to the GPU-supported 2:4 sparsity pattern.","lang":"eng"}],"page":"6726-6743","file":[{"creator":"dernst","access_level":"open_access","success":1,"content_type":"application/pdf","file_size":615916,"date_updated":"2024-08-19T06:54:41Z","file_id":"17440","relation":"main_file","checksum":"5179a1e4dfc0fbfab6674907299e414a","file_name":"2022_PMLR_Frantar.pdf","date_created":"2024-08-19T06:54:41Z"}],"status":"public","article_processing_charge":"Yes","date_published":"2022-07-20T00:00:00Z","date_created":"2024-05-28T13:45:20Z","external_id":{"isi":["000922378801029"]},"year":"2022","language":[{"iso":"eng"}],"publication":"39th International Conference on Machine Learning","isi":1,"file_date_updated":"2024-08-19T06:54:41Z","acknowledgement":"We gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 programme (grant agreement No 805223 ScaleML),\r\nas well as computational support from AWS EC2. We thank Eldar Kurtic for code and hyper-parameters for BERT pruning, and the Neural Magic Team, notably Michael Goin and\r\nMark Kurtz, for support with their software.","author":[{"id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f","first_name":"Elias","full_name":"Frantar, Elias","last_name":"Frantar"},{"first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh"}],"quality_controlled":"1","volume":162,"has_accepted_license":"1","scopus_import":"1","conference":{"start_date":"2022-07-17","end_date":"2022-07-23","name":"ICML: International Conference on Machine Learning","location":"Baltimore, MD, United States"},"project":[{"call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning"}],"day":"20","oa_version":"Published Version","oa":1,"citation":{"mla":"Frantar, Elias, and Dan-Adrian Alistarh. “SPDY: Accurate Pruning with Speedup Guarantees.” <i>39th International Conference on Machine Learning</i>, vol. 162, ML Research Press, 2022, pp. 6726–43.","ista":"Frantar E, Alistarh D-A. 2022. SPDY: Accurate pruning with speedup guarantees. 39th International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 162, 6726–6743.","ieee":"E. Frantar and D.-A. Alistarh, “SPDY: Accurate pruning with speedup guarantees,” in <i>39th International Conference on Machine Learning</i>, Baltimore, MD, United States, 2022, vol. 162, pp. 6726–6743.","apa":"Frantar, E., &#38; Alistarh, D.-A. (2022). SPDY: Accurate pruning with speedup guarantees. In <i>39th International Conference on Machine Learning</i> (Vol. 162, pp. 6726–6743). Baltimore, MD, United States: ML Research Press.","chicago":"Frantar, Elias, and Dan-Adrian Alistarh. “SPDY: Accurate Pruning with Speedup Guarantees.” In <i>39th International Conference on Machine Learning</i>, 162:6726–43. ML Research Press, 2022.","ama":"Frantar E, Alistarh D-A. SPDY: Accurate pruning with speedup guarantees. In: <i>39th International Conference on Machine Learning</i>. Vol 162. ML Research Press; 2022:6726-6743.","short":"E. Frantar, D.-A. Alistarh, in:, 39th International Conference on Machine Learning, ML Research Press, 2022, pp. 6726–6743."},"publisher":"ML Research Press","publication_status":"published","department":[{"_id":"DaAl"}],"intvolume":"       162","date_updated":"2025-04-14T07:49:14Z","_id":"17059","month":"07","ddc":["000"],"title":"SPDY: Accurate pruning with speedup guarantees","alternative_title":["PMLR"],"corr_author":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"type":"conference","ec_funded":1},{"page":"4163-4181","abstract":[{"text":"In this paper, we consider the problem of sparsifying BERT models, which are a key building block for natural language processing, in order to reduce their storage and computational cost. We introduce the Optimal BERT Surgeon (oBERT), an efficient and accurate pruning method based on approximate second-order information, which we show to yield state-of-the-art results in both stages of language tasks: pre-training and fine-tuning. Specifically, oBERT extends existing work on second-order pruning by allowing for pruning weight blocks, and is the first such method that is applicable at BERT scale. Second, we investigate compounding compression approaches to obtain highly compressed but accurate models for deployment on edge devices. These models significantly push boundaries of the current state-of-the-art sparse BERT models with respect to all metrics: model size, inference speed and task accuracy. For example, relative to the dense BERT-base, we obtain 10x model size compression with < 1% accuracy drop, 10x CPU-inference speedup with < 2% accuracy drop, and 29x CPU-inference speedup with < 7.5% accuracy drop. Our code, fully integrated with Transformers and SparseML, is available at https://github.com/neuralmagic/sparseml/tree/main/research/optimal_BERT_surgeon_oBERT.","lang":"eng"}],"file":[{"content_type":"application/pdf","success":1,"creator":"dernst","access_level":"open_access","file_id":"17354","date_updated":"2024-07-31T11:03:34Z","file_size":522563,"relation":"main_file","date_created":"2024-07-31T11:03:34Z","file_name":"2022_EMNLP_Kurtic.pdf","checksum":"c47b9edd8a9f743ac77a593de6d2e84a"}],"external_id":{"arxiv":["2203.07259"]},"related_material":{"link":[{"relation":"software","url":"https://github.com/neuralmagic/sparseml/tree/main/research/optimal_BERT_surgeon_oBERT"}]},"status":"public","date_published":"2022-12-01T00:00:00Z","date_created":"2024-05-29T06:40:55Z","article_processing_charge":"Yes","file_date_updated":"2024-07-31T11:03:34Z","language":[{"iso":"eng"}],"year":"2022","publication":"Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing","author":[{"full_name":"Kurtic, Eldar","first_name":"Eldar","id":"47beb3a5-07b5-11eb-9b87-b108ec578218","last_name":"Kurtic"},{"first_name":"Daniel","full_name":"Campos, Daniel","last_name":"Campos"},{"last_name":"Nguyen","full_name":"Nguyen, Tuan","first_name":"Tuan"},{"full_name":"Frantar, Elias","first_name":"Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f","last_name":"Frantar"},{"full_name":"Kurtz, Mark","first_name":"Mark","last_name":"Kurtz"},{"last_name":"Fineran","full_name":"Fineran, Benjamin","first_name":"Benjamin"},{"first_name":"Michael","full_name":"Goin, Michael","last_name":"Goin"},{"last_name":"Alistarh","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian"}],"quality_controlled":"1","doi":"10.18653/v1/2022.emnlp-main.279","day":"01","oa_version":"Published Version","scopus_import":"1","has_accepted_license":"1","conference":{"start_date":"2022-12-07","end_date":"2022-12-11","name":"EMNLP: Conference on Empirical Methods in Natural Language Processing","location":"Abu Dhabi, United Arab Emirates"},"department":[{"_id":"DaAl"}],"arxiv":1,"publication_status":"published","citation":{"chicago":"Kurtic, Eldar, Daniel Campos, Tuan Nguyen, Elias Frantar, Mark Kurtz, Benjamin Fineran, Michael Goin, and Dan-Adrian Alistarh. “The Optimal BERT Surgeon: Scalable and Accurate Second-Order Pruning for Large Language Models.” In <i>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</i>, 4163–81. Association for Computational Linguistics, 2022. <a href=\"https://doi.org/10.18653/v1/2022.emnlp-main.279\">https://doi.org/10.18653/v1/2022.emnlp-main.279</a>.","ama":"Kurtic E, Campos D, Nguyen T, et al. The optimal BERT surgeon: Scalable and accurate second-order pruning for large language models. In: <i>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</i>. Association for Computational Linguistics; 2022:4163-4181. doi:<a href=\"https://doi.org/10.18653/v1/2022.emnlp-main.279\">10.18653/v1/2022.emnlp-main.279</a>","short":"E. Kurtic, D. Campos, T. Nguyen, E. Frantar, M. Kurtz, B. Fineran, M. Goin, D.-A. Alistarh, in:, Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics, 2022, pp. 4163–4181.","apa":"Kurtic, E., Campos, D., Nguyen, T., Frantar, E., Kurtz, M., Fineran, B., … Alistarh, D.-A. (2022). The optimal BERT surgeon: Scalable and accurate second-order pruning for large language models. In <i>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</i> (pp. 4163–4181). Abu Dhabi, United Arab Emirates: Association for Computational Linguistics. <a href=\"https://doi.org/10.18653/v1/2022.emnlp-main.279\">https://doi.org/10.18653/v1/2022.emnlp-main.279</a>","ista":"Kurtic E, Campos D, Nguyen T, Frantar E, Kurtz M, Fineran B, Goin M, Alistarh D-A. 2022. The optimal BERT surgeon: Scalable and accurate second-order pruning for large language models. Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing. EMNLP: Conference on Empirical Methods in Natural Language Processing, 4163–4181.","ieee":"E. Kurtic <i>et al.</i>, “The optimal BERT surgeon: Scalable and accurate second-order pruning for large language models,” in <i>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</i>, Abu Dhabi, United Arab Emirates, 2022, pp. 4163–4181.","mla":"Kurtic, Eldar, et al. “The Optimal BERT Surgeon: Scalable and Accurate Second-Order Pruning for Large Language Models.” <i>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</i>, Association for Computational Linguistics, 2022, pp. 4163–81, doi:<a href=\"https://doi.org/10.18653/v1/2022.emnlp-main.279\">10.18653/v1/2022.emnlp-main.279</a>."},"oa":1,"publisher":"Association for Computational Linguistics","title":"The optimal BERT surgeon: Scalable and accurate second-order pruning for large language models","ddc":["000"],"corr_author":"1","_id":"17088","date_updated":"2024-07-31T11:05:32Z","month":"12","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference"},{"title":"Dynamic averaging load balancing on cycles","ddc":["000"],"month":"04","_id":"8286","date_updated":"2025-07-10T11:55:11Z","ec_funded":1,"type":"journal_article","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"doi":"10.1007/s00453-021-00905-9","oa_version":"Published Version","day":"01","conference":{"name":"ICALP: Automata, Languages and Programming","location":"Virtual, Online; Germany","end_date":"2020-07-11","start_date":"2020-07-08"},"project":[{"call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning"},{"_id":"B67AFEDC-15C9-11EA-A837-991A96BB2854","name":"IST Austria Open Access Fund"}],"has_accepted_license":"1","volume":84,"scopus_import":"1","intvolume":"        84","department":[{"_id":"DaAl"}],"arxiv":1,"publication_status":"published","publisher":"Springer Nature","citation":{"apa":"Alistarh, D.-A., Nadiradze, G., &#38; Sabour, A. (2022). Dynamic averaging load balancing on cycles. <i>Algorithmica</i>. Virtual, Online; Germany: Springer Nature. <a href=\"https://doi.org/10.1007/s00453-021-00905-9\">https://doi.org/10.1007/s00453-021-00905-9</a>","ista":"Alistarh D-A, Nadiradze G, Sabour A. 2022. Dynamic averaging load balancing on cycles. Algorithmica. 84(4), 1007–1029.","ieee":"D.-A. Alistarh, G. Nadiradze, and A. Sabour, “Dynamic averaging load balancing on cycles,” <i>Algorithmica</i>, vol. 84, no. 4. Springer Nature, pp. 1007–1029, 2022.","mla":"Alistarh, Dan-Adrian, et al. “Dynamic Averaging Load Balancing on Cycles.” <i>Algorithmica</i>, vol. 84, no. 4, Springer Nature, 2022, pp. 1007–29, doi:<a href=\"https://doi.org/10.1007/s00453-021-00905-9\">10.1007/s00453-021-00905-9</a>.","chicago":"Alistarh, Dan-Adrian, Giorgi Nadiradze, and Amirmojtaba Sabour. “Dynamic Averaging Load Balancing on Cycles.” <i>Algorithmica</i>. Springer Nature, 2022. <a href=\"https://doi.org/10.1007/s00453-021-00905-9\">https://doi.org/10.1007/s00453-021-00905-9</a>.","ama":"Alistarh D-A, Nadiradze G, Sabour A. Dynamic averaging load balancing on cycles. <i>Algorithmica</i>. 2022;84(4):1007-1029. doi:<a href=\"https://doi.org/10.1007/s00453-021-00905-9\">10.1007/s00453-021-00905-9</a>","short":"D.-A. Alistarh, G. Nadiradze, A. Sabour, Algorithmica 84 (2022) 1007–1029."},"oa":1,"acknowledgement":"The authors sincerely thank Thomas Sauerwald and George Giakkoupis for insightful discussions, and Mohsen Ghaffari, Yuval Peres, and Udi Wieder for feedback on earlier versions of this draft. We also thank the ICALP anonymous reviewers for their very useful comments. Open access funding provided by Institute of Science and Technology (IST Austria). Funding was provided by European Research Council (Grant No. PR1042ERC01).","file_date_updated":"2021-12-27T10:36:40Z","publication_identifier":{"issn":["0178-4617"],"eissn":["1432-0541"]},"year":"2022","isi":1,"language":[{"iso":"eng"}],"publication":"Algorithmica","quality_controlled":"1","author":[{"orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh"},{"first_name":"Giorgi","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-5634-0731","full_name":"Nadiradze, Giorgi","last_name":"Nadiradze"},{"full_name":"Sabour, Amirmojtaba","id":"bcc145fd-e77f-11ea-ae8b-80d661dbff67","first_name":"Amirmojtaba","last_name":"Sabour"}],"issue":"4","file":[{"relation":"main_file","checksum":"21169b25b0c8e17b21e12af22bff9870","file_name":"2021_Algorithmica_Alistarh.pdf","date_created":"2021-12-27T10:36:40Z","access_level":"open_access","creator":"cchlebak","success":1,"content_type":"application/pdf","file_size":525950,"date_updated":"2021-12-27T10:36:40Z","file_id":"10577"}],"page":"1007-1029","abstract":[{"text":"We consider the following dynamic load-balancing process: given an underlying graph G with n nodes, in each step t≥ 0, one unit of load is created, and placed at a randomly chosen graph node. In the same step, the chosen node picks a random neighbor, and the two nodes balance their loads by averaging them. We are interested in the expected gap between the minimum and maximum loads at nodes as the process progresses, and its dependence on n and on the graph structure. Variants of the above graphical balanced allocation process have been studied previously by Peres, Talwar, and Wieder [Peres et al., 2015], and by Sauerwald and Sun [Sauerwald and Sun, 2015]. These authors left as open the question of characterizing the gap in the case of cycle graphs in the dynamic case, where weights are created during the algorithm’s execution. For this case, the only known upper bound is of 𝒪(n log n), following from a majorization argument due to [Peres et al., 2015], which analyzes a related graphical allocation process. In this paper, we provide an upper bound of 𝒪 (√n log n) on the expected gap of the above process for cycles of length n. We introduce a new potential analysis technique, which enables us to bound the difference in load between k-hop neighbors on the cycle, for any k ≤ n/2. We complement this with a \"gap covering\" argument, which bounds the maximum value of the gap by bounding its value across all possible subsets of a certain structure, and recursively bounding the gaps within each subset. We provide analytical and experimental evidence that our upper bound on the gap is tight up to a logarithmic factor. ","lang":"eng"}],"related_material":{"link":[{"relation":"earlier_version","url":"https://doi.org/10.4230/LIPIcs.ICALP.2020.7"}],"record":[{"relation":"earlier_version","status":"public","id":"15077"}]},"article_type":"original","external_id":{"arxiv":["2003.09297"],"isi":["000734004600001"]},"date_created":"2020-08-24T06:24:04Z","date_published":"2022-04-01T00:00:00Z","article_processing_charge":"Yes (via OA deal)","status":"public"},{"quality_controlled":"1","author":[{"first_name":"Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f","full_name":"Frantar, Elias","last_name":"Frantar"},{"last_name":"Singh","id":"DD138E24-D89D-11E9-9DC0-DEF6E5697425","first_name":"Sidak Pal","full_name":"Singh, Sidak Pal"},{"last_name":"Alistarh","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian"}],"publication_identifier":{"isbn":["9781713871088"]},"language":[{"iso":"eng"}],"publication":"36th Conference on Neural Information Processing Systems","year":"2022","file_date_updated":"2024-08-05T09:25:39Z","acknowledgement":"We gratefully acknowledge funding from the European Research Council (ERC) under the European Union’s Horizon 2020 programme (grant agreement No 805223 ScaleML), as well as computational support from AWS EC2. We thank Eldar Kurtic for providing us BERT code and pretrained models, and the Neural Magic Team, notably Michael Goin and Mark Kurtz, for support with their software. ","article_processing_charge":"No","date_published":"2022-12-01T00:00:00Z","date_created":"2024-05-29T06:38:26Z","status":"public","related_material":{"record":[{"relation":"dissertation_contains","id":"17485","status":"public"}]},"external_id":{"arxiv":["2208.11580"]},"file":[{"file_name":"2022_NeurIPS_Frantar.pdf","checksum":"38e7d75f578e8d2e207c81895e09f211","date_created":"2024-08-05T09:25:39Z","relation":"main_file","file_size":491843,"date_updated":"2024-08-05T09:25:39Z","file_id":"17391","access_level":"open_access","creator":"dernst","success":1,"content_type":"application/pdf"}],"abstract":[{"lang":"eng","text":"We consider the problem of model compression for deep neural networks (DNNs) in the challenging one-shot/post-training setting, in which we are given an accurate trained model, and must compress it without any retraining, based only on a small amount of calibration input data. This problem has become popular in view of the emerging software and hardware support for executing models compressed via pruning and/or quantization with speedup, and well-performing solutions have been proposed independently for both compression approaches.In this paper, we introduce a new compression framework which covers both weight pruning and quantization in a unified setting, is time- and space-efficient, and considerably improves upon the practical performance of existing post-training methods. At the technical level, our approach is based on an exact and efficient realization of the classical Optimal Brain Surgeon (OBS) framework of [LeCun, Denker, and Solla, 1990] extended to also cover weight quantization at the scale of modern DNNs. From the practical perspective, our experimental results show that it can improve significantly upon the compression-accuracy trade-offs of existing post-training methods, and that it can enable the accurate compound application of both pruning and quantization in a post-training setting."}],"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","ec_funded":1,"month":"12","date_updated":"2026-04-07T12:43:03Z","_id":"17087","alternative_title":["NeurIPS"],"corr_author":"1","title":"Optimal brain compression: A framework for accurate post-training quantization and pruning","ddc":["000"],"publisher":"ML Research Press","oa":1,"citation":{"mla":"Frantar, Elias, et al. “Optimal Brain Compression: A Framework for Accurate Post-Training Quantization and Pruning.” <i>36th Conference on Neural Information Processing Systems</i>, vol. 35, ML Research Press, 2022.","ieee":"E. Frantar, S. P. Singh, and D.-A. Alistarh, “Optimal brain compression: A framework for accurate post-training quantization and pruning,” in <i>36th Conference on Neural Information Processing Systems</i>, New Orleans, LA, United States, 2022, vol. 35.","apa":"Frantar, E., Singh, S. P., &#38; Alistarh, D.-A. (2022). Optimal brain compression: A framework for accurate post-training quantization and pruning. In <i>36th Conference on Neural Information Processing Systems</i> (Vol. 35). New Orleans, LA, United States: ML Research Press.","ista":"Frantar E, Singh SP, Alistarh D-A. 2022. Optimal brain compression: A framework for accurate post-training quantization and pruning. 36th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, NeurIPS, vol. 35.","chicago":"Frantar, Elias, Sidak Pal Singh, and Dan-Adrian Alistarh. “Optimal Brain Compression: A Framework for Accurate Post-Training Quantization and Pruning.” In <i>36th Conference on Neural Information Processing Systems</i>, Vol. 35. ML Research Press, 2022.","short":"E. Frantar, S.P. Singh, D.-A. Alistarh, in:, 36th Conference on Neural Information Processing Systems, ML Research Press, 2022.","ama":"Frantar E, Singh SP, Alistarh D-A. Optimal brain compression: A framework for accurate post-training quantization and pruning. In: <i>36th Conference on Neural Information Processing Systems</i>. Vol 35. ML Research Press; 2022."},"intvolume":"        35","publication_status":"published","arxiv":1,"department":[{"_id":"DaAl"}],"conference":{"end_date":"2022-12-09","start_date":"2022-11-28","location":"New Orleans, LA, United States","name":"NeurIPS: Neural Information Processing Systems"},"project":[{"call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223"}],"scopus_import":"1","has_accepted_license":"1","volume":35,"day":"01","oa_version":"Submitted Version"},{"doi":"10.1145/3528535.3565248","oa_version":"Published Version","day":"01","conference":{"end_date":"2022-11-11","start_date":"2022-11-07","location":"Quebec, QC, Canada","name":"Middleware: International Middleware Conference"},"has_accepted_license":"1","scopus_import":"1","department":[{"_id":"DaAl"}],"arxiv":1,"publication_status":"published","publisher":"Association for Computing Machinery","citation":{"mla":"Markov, Ilia, et al. “CGX: Adaptive System Support for Communication-Efficient Deep Learning.” <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, Association for Computing Machinery, 2022, pp. 241–54, doi:<a href=\"https://doi.org/10.1145/3528535.3565248\">10.1145/3528535.3565248</a>.","apa":"Markov, I., Ramezanikebrya, H., &#38; Alistarh, D.-A. (2022). CGX: Adaptive system support for communication-efficient deep learning. In <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i> (pp. 241–254). Quebec, QC, Canada: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3528535.3565248\">https://doi.org/10.1145/3528535.3565248</a>","ista":"Markov I, Ramezanikebrya H, Alistarh D-A. 2022. CGX: Adaptive system support for communication-efficient deep learning. Proceedings of the 23rd ACM/IFIP International Middleware Conference. Middleware: International Middleware Conference, 241–254.","ieee":"I. Markov, H. Ramezanikebrya, and D.-A. Alistarh, “CGX: Adaptive system support for communication-efficient deep learning,” in <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, Quebec, QC, Canada, 2022, pp. 241–254.","short":"I. Markov, H. Ramezanikebrya, D.-A. Alistarh, in:, Proceedings of the 23rd ACM/IFIP International Middleware Conference, Association for Computing Machinery, 2022, pp. 241–254.","ama":"Markov I, Ramezanikebrya H, Alistarh D-A. CGX: Adaptive system support for communication-efficient deep learning. In: <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>. Association for Computing Machinery; 2022:241-254. doi:<a href=\"https://doi.org/10.1145/3528535.3565248\">10.1145/3528535.3565248</a>","chicago":"Markov, Ilia, Hamidreza Ramezanikebrya, and Dan-Adrian Alistarh. “CGX: Adaptive System Support for Communication-Efficient Deep Learning.” In <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, 241–54. Association for Computing Machinery, 2022. <a href=\"https://doi.org/10.1145/3528535.3565248\">https://doi.org/10.1145/3528535.3565248</a>."},"oa":1,"corr_author":"1","title":"CGX: Adaptive system support for communication-efficient deep learning","ddc":["000"],"month":"11","_id":"12780","date_updated":"2026-04-07T13:00:54Z","type":"conference","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"file":[{"file_id":"12795","file_size":1514169,"date_updated":"2023-04-03T06:17:58Z","content_type":"application/pdf","creator":"dernst","access_level":"open_access","success":1,"date_created":"2023-04-03T06:17:58Z","file_name":"2022_ACMMiddleware_Markov.pdf","checksum":"1a397746235f245da5468819247ff663","relation":"main_file"}],"page":"241-254","abstract":[{"text":"The ability to scale out training workloads has been one of the key performance enablers of deep learning. The main scaling approach is data-parallel GPU-based training, which has been boosted by hardware and software support for highly efficient point-to-point communication, and in particular via hardware bandwidth over-provisioning. Overprovisioning comes at a cost: there is an order of magnitude price difference between \"cloud-grade\" servers with such support, relative to their popular \"consumer-grade\" counterparts, although single server-grade and consumer-grade GPUs can have similar computational envelopes.\r\n\r\nIn this paper, we show that the costly hardware overprovisioning approach can be supplanted via algorithmic and system design, and propose a framework called CGX, which provides efficient software support for compressed communication in ML applications, for both multi-GPU single-node training, as well as larger-scale multi-node training. CGX is based on two technical advances: At the system level, it relies on a re-developed communication stack for ML frameworks, which provides flexible, highly-efficient support for compressed communication. At the application level, it provides seamless, parameter-free integration with popular frameworks, so that end-users do not have to modify training recipes, nor significant training code. This is complemented by a layer-wise adaptive compression technique which dynamically balances compression gains with accuracy preservation. CGX integrates with popular ML frameworks, providing up to 3X speedups for multi-GPU nodes based on commodity hardware, and order-of-magnitude improvements in the multi-node setting, with negligible impact on accuracy.","lang":"eng"}],"related_material":{"record":[{"relation":"dissertation_contains","status":"public","id":"17490"}]},"external_id":{"isi":["001061556200024"],"arxiv":["2111.08617"]},"date_created":"2023-03-31T06:17:00Z","date_published":"2022-11-01T00:00:00Z","article_processing_charge":"Yes (via OA deal)","status":"public","file_date_updated":"2023-04-03T06:17:58Z","acknowledgement":"The authors sincerely thank Nikoli Dryden, Tal Ben-Nun, Torsten Hoefler and Bapi Chatterjee for useful discussions throughout the development of this project.","publication_identifier":{"isbn":["9781450393409"]},"language":[{"iso":"eng"}],"isi":1,"publication":"Proceedings of the 23rd ACM/IFIP International Middleware Conference","year":"2022","quality_controlled":"1","author":[{"full_name":"Markov, Ilia","id":"D0CF4148-C985-11E9-8066-0BDEE5697425","first_name":"Ilia","last_name":"Markov"},{"first_name":"Hamidreza","full_name":"Ramezanikebrya, Hamidreza","last_name":"Ramezanikebrya"},{"last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X"}]},{"doi":"10.1109/cvpr52688.2022.01195","oa_version":"Preprint","day":"27","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2111.13445"}],"conference":{"location":"New Orleans, LA, United States","name":"CVPR: Computer Vision and Pattern Recognition","start_date":"2022-06-18","end_date":"2022-06-24"},"project":[{"_id":"9B9290DE-BA93-11EA-9121-9846C619BF3A","grant_number":"W1260-N35","name":"Vienna Graduate School on Computational Optimization"},{"name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425"}],"scopus_import":"1","department":[{"_id":"DaAl"},{"_id":"ChLa"}],"arxiv":1,"publication_status":"published","publisher":"Institute of Electrical and Electronics Engineers","citation":{"ama":"Iofinova EB, Krumes A, Kurtz M, Alistarh D-A. How well do sparse ImageNet models transfer? In: <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>. Institute of Electrical and Electronics Engineers; 2022:12256-12266. doi:<a href=\"https://doi.org/10.1109/cvpr52688.2022.01195\">10.1109/cvpr52688.2022.01195</a>","short":"E.B. Iofinova, A. Krumes, M. Kurtz, D.-A. Alistarh, in:, 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition, Institute of Electrical and Electronics Engineers, 2022, pp. 12256–12266.","chicago":"Iofinova, Eugenia B, Alexandra Krumes, Mark Kurtz, and Dan-Adrian Alistarh. “How Well Do Sparse ImageNet Models Transfer?” In <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, 12256–66. Institute of Electrical and Electronics Engineers, 2022. <a href=\"https://doi.org/10.1109/cvpr52688.2022.01195\">https://doi.org/10.1109/cvpr52688.2022.01195</a>.","apa":"Iofinova, E. B., Krumes, A., Kurtz, M., &#38; Alistarh, D.-A. (2022). How well do sparse ImageNet models transfer? In <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i> (pp. 12256–12266). New Orleans, LA, United States: Institute of Electrical and Electronics Engineers. <a href=\"https://doi.org/10.1109/cvpr52688.2022.01195\">https://doi.org/10.1109/cvpr52688.2022.01195</a>","ieee":"E. B. Iofinova, A. Krumes, M. Kurtz, and D.-A. Alistarh, “How well do sparse ImageNet models transfer?,” in <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, New Orleans, LA, United States, 2022, pp. 12256–12266.","ista":"Iofinova EB, Krumes A, Kurtz M, Alistarh D-A. 2022. How well do sparse ImageNet models transfer? 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition. CVPR: Computer Vision and Pattern Recognition, 12256–12266.","mla":"Iofinova, Eugenia B., et al. “How Well Do Sparse ImageNet Models Transfer?” <i>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, Institute of Electrical and Electronics Engineers, 2022, pp. 12256–66, doi:<a href=\"https://doi.org/10.1109/cvpr52688.2022.01195\">10.1109/cvpr52688.2022.01195</a>."},"oa":1,"corr_author":"1","title":"How well do sparse ImageNet models transfer?","month":"09","_id":"12299","date_updated":"2026-04-07T13:30:19Z","ec_funded":1,"type":"conference","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","page":"12256-12266","abstract":[{"text":"Transfer learning is a classic paradigm by which models pretrained on large “upstream” datasets are adapted to yield good results on “downstream” specialized datasets. Generally, more accurate models on the “upstream” dataset tend to provide better transfer accuracy “downstream”. In this work, we perform an in-depth investigation of this phenomenon in the context of convolutional neural networks (CNNs) trained on the ImageNet dataset, which have been pruned-that is, compressed by sparsifiying their connections. We consider transfer using unstructured pruned models obtained by applying several state-of-the-art pruning methods, including magnitude-based, second-order, regrowth, lottery-ticket, and regularization approaches, in the context of twelve standard transfer tasks. In a nutshell, our study shows that sparse models can match or even outperform the transfer performance of dense models, even at high sparsities, and, while doing so, can lead to significant inference and even training speedups. At the same time, we observe and analyze significant differences in the behaviour of different pruning methods. The code is available at: https://github.com/IST-DASLab/sparse-imagenet-transfer.","lang":"eng"}],"related_material":{"record":[{"status":"public","id":"13074","relation":"dissertation_contains"}]},"external_id":{"arxiv":["2111.13445"],"isi":["000870759105034"]},"date_published":"2022-09-27T00:00:00Z","date_created":"2023-01-16T10:06:00Z","article_processing_charge":"No","status":"public","acknowledgement":"he authors would like to sincerely thank Christoph Lampert and Nir Shavit for fruitful discussions during the development of this work, and Eldar Kurtic for experimental support. EI was supported in part by the FWF DK VGSCO, grant agreement number W1260-N35, while AP and DA acknowledge generous support by the ERC, via Starting Grant 805223 ScaleML.","publication_identifier":{"eissn":["2575-7075"]},"language":[{"iso":"eng"}],"isi":1,"year":"2022","publication":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition","quality_controlled":"1","author":[{"last_name":"Iofinova","full_name":"Iofinova, Eugenia B","orcid":"0000-0002-7778-3221","first_name":"Eugenia B","id":"f9a17499-f6e0-11ea-865d-fdf9a3f77117"},{"last_name":"Peste","first_name":"Elena-Alexandra","id":"32D78294-F248-11E8-B48F-1D18A9856A87","full_name":"Peste, Elena-Alexandra"},{"last_name":"Kurtz","full_name":"Kurtz, Mark","first_name":"Mark"},{"first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh"}]},{"type":"journal_article","user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"corr_author":"1","ddc":["000"],"title":"Mean-field analysis of piecewise linear solutions for wide ReLU networks","month":"04","date_updated":"2026-05-30T22:30:05Z","_id":"11420","intvolume":"        23","arxiv":1,"publication_status":"published","department":[{"_id":"MaMo"},{"_id":"DaAl"}],"publisher":"Journal of Machine Learning Research","oa":1,"citation":{"apa":"Shevchenko, A., Kungurtsev, V., &#38; Mondelli, M. (2022). Mean-field analysis of piecewise linear solutions for wide ReLU networks. <i>Journal of Machine Learning Research</i>. Journal of Machine Learning Research.","ieee":"A. Shevchenko, V. Kungurtsev, and M. Mondelli, “Mean-field analysis of piecewise linear solutions for wide ReLU networks,” <i>Journal of Machine Learning Research</i>, vol. 23, no. 130. Journal of Machine Learning Research, pp. 1–55, 2022.","ista":"Shevchenko A, Kungurtsev V, Mondelli M. 2022. Mean-field analysis of piecewise linear solutions for wide ReLU networks. Journal of Machine Learning Research. 23(130), 1–55.","mla":"Shevchenko, Alexander, et al. “Mean-Field Analysis of Piecewise Linear Solutions for Wide ReLU Networks.” <i>Journal of Machine Learning Research</i>, vol. 23, no. 130, Journal of Machine Learning Research, 2022, pp. 1–55.","short":"A. Shevchenko, V. Kungurtsev, M. Mondelli, Journal of Machine Learning Research 23 (2022) 1–55.","ama":"Shevchenko A, Kungurtsev V, Mondelli M. Mean-field analysis of piecewise linear solutions for wide ReLU networks. <i>Journal of Machine Learning Research</i>. 2022;23(130):1-55.","chicago":"Shevchenko, Alexander, Vyacheslav Kungurtsev, and Marco Mondelli. “Mean-Field Analysis of Piecewise Linear Solutions for Wide ReLU Networks.” <i>Journal of Machine Learning Research</i>. Journal of Machine Learning Research, 2022."},"day":"01","oa_version":"Published Version","project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"}],"scopus_import":"1","has_accepted_license":"1","volume":23,"quality_controlled":"1","author":[{"first_name":"Aleksandr","id":"F2B06EC2-C99E-11E9-89F0-752EE6697425","full_name":"Shevchenko, Aleksandr","last_name":"Shevchenko"},{"last_name":"Kungurtsev","full_name":"Kungurtsev, Vyacheslav","first_name":"Vyacheslav"},{"orcid":"0000-0002-3242-7020","full_name":"Mondelli, Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","last_name":"Mondelli"}],"issue":"130","acknowledgement":"We would like to thank Mert Pilanci for several exploratory discussions in the early stage\r\nof the project, Jan Maas for clarifications about Jordan et al. (1998), and Max Zimmer for\r\nsuggestive numerical experiments. A. Shevchenko and M. Mondelli are partially supported\r\nby the 2019 Lopez-Loreta Prize. V. Kungurtsev acknowledges support to the OP VVV\r\nproject CZ.02.1.01/0.0/0.0/16 019/0000765 Research Center for Informatics.\r\n","file_date_updated":"2022-05-30T08:22:55Z","publication_identifier":{"issn":["1532-4435"],"eissn":["1533-7928"]},"language":[{"iso":"eng"}],"year":"2022","publication":"Journal of Machine Learning Research","related_material":{"link":[{"relation":"other","url":"https://www.jmlr.org/papers/v23/21-1365.html"}],"record":[{"id":"17465","status":"public","relation":"dissertation_contains"}]},"external_id":{"arxiv":["2111.02278"]},"article_type":"original","article_processing_charge":"No","date_created":"2022-05-29T22:01:54Z","date_published":"2022-04-01T00:00:00Z","status":"public","file":[{"relation":"main_file","date_created":"2022-05-30T08:22:55Z","file_name":"21-1365.pdf","checksum":"d4ff5d1affb34848b5c5e4002483fc62","content_type":"application/pdf","success":1,"creator":"cchlebak","access_level":"open_access","file_id":"11422","date_updated":"2022-05-30T08:22:55Z","file_size":1521701}],"abstract":[{"lang":"eng","text":"Understanding the properties of neural networks trained via stochastic gradient descent (SGD) is at the heart of the theory of deep learning. In this work, we take a mean-field view, and consider a two-layer ReLU network trained via noisy-SGD for a univariate regularized regression problem. Our main result is that SGD with vanishingly small noise injected in the gradients is biased towards a simple solution: at convergence, the ReLU network implements a piecewise linear map of the inputs, and the number of “knot” points -- i.e., points where the tangent of the ReLU network estimator changes -- between two consecutive training inputs is at most three. In particular, as the number of neurons of the network grows, the SGD dynamics is captured by the solution of a gradient flow and, at convergence, the distribution of the weights approaches the unique minimizer of a related free energy, which has a Gibbs form. Our key technical contribution consists in the analysis of the estimator resulting from this minimizer: we show that its second derivative vanishes everywhere, except at some specific locations which represent the “knot” points. We also provide empirical evidence that knots at locations distinct from the data points might occur, as predicted by our theory."}],"page":"1-55"},{"external_id":{"arxiv":["2110.14391"]},"date_created":"2022-06-19T22:01:58Z","date_published":"2021-12-01T00:00:00Z","article_processing_charge":"No","status":"public","page":"2823-2834","abstract":[{"lang":"eng","text":"We study efficient distributed algorithms for the fundamental problem of principal component analysis and leading eigenvector computation on the sphere, when the data are randomly distributed among a set of computational nodes. We propose a new quantized variant of Riemannian gradient descent to solve this problem, and prove that the algorithm converges with high probability under a set of necessary spherical-convexity properties. We give bounds on the number of bits transmitted by the algorithm under common initialization schemes, and investigate the dependency on the problem dimension in each case."}],"quality_controlled":"1","author":[{"last_name":"Alimisis","first_name":"Foivos","full_name":"Alimisis, Foivos"},{"id":"11396234-BB50-11E9-B24C-90FCE5697425","first_name":"Peter","full_name":"Davies, Peter","orcid":"0000-0002-5646-9524","last_name":"Davies"},{"last_name":"Vandereycken","first_name":"Bart","full_name":"Vandereycken, Bart"},{"last_name":"Alistarh","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","full_name":"Alistarh, Dan-Adrian"}],"acknowledgement":"We would like to thank the anonymous reviewers for helpful comments and suggestions. We also thank Aurelien Lucchi and Antonio Orvieto for fruitful discussions at an early stage of this work. FA is partially supported by the SNSF under research project No. 192363 and conducted part of this work while at IST Austria under the European Union’s Horizon 2020 research and innovation programme (grant agreement No. 805223 ScaleML). PD partly conducted this work while at IST Austria and was supported by the European Union’s Horizon 2020 programme under the Marie Skłodowska-Curie grant agreement No. 754411.","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"year":"2021","language":[{"iso":"eng"}],"publication":"Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems","intvolume":"         4","department":[{"_id":"DaAl"}],"arxiv":1,"publication_status":"published","publisher":"Neural Information Processing Systems Foundation","citation":{"mla":"Alimisis, Foivos, et al. “Distributed Principal Component Analysis with Limited Communication.” <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, vol. 4, Neural Information Processing Systems Foundation, 2021, pp. 2823–34.","ieee":"F. Alimisis, P. Davies, B. Vandereycken, and D.-A. Alistarh, “Distributed principal component analysis with limited communication,” in <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 4, pp. 2823–2834.","apa":"Alimisis, F., Davies, P., Vandereycken, B., &#38; Alistarh, D.-A. (2021). Distributed principal component analysis with limited communication. In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i> (Vol. 4, pp. 2823–2834). Virtual, Online: Neural Information Processing Systems Foundation.","ista":"Alimisis F, Davies P, Vandereycken B, Alistarh D-A. 2021. Distributed principal component analysis with limited communication. Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 4, 2823–2834.","ama":"Alimisis F, Davies P, Vandereycken B, Alistarh D-A. Distributed principal component analysis with limited communication. In: <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>. Vol 4. Neural Information Processing Systems Foundation; 2021:2823-2834.","short":"F. Alimisis, P. Davies, B. Vandereycken, D.-A. Alistarh, in:, Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 2823–2834.","chicago":"Alimisis, Foivos, Peter Davies, Bart Vandereycken, and Dan-Adrian Alistarh. “Distributed Principal Component Analysis with Limited Communication.” In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, 4:2823–34. Neural Information Processing Systems Foundation, 2021."},"oa":1,"oa_version":"Published Version","main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/1680e9fa7b4dd5d62ece800239bb53bd-Paper.pdf"}],"day":"01","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning"},{"_id":"260C2330-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","name":"ISTplus - Postdoctoral Fellowships","grant_number":"754411"}],"conference":{"end_date":"2021-12-14","start_date":"2021-12-06","location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems"},"volume":4,"scopus_import":"1","ec_funded":1,"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","corr_author":"1","title":"Distributed principal component analysis with limited communication","month":"12","_id":"11452","date_updated":"2025-04-14T07:43:57Z"},{"publisher":"Neural Information Processing Systems Foundation","oa":1,"citation":{"short":"E. Frantar, E. Kurtic, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 14873–14886.","ama":"Frantar E, Kurtic E, Alistarh D-A. M-FAC: Efficient matrix-free approximations of second-order information. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Neural Information Processing Systems Foundation; 2021:14873-14886.","chicago":"Frantar, Elias, Eldar Kurtic, and Dan-Adrian Alistarh. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:14873–86. Neural Information Processing Systems Foundation, 2021.","mla":"Frantar, Elias, et al. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Neural Information Processing Systems Foundation, 2021, pp. 14873–86.","apa":"Frantar, E., Kurtic, E., &#38; Alistarh, D.-A. (2021). M-FAC: Efficient matrix-free approximations of second-order information. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 14873–14886). Virtual, Online: Neural Information Processing Systems Foundation.","ista":"Frantar E, Kurtic E, Alistarh D-A. 2021. M-FAC: Efficient matrix-free approximations of second-order information. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 34, 14873–14886.","ieee":"E. Frantar, E. Kurtic, and D.-A. Alistarh, “M-FAC: Efficient matrix-free approximations of second-order information,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 14873–14886."},"intvolume":"        34","arxiv":1,"publication_status":"published","department":[{"_id":"DaAl"}],"conference":{"end_date":"2021-12-14","start_date":"2021-12-06","name":"NeurIPS: Neural Information Processing Systems","location":"Virtual, Online"},"project":[{"name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425"}],"scopus_import":"1","volume":34,"main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/7cfd5df443b4eb0d69886a583b33de4c-Paper.pdf"}],"oa_version":"Published Version","day":"06","type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","ec_funded":1,"month":"12","date_updated":"2025-05-14T11:28:00Z","_id":"11463","alternative_title":["Advances in Neural Information Processing Systems"],"corr_author":"1","title":"M-FAC: Efficient matrix-free approximations of second-order information","article_processing_charge":"No","date_published":"2021-12-06T00:00:00Z","date_created":"2022-06-26T22:01:35Z","status":"public","external_id":{"arxiv":["2010.08222"]},"abstract":[{"text":"Efficiently approximating local curvature information of the loss function is a key tool for optimization and compression of deep neural networks. Yet, most existing methods to approximate second-order information have high computational\r\nor storage costs, which limits their practicality. In this work, we investigate matrix-free, linear-time approaches for estimating Inverse-Hessian Vector Products (IHVPs) for the case when the Hessian can be approximated as a sum of rank-one matrices, as in the classic approximation of the Hessian by the empirical Fisher matrix. We propose two new algorithms: the first is tailored towards network compression and can compute the IHVP for dimension d, if the Hessian is given as a sum of m rank-one matrices, using O(dm2) precomputation, O(dm) cost for computing the IHVP, and query cost O(m) for any single element of the inverse Hessian. The second algorithm targets an optimization setting, where we wish to compute the product between the inverse Hessian, estimated over a sliding window of optimization steps, and a given gradient direction, as required for preconditioned SGD. We give an algorithm with cost O(dm + m2) for computing the IHVP and O(dm + m3) for adding or removing any gradient from the sliding window. These\r\ntwo algorithms yield state-of-the-art results for network pruning and optimization with lower computational overhead relative to existing second-order methods. Implementations are available at [9] and [17].","lang":"eng"}],"page":"14873-14886","quality_controlled":"1","author":[{"last_name":"Frantar","first_name":"Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f","full_name":"Frantar, Elias"},{"last_name":"Kurtic","full_name":"Kurtic, Eldar","first_name":"Eldar","id":"47beb3a5-07b5-11eb-9b87-b108ec578218"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh"}],"publication_identifier":{"issn":["1049-5258"],"isbn":["9781713845393"]},"publication":"35th Conference on Neural Information Processing Systems","language":[{"iso":"eng"}],"year":"2021","acknowledgement":"We gratefully acknowledge funding the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), as well as computational support from Amazon Web Services (AWS) EC2."},{"language":[{"iso":"eng"}],"year":"2021","publication":"35th Conference on Neural Information Processing Systems","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"acknowledgement":"We thank the NeurIPS reviewers for insightful comments that helped us improve the positioning of our results, as well as for pointing out the subsampling approach for complementing the randomised lower bound. We also thank Foivos Alimisis and Peter Davies for useful discussions. This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML).","author":[{"last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Korhonen, Janne","id":"C5402D42-15BC-11E9-A202-CA2BE6697425","first_name":"Janne","last_name":"Korhonen"}],"quality_controlled":"1","page":"7254-7266","abstract":[{"text":"We consider a standard distributed optimisation setting where N machines, each holding a d-dimensional function\r\nfi, aim to jointly minimise the sum of the functions ∑Ni=1fi(x). This problem arises naturally in large-scale distributed optimisation, where a standard solution is to apply variants of (stochastic) gradient descent. We focus on the communication complexity of this problem: our main result provides the first fully unconditional bounds on total number of bits which need to be sent and received by the N machines to solve this problem under point-to-point communication, within a given error-tolerance. Specifically, we show that Ω(Ndlogd/Nε) total bits need to be communicated between the machines to find an additive ϵ-approximation to the minimum of ∑Ni=1fi(x). The result holds for both deterministic and randomised algorithms, and, importantly, requires no assumptions on the algorithm structure. The lower bound is tight under certain restrictions on parameter values, and is matched within constant factors for quadratic objectives by a new variant of quantised gradient descent, which we describe and analyse. Our results bring over tools from communication complexity to distributed optimisation, which has potential for further applications.","lang":"eng"}],"status":"public","date_published":"2021-12-06T00:00:00Z","date_created":"2022-06-26T22:01:35Z","article_processing_charge":"No","external_id":{"arxiv":["2010.08222"]},"_id":"11464","date_updated":"2025-05-14T11:27:51Z","month":"12","title":"Towards tight communication lower bounds for distributed optimisation","corr_author":"1","alternative_title":["Advances in Neural Information Processing Systems"],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference","ec_funded":1,"volume":34,"scopus_import":"1","project":[{"name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425"}],"conference":{"end_date":"2021-12-14","start_date":"2021-12-06","location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems"},"main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/3b92d18aa7a6176dd37d372bc2f1eb71-Paper.pdf","open_access":"1"}],"oa_version":"Published Version","day":"06","citation":{"short":"D.-A. Alistarh, J. Korhonen, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 7254–7266.","ama":"Alistarh D-A, Korhonen J. Towards tight communication lower bounds for distributed optimisation. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Neural Information Processing Systems Foundation; 2021:7254-7266.","chicago":"Alistarh, Dan-Adrian, and Janne Korhonen. “Towards Tight Communication Lower Bounds for Distributed Optimisation.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:7254–66. Neural Information Processing Systems Foundation, 2021.","mla":"Alistarh, Dan-Adrian, and Janne Korhonen. “Towards Tight Communication Lower Bounds for Distributed Optimisation.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Neural Information Processing Systems Foundation, 2021, pp. 7254–66.","apa":"Alistarh, D.-A., &#38; Korhonen, J. (2021). Towards tight communication lower bounds for distributed optimisation. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 7254–7266). Virtual, Online: Neural Information Processing Systems Foundation.","ieee":"D.-A. Alistarh and J. Korhonen, “Towards tight communication lower bounds for distributed optimisation,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 7254–7266.","ista":"Alistarh D-A, Korhonen J. 2021. Towards tight communication lower bounds for distributed optimisation. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 34, 7254–7266."},"oa":1,"publisher":"Neural Information Processing Systems Foundation","department":[{"_id":"DaAl"}],"publication_status":"published","arxiv":1,"intvolume":"        34"},{"ec_funded":1,"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)"},"corr_author":"1","ddc":["000"],"title":"Communication-efficient distributed optimization with quantized preconditioners","month":"07","date_updated":"2025-07-10T11:50:37Z","_id":"13147","intvolume":"       139","arxiv":1,"publication_status":"published","department":[{"_id":"DaAl"}],"publisher":"ML Research Press","oa":1,"citation":{"chicago":"Alimisis, Foivos, Peter Davies, and Dan-Adrian Alistarh. “Communication-Efficient Distributed Optimization with Quantized Preconditioners.” In <i>Proceedings of the 38th International Conference on Machine Learning</i>, 139:196–206. ML Research Press, 2021.","short":"F. Alimisis, P. Davies, D.-A. Alistarh, in:, Proceedings of the 38th International Conference on Machine Learning, ML Research Press, 2021, pp. 196–206.","ama":"Alimisis F, Davies P, Alistarh D-A. Communication-efficient distributed optimization with quantized preconditioners. In: <i>Proceedings of the 38th International Conference on Machine Learning</i>. Vol 139. ML Research Press; 2021:196-206.","ieee":"F. Alimisis, P. Davies, and D.-A. Alistarh, “Communication-efficient distributed optimization with quantized preconditioners,” in <i>Proceedings of the 38th International Conference on Machine Learning</i>, Virtual, 2021, vol. 139, pp. 196–206.","ista":"Alimisis F, Davies P, Alistarh D-A. 2021. Communication-efficient distributed optimization with quantized preconditioners. Proceedings of the 38th International Conference on Machine Learning. ICML: International Conference on Machine Learning vol. 139, 196–206.","apa":"Alimisis, F., Davies, P., &#38; Alistarh, D.-A. (2021). Communication-efficient distributed optimization with quantized preconditioners. In <i>Proceedings of the 38th International Conference on Machine Learning</i> (Vol. 139, pp. 196–206). Virtual: ML Research Press.","mla":"Alimisis, Foivos, et al. “Communication-Efficient Distributed Optimization with Quantized Preconditioners.” <i>Proceedings of the 38th International Conference on Machine Learning</i>, vol. 139, ML Research Press, 2021, pp. 196–206."},"oa_version":"Published Version","day":"01","project":[{"grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425"},{"call_identifier":"H2020","_id":"260C2330-B435-11E9-9278-68D0E5697425","name":"ISTplus - Postdoctoral Fellowships","grant_number":"754411"}],"conference":{"end_date":"2021-07-24","start_date":"2021-07-18","location":"Virtual","name":"ICML: International Conference on Machine Learning"},"scopus_import":"1","has_accepted_license":"1","volume":139,"quality_controlled":"1","author":[{"full_name":"Alimisis, Foivos","first_name":"Foivos","last_name":"Alimisis"},{"last_name":"Davies","orcid":"0000-0002-5646-9524","full_name":"Davies, Peter","id":"11396234-BB50-11E9-B24C-90FCE5697425","first_name":"Peter"},{"full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","last_name":"Alistarh"}],"file_date_updated":"2023-06-19T10:41:05Z","acknowledgement":"The authors would like to thank Janne Korhonen, Aurelien Lucchi, Celestine MendlerDunner and Antonio Orvieto for helpful discussions. FA ¨and DA were supported during this work by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). PD was supported by the European Union’s Horizon 2020 programme under the Marie Skłodowska-Curie grant agreement No. 754411.","publication_identifier":{"eissn":["2640-3498"],"isbn":["9781713845065"]},"publication":"Proceedings of the 38th International Conference on Machine Learning","year":"2021","language":[{"iso":"eng"}],"external_id":{"arxiv":["2102.07214"]},"article_processing_charge":"No","date_published":"2021-07-01T00:00:00Z","date_created":"2023-06-18T22:00:48Z","status":"public","file":[{"relation":"main_file","file_name":"2021_PMLR_Alimisis.pdf","checksum":"7ec0d59bac268b49c76bf2e036dedd7a","date_created":"2023-06-19T10:41:05Z","access_level":"open_access","creator":"dernst","success":1,"content_type":"application/pdf","file_size":429087,"date_updated":"2023-06-19T10:41:05Z","file_id":"13154"}],"abstract":[{"text":"We investigate fast and communication-efficient algorithms for the classic problem of minimizing a sum of strongly convex and smooth functions that are distributed among n\r\n different nodes, which can communicate using a limited number of bits. Most previous communication-efficient approaches for this problem are limited to first-order optimization, and therefore have \\emph{linear} dependence on the condition number in their communication complexity. We show that this dependence is not inherent: communication-efficient methods can in fact have sublinear dependence on the condition number. For this, we design and analyze the first communication-efficient distributed variants of preconditioned gradient descent for Generalized Linear Models, and for Newton’s method. Our results rely on a new technique for quantizing both the preconditioner and the descent direction at each step of the algorithms, while controlling their convergence rate. We also validate our findings experimentally, showing faster convergence and reduced communication relative to previous methods.","lang":"eng"}],"page":"196-206"}]