[{"ddc":["005"],"publisher":"Institute of Science and Technology Austria","OA_place":"publisher","related_material":{"record":[{"relation":"part_of_dissertation","status":"public","id":"20819"},{"status":"public","id":"17411","relation":"part_of_dissertation"},{"relation":"part_of_dissertation","status":"public","id":"18120"},{"relation":"part_of_dissertation","id":"21207","status":"public"}]},"has_accepted_license":"1","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"alternative_title":["ISTA Thesis"],"corr_author":"1","file":[{"relation":"source_file","date_created":"2026-02-17T11:46:22Z","date_updated":"2026-02-17T11:46:22Z","file_name":"2026_Scott_Jonathan_Thesis_Source.zip","creator":"jscott","checksum":"121c1d968bd86f3630aa7e81d5bbbcb0","access_level":"closed","file_id":"21298","content_type":"application/zip","file_size":272379252},{"success":1,"file_id":"21366","content_type":"application/pdf","file_size":15220298,"relation":"main_file","date_created":"2026-02-27T10:25:41Z","file_name":"2026_Jonathan_Scott_Thesis.pdf","date_updated":"2026-02-27T10:25:41Z","checksum":"6e3e08ba474bbee8511cc8a839ab2077","creator":"jscott","access_level":"open_access"}],"publication_status":"published","type":"dissertation","supervisor":[{"first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"}],"doi":"10.15479/AT-ISTA-21198","acknowledged_ssus":[{"_id":"ScienComp"}],"acknowledgement":"This research was funded in part by the Austrian Science Fund (FWF)\r\n[10.55776/COE12]. Furthermore, the candidate acknowledges the support from the Scientific\r\nService Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp).","day":"09","citation":{"short":"J.A. Scott, Data Heterogeneity and Personalization in Federated Learning, Institute of Science and Technology Austria, 2026.","ieee":"J. A. Scott, “Data heterogeneity and personalization in federated learning,” Institute of Science and Technology Austria, 2026.","mla":"Scott, Jonathan A. <i>Data Heterogeneity and Personalization in Federated Learning</i>. Institute of Science and Technology Austria, 2026, doi:<a href=\"https://doi.org/10.15479/AT-ISTA-21198\">10.15479/AT-ISTA-21198</a>.","ista":"Scott JA. 2026. Data heterogeneity and personalization in federated learning. Institute of Science and Technology Austria.","chicago":"Scott, Jonathan A. “Data Heterogeneity and Personalization in Federated Learning.” Institute of Science and Technology Austria, 2026. <a href=\"https://doi.org/10.15479/AT-ISTA-21198\">https://doi.org/10.15479/AT-ISTA-21198</a>.","ama":"Scott JA. Data heterogeneity and personalization in federated learning. 2026. doi:<a href=\"https://doi.org/10.15479/AT-ISTA-21198\">10.15479/AT-ISTA-21198</a>","apa":"Scott, J. A. (2026). <i>Data heterogeneity and personalization in federated learning</i>. Institute of Science and Technology Austria. <a href=\"https://doi.org/10.15479/AT-ISTA-21198\">https://doi.org/10.15479/AT-ISTA-21198</a>"},"file_date_updated":"2026-02-27T10:25:41Z","page":"158","abstract":[{"text":"In recent years there has been a massive increase in the amount of data generated in a\r\ndecentralized manner. Ever more powerful edge devices, such as smartphones, have become\r\nubiquitous in most societies on earth. Through text typed, photos taken and apps used,\r\nthese devices, which we refer to as clients, generate enormous amounts of high quality and\r\ncomplex data. 
Moreover, the nature of these devices means the data they generate is often\r\nsensitive, and privacy concerns prevent it from being gathered and stored in a central location. This\r\npresents a challenge to the modern machine learning paradigm that requires central access\r\nto large amounts of data. Federated learning (FL) has emerged as one of the answers to\r\nthis problem. Rather than bringing the data to the model, FL sends the model to the data.\r\nModel training takes place on device, with periodically synchronized updates, allowing data to\r\nremain locally stored. While this approach offers significant privacy advantages, it comes with\r\nits own set of unique challenges. These include: data heterogeneity, the notion that different\r\ndevices generate data in distinct ways, which can negatively impact training dynamics; systems\r\nheterogeneity, meaning that different devices may have differing hardware specifications; high\r\ncommunication costs, which are induced by the repeated transferring of models over the\r\nnetwork; and low device computational power, which limits the use of larger models on device.\r\nIn this thesis we present a range of methods for federated learning. We focus primarily on\r\nthe challenge of data heterogeneity, though the methods presented are designed to be well\r\nadapted to the other challenges of a federated setting, such as the constraints of limited\r\ncompute and communication overhead. We first present a method for explicitly modeling client\r\ndata heterogeneity. The approach formulates clients as samples from a certain probability\r\ndistribution and infers the parameters of this distribution from the available training clients.\r\nThis learned distribution then represents the heterogeneity present among the clients and can\r\nbe sampled from in order to create new simulated clients that are similar to the real clients we\r\nhave observed so far. Following this, we present two methods for directly dealing with data\r\nheterogeneity through personalization. Highly heterogeneous client data distributions can mean\r\nthat learning a single global model becomes suboptimal, and some form of personalization of\r\nmodels to each individual client is required. Our approaches are based around hypernetworks,\r\nwhich we use to generate personalized model parameters without the need for additional\r\ntraining or finetuning. In the first approach, we focus on generating full parameterizations of\r\nclient models using learned embeddings of client data and labels, with a hypernetwork located\r\non the central server. In the second approach, we address the more challenging scenario where\r\nwe want to generate a personalized model for a client without any label information. The\r\nhypernetwork is trained to generate a low-dimensional representation of a client’s personalized\r\nmodel parameters, allowing it to be transferred to and run on the client devices. In our final\r\npresented method, we change our focus: rather than aiming to directly address the challenge\r\nof data heterogeneity, we instead ensure we are unaffected by it. 
This is done in the context\r\nof k-means clustering and we present a method for federated clustering with a focus on added\r\nprivacy guarantees.","lang":"eng"}],"oa_version":"Published Version","user_id":"ba8df636-2132-11f1-aed0-ed93e2281fdd","oa":1,"publication_identifier":{"issn":["2663-337X"]},"date_updated":"2026-04-07T11:46:11Z","date_created":"2026-02-09T14:59:53Z","author":[{"last_name":"Scott","first_name":"Jonathan A","id":"e499926b-f6e0-11ea-865d-9c63db0031e8","full_name":"Scott, Jonathan A"}],"date_published":"2026-02-09T00:00:00Z","month":"02","title":"Data heterogeneity and personalization in federated learning","degree_awarded":"PhD","_id":"21198","year":"2026","language":[{"iso":"eng"}],"article_processing_charge":"No","status":"public"},{"PlanS_conform":"1","status":"public","language":[{"iso":"eng"}],"article_type":"original","article_processing_charge":"Yes (via OA deal)","intvolume":"        37","year":"2025","_id":"12662","date_published":"2025-10-01T00:00:00Z","title":"Generalization in multi-objective machine learning","scopus_import":"1","month":"10","author":[{"first_name":"Peter","last_name":"Súkeník","id":"d64d6a8d-eb8e-11eb-b029-96fd216dec3c","full_name":"Súkeník, Peter"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph","full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887"}],"date_created":"2023-02-20T08:23:06Z","publication_identifier":{"eissn":["1433-3058"],"issn":["0941-0643"]},"date_updated":"2025-12-30T06:39:56Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","external_id":{"arxiv":["2208.13499"]},"OA_type":"hybrid","oa":1,"volume":37,"abstract":[{"text":"Modern machine learning tasks often require considering not just one but multiple objectives. For example, besides the prediction quality, this could be the efficiency, robustness or fairness of the learned models, or any of their combinations. Multi-objective learning offers a natural framework for handling such problems without having to commit to early trade-offs. Surprisingly, statistical learning theory so far offers almost no insight into the generalization properties of multi-objective learning. In this work, we make first steps to fill this gap: We establish foundational generalization bounds for the multi-objective setting as well as generalization and excess bounds for learning with scalarizations. We also provide the first theoretical analysis of the relation between the Pareto-optimal sets of the true objectives and the Pareto-optimal sets of their empirical approximations from training data. In particular, we show a surprising asymmetry: All Pareto-optimal solutions can be approximated by empirically Pareto-optimal ones, but not vice versa.","lang":"eng"}],"arxiv":1,"oa_version":"Published Version","page":"24669–24683","file_date_updated":"2025-12-30T06:39:11Z","citation":{"short":"P. Súkeník, C. Lampert, Neural Computing and Applications 37 (2025) 24669–24683.","ieee":"P. Súkeník and C. Lampert, “Generalization in multi-objective machine learning,” <i>Neural Computing and Applications</i>, vol. 37. Springer Nature, pp. 24669–24683, 2025.","chicago":"Súkeník, Peter, and Christoph Lampert. “Generalization in Multi-Objective Machine Learning.” <i>Neural Computing and Applications</i>. Springer Nature, 2025. <a href=\"https://doi.org/10.1007/s00521-024-10616-1\">https://doi.org/10.1007/s00521-024-10616-1</a>.","ista":"Súkeník P, Lampert C. 2025. Generalization in multi-objective machine learning. Neural Computing and Applications. 
37, 24669–24683.","ama":"Súkeník P, Lampert C. Generalization in multi-objective machine learning. <i>Neural Computing and Applications</i>. 2025;37:24669–24683. doi:<a href=\"https://doi.org/10.1007/s00521-024-10616-1\">10.1007/s00521-024-10616-1</a>","apa":"Súkeník, P., &#38; Lampert, C. (2025). Generalization in multi-objective machine learning. <i>Neural Computing and Applications</i>. Springer Nature. <a href=\"https://doi.org/10.1007/s00521-024-10616-1\">https://doi.org/10.1007/s00521-024-10616-1</a>","mla":"Súkeník, Peter, and Christoph Lampert. “Generalization in Multi-Objective Machine Learning.” <i>Neural Computing and Applications</i>, vol. 37, Springer Nature, 2025, pp. 24669–24683, doi:<a href=\"https://doi.org/10.1007/s00521-024-10616-1\">10.1007/s00521-024-10616-1</a>."},"day":"01","license":"https://creativecommons.org/licenses/by/4.0/","tmp":{"image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"acknowledgement":"Open access funding provided by Institute of Science and Technology (IST Austria).","doi":"10.1007/s00521-024-10616-1","type":"journal_article","publication_status":"published","quality_controlled":"1","department":[{"_id":"ChLa"}],"corr_author":"1","file":[{"date_updated":"2025-12-30T06:39:11Z","file_name":"2025_NeuralCompApplic_Sukenik.pdf","creator":"dernst","access_level":"open_access","checksum":"61ad4591aee16b1e02daf6c164321a42","relation":"main_file","date_created":"2025-12-30T06:39:11Z","content_type":"application/pdf","file_size":500213,"file_id":"20877","success":1}],"publisher":"Springer Nature","OA_place":"publisher","has_accepted_license":"1","publication":"Neural Computing and Applications","ddc":["004"]},{"ddc":["000"],"publication":"7th Annual Learning for Dynamics & Control Conference","publisher":"ML Research Press","has_accepted_license":"1","OA_place":"publisher","department":[{"_id":"ToHe"},{"_id":"ChLa"}],"corr_author":"1","file":[{"success":1,"file_id":"20283","file_size":489639,"content_type":"application/pdf","relation":"main_file","date_created":"2025-09-03T10:32:12Z","date_updated":"2025-09-03T10:32:12Z","file_name":"2025_L4DC_HenzingerT.pdf","access_level":"open_access","checksum":"d5236e561560635f5ae1d17de4903033","creator":"dernst"}],"alternative_title":["PMLR"],"publication_status":"published","type":"conference","quality_controlled":"1","acknowledgement":"This work was supported in part by the ERC project ERC-2020-AdG 101020093.\r\n","day":"01","citation":{"chicago":"Henzinger, Thomas A, Fabian Kresse, Kaushik Mallik, Emily Yu, and Dorde Zikelic. “Predictive Monitoring of Black-Box Dynamical Systems.” In <i>7th Annual Learning for Dynamics &#38; Control Conference</i>, 283:804–16. ML Research Press, 2025.","ista":"Henzinger TA, Kresse F, Mallik K, Yu E, Zikelic D. 2025. Predictive monitoring of black-box dynamical systems. 7th Annual Learning for Dynamics &#38; Control Conference. L4DC: Learning for Dynamics &#38; Control, PMLR, vol. 283, 804–816.","apa":"Henzinger, T. A., Kresse, F., Mallik, K., Yu, E., &#38; Zikelic, D. (2025). Predictive monitoring of black-box dynamical systems. In <i>7th Annual Learning for Dynamics &#38; Control Conference</i> (Vol. 283, pp. 804–816). Ann Arbor, MI, United States: ML Research Press.","ama":"Henzinger TA, Kresse F, Mallik K, Yu E, Zikelic D. Predictive monitoring of black-box dynamical systems. 
In: <i>7th Annual Learning for Dynamics &#38; Control Conference</i>. Vol 283. ML Research Press; 2025:804-816.","mla":"Henzinger, Thomas A., et al. “Predictive Monitoring of Black-Box Dynamical Systems.” <i>7th Annual Learning for Dynamics &#38; Control Conference</i>, vol. 283, ML Research Press, 2025, pp. 804–16.","short":"T.A. Henzinger, F. Kresse, K. Mallik, E. Yu, D. Zikelic, in:, 7th Annual Learning for Dynamics &#38; Control Conference, ML Research Press, 2025, pp. 804–816.","ieee":"T. A. Henzinger, F. Kresse, K. Mallik, E. Yu, and D. Zikelic, “Predictive monitoring of black-box dynamical systems,” in <i>7th Annual Learning for Dynamics &#38; Control Conference</i>, Ann Arbor, MI, United States, 2025, vol. 283, pp. 804–816."},"file_date_updated":"2025-09-03T10:32:12Z","page":"804-816","conference":{"start_date":"2025-06-04","location":"Ann Arbor, MI, United States","name":"L4DC: Learning for Dynamics & Control","end_date":"2025-06-06"},"abstract":[{"text":"We study the problem of predictive runtime monitoring of black-box dynamical systems with quantitative safety properties. The black-box setting stipulates that the exact semantics of the dynamical system and the controller are unknown, and that we are only able to observe the state of the controlled (aka, closed-loop) system at finitely many time points. We present a novel framework for predicting future states of the system based on the states observed in the past. The numbers of past states and of predicted future states are parameters provided by the user. Our method is based on a combination of Taylor’s expansion and the backward difference operator for numerical differentiation. We also derive an upper bound on the prediction error under the assumption that the system dynamics and the controller are smooth. The predicted states are then used to predict safety violations ahead in time. 
Our experiments demonstrate practical applicability of our method for complex black-box systems, showing that it is computationally lightweight and yet significantly more accurate than the state-of-the-art predictive safety monitoring techniques.","lang":"eng"}],"arxiv":1,"oa_version":"Published Version","external_id":{"arxiv":["2412.16564"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","volume":283,"oa":1,"OA_type":"gold","publication_identifier":{"eissn":["2640-3498"]},"date_updated":"2025-09-03T10:37:59Z","project":[{"name":"Vigilant Algorithmic Monitoring of Software","grant_number":"101020093","_id":"62781420-2b32-11ec-9570-8d9b63373d4d","call_identifier":"H2020"}],"date_created":"2025-08-31T22:01:32Z","author":[{"orcid":"0000-0002-2985-7724","full_name":"Henzinger, Thomas A","first_name":"Thomas A","last_name":"Henzinger","id":"40876CD8-F248-11E8-B48F-1D18A9856A87"},{"id":"faff3c84-23f6-11ef-9085-e5187b51c604","first_name":"Fabian","last_name":"Kresse","full_name":"Kresse, Fabian"},{"full_name":"Mallik, Kaushik","orcid":"0000-0001-9864-7475","last_name":"Mallik","first_name":"Kaushik","id":"0834ff3c-6d72-11ec-94e0-b5b0a4fb8598"},{"full_name":"Yu, Zhengqi","last_name":"Yu","first_name":"Zhengqi","id":"20aa2ae8-f2f1-11ed-bbfa-8205053f1342"},{"orcid":"0000-0002-4681-1699","full_name":"Zikelic, Dorde","first_name":"Dorde","last_name":"Zikelic","id":"294AA7A6-F248-11E8-B48F-1D18A9856A87"}],"date_published":"2025-06-01T00:00:00Z","scopus_import":"1","month":"06","title":"Predictive monitoring of black-box dynamical systems","intvolume":"       283","_id":"20256","year":"2025","language":[{"iso":"eng"}],"article_processing_charge":"No","status":"public","ec_funded":1},{"ec_funded":1,"status":"public","language":[{"iso":"eng"}],"article_processing_charge":"No","intvolume":"       288","year":"2025","_id":"20296","date_published":"2025-06-01T00:00:00Z","title":"Logic gate neural networks are good for verification","scopus_import":"1","month":"06","project":[{"call_identifier":"H2020","_id":"62781420-2b32-11ec-9570-8d9b63373d4d","grant_number":"101020093","name":"Vigilant Algorithmic Monitoring of Software"}],"author":[{"full_name":"Kresse, Fabian","id":"faff3c84-23f6-11ef-9085-e5187b51c604","first_name":"Fabian","last_name":"Kresse"},{"last_name":"Yu","first_name":"Zhengqi","id":"20aa2ae8-f2f1-11ed-bbfa-8205053f1342","full_name":"Yu, Zhengqi"},{"full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph"},{"id":"40876CD8-F248-11E8-B48F-1D18A9856A87","last_name":"Henzinger","first_name":"Thomas A","full_name":"Henzinger, Thomas A","orcid":"0000-0002-2985-7724"}],"date_created":"2025-09-07T22:01:34Z","publication_identifier":{"eissn":["2640-3498"]},"date_updated":"2025-09-09T08:12:44Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","external_id":{"arxiv":["2505.19932"]},"OA_type":"diamond","oa":1,"volume":288,"abstract":[{"lang":"eng","text":"Learning-based systems are increasingly deployed across various domains, yet the complexity of traditional neural networks poses significant challenges for formal verification. Unlike conventional neural networks, learned Logic Gate Networks (LGNs) replace multiplications with Boolean logic gates, yielding a sparse, netlist-like architecture that is inherently more amenable to symbolic verification, while still delivering promising performance. In this paper, we introduce a SAT encoding for verifying global robustness and fairness in LGNs. 
We evaluate our method on five benchmark datasets, including a newly constructed 5-class variant, and find that LGNs are verification-friendly while maintaining strong predictive performance."}],"arxiv":1,"oa_version":"Published Version","conference":{"start_date":"2025-05-28","end_date":"2025-05-30","name":"NeuS: International Conference on Neuro-Symbolic Systems","location":"Philadelphia, PA, United States"},"file_date_updated":"2025-09-09T08:10:13Z","citation":{"mla":"Kresse, Fabian, et al. “Logic Gate Neural Networks Are Good for Verification.” <i>2nd International Conference on Neuro-Symbolic Systems</i>, vol. 288, 26, ML Research Press, 2025.","ama":"Kresse F, Yu E, Lampert C, Henzinger TA. Logic gate neural networks are good for verification. In: <i>2nd International Conference on Neuro-Symbolic Systems</i>. Vol 288. ML Research Press; 2025.","apa":"Kresse, F., Yu, E., Lampert, C., &#38; Henzinger, T. A. (2025). Logic gate neural networks are good for verification. In <i>2nd International Conference on Neuro-Symbolic Systems</i> (Vol. 288). Philadelphia, PA, United States: ML Research Press.","chicago":"Kresse, Fabian, Emily Yu, Christoph Lampert, and Thomas A Henzinger. “Logic Gate Neural Networks Are Good for Verification.” In <i>2nd International Conference on Neuro-Symbolic Systems</i>, Vol. 288. ML Research Press, 2025.","ista":"Kresse F, Yu E, Lampert C, Henzinger TA. 2025. Logic gate neural networks are good for verification. 2nd International Conference on Neuro-Symbolic Systems. NeuS: International Conference on Neuro-Symbolic Systems, PMLR, vol. 288, 26.","ieee":"F. Kresse, E. Yu, C. Lampert, and T. A. Henzinger, “Logic gate neural networks are good for verification,” in <i>2nd International Conference on Neuro-Symbolic Systems</i>, Philadelphia, PA, United States, 2025, vol. 288.","short":"F. Kresse, E. Yu, C. Lampert, T.A. Henzinger, in:, 2nd International Conference on Neuro-Symbolic Systems, ML Research Press, 2025."},"day":"01","article_number":"26","acknowledgement":"This work is supported in part by the ERC grant under Grant No. ERC-2020-AdG 101020093 and\r\nthe Austrian Science Fund (FWF) [10.55776/COE12]. This research was supported by the Scientific\r\nService Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp).","acknowledged_ssus":[{"_id":"ScienComp"}],"type":"conference","publication_status":"published","quality_controlled":"1","department":[{"_id":"ChLa"},{"_id":"ToHe"}],"alternative_title":["PMLR"],"corr_author":"1","file":[{"success":1,"file_id":"20314","file_size":295466,"content_type":"application/pdf","date_created":"2025-09-09T08:10:13Z","relation":"main_file","creator":"dernst","access_level":"open_access","checksum":"90a32defed34787e771a5c1623b6b0d2","date_updated":"2025-09-09T08:10:13Z","file_name":"2025_NeuS_Kresse.pdf"}],"publisher":"ML Research Press","OA_place":"publisher","has_accepted_license":"1","publication":"2nd International Conference on Neuro-Symbolic Systems","ddc":["000"]},{"page":"118-126","conference":{"location":"Mai Khao, Thailand","name":"AISTATS: Conference on Artificial Intelligence and Statistics","end_date":"2025-05-05","start_date":"2025-05-03"},"file_date_updated":"2025-09-09T08:26:44Z","day":"01","citation":{"ieee":"N. Kalinin and L. Steinberger, “Efficient estimation of a Gaussian mean with local differential privacy,” in <i>Proceedings of the 28th International Conference on Artificial Intelligence and Statistics</i>, Mai Khao, Thailand, 2025, vol. 258, pp. 118–126.","short":"N. Kalinin, L. 
Steinberger, in:, Proceedings of the 28th International Conference on Artificial Intelligence and Statistics, ML Research Press, 2025, pp. 118–126.","mla":"Kalinin, Nikita, and Lukas Steinberger. “Efficient Estimation of a Gaussian Mean with Local Differential Privacy.” <i>Proceedings of the 28th International Conference on Artificial Intelligence and Statistics</i>, vol. 258, ML Research Press, 2025, pp. 118–26.","ama":"Kalinin N, Steinberger L. Efficient estimation of a Gaussian mean with local differential privacy. In: <i>Proceedings of the 28th International Conference on Artificial Intelligence and Statistics</i>. Vol 258. ML Research Press; 2025:118-126.","apa":"Kalinin, N., &#38; Steinberger, L. (2025). Efficient estimation of a Gaussian mean with local differential privacy. In <i>Proceedings of the 28th International Conference on Artificial Intelligence and Statistics</i> (Vol. 258, pp. 118–126). Mai Khao, Thailand: ML Research Press.","chicago":"Kalinin, Nikita, and Lukas Steinberger. “Efficient Estimation of a Gaussian Mean with Local Differential Privacy.” In <i>Proceedings of the 28th International Conference on Artificial Intelligence and Statistics</i>, 258:118–26. ML Research Press, 2025.","ista":"Kalinin N, Steinberger L. 2025. Efficient estimation of a Gaussian mean with local differential privacy. Proceedings of the 28th International Conference on Artificial Intelligence and Statistics. AISTATS: Conference on Artificial Intelligence and Statistics, PMLR, vol. 258, 118–126."},"acknowledgement":"We would like to express our gratitude to Christoph Lampert for his valuable insights and fruitful discussions that significantly contributed to the development of this paper.\r\nWe also thank Salil Vadhan for his constructive feedback on an earlier version of this draft.\r\nThe second author gratefully acknowledges support by the Austrian Science Fund (FWF): I 5484-N, as part of the Research Unit 5381 of the German Research Foundation.","tmp":{"image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"quality_controlled":"1","publication_status":"published","type":"conference","corr_author":"1","file":[{"content_type":"application/pdf","file_size":395864,"success":1,"file_id":"20316","checksum":"3dcd59988ca974b98662ba09a516e616","creator":"dernst","access_level":"open_access","file_name":"2025_AISTATS_Kalinin.pdf","date_updated":"2025-09-09T08:26:44Z","date_created":"2025-09-09T08:26:44Z","relation":"main_file"}],"alternative_title":["PMLR"],"department":[{"_id":"ChLa"}],"OA_place":"publisher","has_accepted_license":"1","publisher":"ML Research Press","publication":"Proceedings of the 28th International Conference on Artificial Intelligence and Statistics","ddc":["000"],"status":"public","article_processing_charge":"No","language":[{"iso":"eng"}],"_id":"20298","year":"2025","intvolume":"       258","scopus_import":"1","month":"05","title":"Efficient estimation of a Gaussian mean with local differential privacy","date_published":"2025-05-01T00:00:00Z","date_created":"2025-09-07T22:01:34Z","author":[{"full_name":"Kalinin, Nikita","first_name":"Nikita","last_name":"Kalinin","id":"4b14526e-14d2-11ed-ba64-c14c9553d137"},{"full_name":"Steinberger, 
Lukas","last_name":"Steinberger","first_name":"Lukas"}],"date_updated":"2025-09-09T08:28:41Z","publication_identifier":{"eissn":["2640-3498"]},"OA_type":"diamond","volume":258,"oa":1,"external_id":{"arxiv":["2402.04840"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Published Version","arxiv":1,"abstract":[{"text":"In this paper, we study the problem of estimating the unknown mean θ of a unit variance Gaussian distribution in a locally differentially private (LDP) way. In the high-privacy regime (ϵ≤1\r\n), we identify an optimal privacy mechanism that minimizes the variance of the estimator asymptotically. Our main technical contribution is the maximization of the Fisher-Information of the sanitized data with respect to the local privacy mechanism Q. We find that the exact solution Qθ,ϵ of this maximization is the sign mechanism that applies randomized response to the sign of Xi−θ, where X1,…,Xn are the confidential iid original samples. However, since this optimal local mechanism depends on the unknown mean θ, we employ a two-stage LDP parameter estimation procedure which requires splitting agents into two groups. The first n1 observations are used to consistently but not necessarily efficiently estimate the parameter θ by θn1~\r\n. Then this estimate is updated by applying the sign mechanism with θ~n1 instead of θ\r\n to the remaining n−n1 observations, to obtain an LDP and efficient estimator of the unknown mean.","lang":"eng"}]},{"status":"public","language":[{"iso":"eng"}],"article_processing_charge":"No","year":"2025","_id":"20455","date_published":"2025-06-15T00:00:00Z","title":"Intriguing properties of robust classification","scopus_import":"1","month":"06","author":[{"full_name":"Prach, Bernd","id":"2D561D42-C427-11E9-89B4-9C1AE6697425","first_name":"Bernd","last_name":"Prach"},{"last_name":"Lampert","first_name":"Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887"}],"date_created":"2025-10-12T22:01:26Z","publication_identifier":{"eissn":["2160-7516"],"issn":["2160-7508"],"isbn":["9798331599942"]},"date_updated":"2025-10-13T07:18:26Z","external_id":{"arxiv":["2412.04245"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa":1,"OA_type":"green","abstract":[{"lang":"eng","text":"Despite extensive research since the community learned about adversarial examples 10 years ago, we still do not know how to train high-accuracy classifiers that are guaranteed to be robust to small perturbations of their inputs. Previous works often argued that this might be because no classifier exists that is robust and accurate at the same time. However, in computer vision this assumption does not match reality where humans are usually accurate and robust on most tasks of interest. We offer an alternative explanation and show that in certain settings robust generalization is only possible with unrealistically large amounts of data. Specifically, we find a setting where a robust classifier exists, it is easy to learn an accurate classifier, yet it requires an exponential amount of data to learn a robust classifier. Based on this theoretical result, we evaluate the influence of the amount of training data on datasets such as CIFAR10. Our findings indicate that the the amount of training data is the main factor determining the robust performance. Furthermore we show that that there are low magnitude directions in the data which are useful for non-robust generalization but are not available for robust classifiers. 
This implies that robust classification is a strictly harder task than normal classification, thereby providing an explanation for why robust classification requires more data."}],"oa_version":"Preprint","arxiv":1,"conference":{"start_date":"2025-06-11","name":"CVPR: Conference on Computer Vision and Pattern Recognition","end_date":"2025-06-12","location":"Nashville, TN, United States"},"page":"660-669","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2412.04245","open_access":"1"}],"citation":{"mla":"Prach, Bernd, and Christoph Lampert. “Intriguing Properties of Robust Classification.” <i>2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops</i>, IEEE, 2025, pp. 660–69, doi:<a href=\"https://doi.org/10.1109/CVPRW67362.2025.00071\">10.1109/CVPRW67362.2025.00071</a>.","apa":"Prach, B., &#38; Lampert, C. (2025). Intriguing properties of robust classification. In <i>2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops</i> (pp. 660–669). Nashville, TN, United States: IEEE. <a href=\"https://doi.org/10.1109/CVPRW67362.2025.00071\">https://doi.org/10.1109/CVPRW67362.2025.00071</a>","ama":"Prach B, Lampert C. Intriguing properties of robust classification. In: <i>2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops</i>. IEEE; 2025:660-669. doi:<a href=\"https://doi.org/10.1109/CVPRW67362.2025.00071\">10.1109/CVPRW67362.2025.00071</a>","chicago":"Prach, Bernd, and Christoph Lampert. “Intriguing Properties of Robust Classification.” In <i>2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops</i>, 660–69. IEEE, 2025. <a href=\"https://doi.org/10.1109/CVPRW67362.2025.00071\">https://doi.org/10.1109/CVPRW67362.2025.00071</a>.","ista":"Prach B, Lampert C. 2025. Intriguing properties of robust classification. 2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. CVPR: Conference on Computer Vision and Pattern Recognition, 660–669.","ieee":"B. Prach and C. Lampert, “Intriguing properties of robust classification,” in <i>2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops</i>, Nashville, TN, United States, 2025, pp. 660–669.","short":"B. Prach, C. Lampert, in:, 2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops, IEEE, 2025, pp. 660–669."},"day":"15","doi":"10.1109/CVPRW67362.2025.00071","type":"conference","publication_status":"published","quality_controlled":"1","department":[{"_id":"ChLa"}],"corr_author":"1","publisher":"IEEE","related_material":{"record":[{"id":"18874","status":"public","relation":"earlier_version"}]},"OA_place":"repository","publication":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops"},{"has_accepted_license":"1","OA_place":"publisher","related_material":{"record":[{"relation":"dissertation_contains","id":"21198","status":"public"}]},"publisher":"ML Research Press","ddc":["000"],"publication":"42nd International Conference on Machine Learning","quality_controlled":"1","type":"conference","publication_status":"published","alternative_title":["PMLR"],"file":[{"checksum":"815b32b463023ca21e569c2158745c15","access_level":"open_access","creator":"dernst","file_name":"2025_ICML_Scott.pdf","date_updated":"2025-12-16T12:38:29Z","date_created":"2025-12-16T12:38:29Z","relation":"main_file","file_size":746612,"content_type":"application/pdf","success":1,"file_id":"20829"}],"corr_author":"1","department":[{"_id":"ChLa"},{"_id":"MoHe"}],"citation":{"mla":"Scott, Jonathan A., et al. 
“Differentially Private Federated K-Means Clustering with Server-Side Data.” <i>42nd International Conference on Machine Learning</i>, vol. 267, ML Research Press, 2025, pp. 53757–90.","ama":"Scott JA, Lampert C, Saulpic D. Differentially private federated k-means clustering with server-side data. In: <i>42nd International Conference on Machine Learning</i>. Vol 267. ML Research Press; 2025:53757-53790.","apa":"Scott, J. A., Lampert, C., &#38; Saulpic, D. (2025). Differentially private federated k-means clustering with server-side data. In <i>42nd International Conference on Machine Learning</i> (Vol. 267, pp. 53757–53790). Vancouver, Canada: ML Research Press.","chicago":"Scott, Jonathan A, Christoph Lampert, and David Saulpic. “Differentially Private Federated K-Means Clustering with Server-Side Data.” In <i>42nd International Conference on Machine Learning</i>, 267:53757–90. ML Research Press, 2025.","ista":"Scott JA, Lampert C, Saulpic D. 2025. Differentially private federated k-means clustering with server-side data. 42nd International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 267, 53757–53790.","ieee":"J. A. Scott, C. Lampert, and D. Saulpic, “Differentially private federated k-means clustering with server-side data,” in <i>42nd International Conference on Machine Learning</i>, Vancouver, Canada, 2025, vol. 267, pp. 53757–53790.","short":"J.A. Scott, C. Lampert, D. Saulpic, in:, 42nd International Conference on Machine Learning, ML Research Press, 2025, pp. 53757–53790."},"day":"01","tmp":{"image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"acknowledgement":"This research was funded in part by the Austrian Science Fund (FWF) [10.55776/COE12] and supported by the Scientific Service Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp).\r\n","acknowledged_ssus":[{"_id":"ScienComp"}],"page":"53757-53790","conference":{"start_date":"2025-07-13","location":"Vancouver, Canada","end_date":"2025-07-19","name":"ICML: International Conference on Machine Learning"},"file_date_updated":"2025-12-16T12:38:29Z","oa":1,"volume":267,"OA_type":"gold","external_id":{"arxiv":["2506.05408"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","arxiv":1,"oa_version":"Published Version","abstract":[{"lang":"eng","text":"Clustering is a cornerstone of data analysis that is particularly suited to identifying coherent subgroups or substructures in unlabeled data, which are generated continuously in large amounts these days. However, in many cases traditional clustering methods are not applicable, because data are increasingly being produced and stored in a distributed way, e.g. on edge devices, and privacy concerns prevent them from being transferred to a central server. To address this challenge, we present FedDP-KMeans, a new algorithm for k-means clustering that is fully federated as well as differentially private. Our approach leverages (potentially small and out-of-distribution) server-side data to overcome the primary challenge of differentially private clustering methods: the need for a good initialization. Combining our initialization with a simple federated DP-Lloyds algorithm, we obtain an algorithm that achieves excellent results on synthetic and real-world benchmark tasks. 
We also provide a theoretical analysis of our method that gives bounds on the convergence speed and cluster identification success."}],"author":[{"first_name":"Jonathan A","last_name":"Scott","id":"e499926b-f6e0-11ea-865d-9c63db0031e8","full_name":"Scott, Jonathan A"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph","full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887"},{"id":"f8e48cf0-b0ff-11ed-b0e9-b4c35598f964","first_name":"David","last_name":"Saulpic","full_name":"Saulpic, David"}],"date_created":"2025-12-14T23:02:05Z","date_updated":"2026-04-07T11:46:11Z","publication_identifier":{"eissn":["2640-3498"]},"year":"2025","_id":"20819","intvolume":"       267","title":"Differentially private federated k-means clustering with server-side data","month":"05","scopus_import":"1","date_published":"2025-05-01T00:00:00Z","status":"public","article_processing_charge":"No","language":[{"iso":"eng"}]},{"abstract":[{"lang":"eng","text":"Personalized federated learning has emerged as a popular approach to training on devices holding statistically heterogeneous data, known as clients. However, most existing approaches require a client to have labeled data for training or finetuning in order to obtain its own personalized model. In this paper, we address this by proposing FLowDUP, a novel method that is able to generate a personalized model using only a forward pass with unlabeled data. The generated model parameters reside in a low-dimensional subspace, enabling efficient communication and computation. FLowDUP's learning objective is theoretically motivated by our new transductive multi-task PAC-Bayesian generalization bound, which provides performance guarantees for unlabeled clients. The objective is structured in such a way that it allows both clients with labeled data and clients with only unlabeled data to contribute to the training process. To supplement our theoretical results, we carry out a thorough experimental evaluation of FLowDUP, demonstrating strong empirical performance on a range of datasets with differing sorts of statistically heterogeneous clients. 
Through numerous ablation studies, we test the efficacy of the individual components of the method."}],"publication":"arXiv","oa_version":"Preprint","user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","related_material":{"record":[{"relation":"dissertation_contains","status":"public","id":"21198"}]},"oa":1,"OA_place":"repository","department":[{"_id":"ChLa"}],"date_updated":"2026-04-07T11:46:11Z","corr_author":"1","type":"preprint","publication_status":"draft","author":[{"orcid":"0009-0007-3977-6462","full_name":"Zakerinia, Hossein","id":"653bd8b6-f394-11eb-9cf6-c0bbf6cd78d4","first_name":"Hossein","last_name":"Zakerinia"},{"id":"e499926b-f6e0-11ea-865d-9c63db0031e8","last_name":"Scott","first_name":"Jonathan A","full_name":"Scott, Jonathan A"},{"orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph","first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}],"date_created":"2026-02-10T08:20:59Z","date_published":"2025-05-21T00:00:00Z","title":"Federated learning with unlabeled clients: Personalization can happen in low dimensions","tmp":{"image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"doi":"10.48550/ARXIV.2505.15579","month":"05","year":"2025","_id":"21207","citation":{"chicago":"Zakerinia, Hossein, Jonathan A Scott, and Christoph Lampert. “Federated Learning with Unlabeled Clients: Personalization Can Happen in Low Dimensions.” <i>ArXiv</i>, n.d. <a href=\"https://doi.org/10.48550/ARXIV.2505.15579\">https://doi.org/10.48550/ARXIV.2505.15579</a>.","ista":"Zakerinia H, Scott JA, Lampert C. Federated learning with unlabeled clients: Personalization can happen in low dimensions. arXiv, <a href=\"https://doi.org/10.48550/ARXIV.2505.15579\">10.48550/ARXIV.2505.15579</a>.","apa":"Zakerinia, H., Scott, J. A., &#38; Lampert, C. (n.d.). Federated learning with unlabeled clients: Personalization can happen in low dimensions. <i>arXiv</i>. <a href=\"https://doi.org/10.48550/ARXIV.2505.15579\">https://doi.org/10.48550/ARXIV.2505.15579</a>","ama":"Zakerinia H, Scott JA, Lampert C. Federated learning with unlabeled clients: Personalization can happen in low dimensions. <i>arXiv</i>. doi:<a href=\"https://doi.org/10.48550/ARXIV.2505.15579\">10.48550/ARXIV.2505.15579</a>","mla":"Zakerinia, Hossein, et al. “Federated Learning with Unlabeled Clients: Personalization Can Happen in Low Dimensions.” <i>ArXiv</i>, doi:<a href=\"https://doi.org/10.48550/ARXIV.2505.15579\">10.48550/ARXIV.2505.15579</a>.","short":"H. Zakerinia, J.A. Scott, C. Lampert, ArXiv (n.d.).","ieee":"H. Zakerinia, J. A. Scott, and C. Lampert, “Federated learning with unlabeled clients: Personalization can happen in low dimensions,” <i>arXiv</i>. 
."},"main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2505.15579","open_access":"1"}],"day":"21","language":[{"iso":"eng"}],"article_processing_charge":"No","status":"public"},{"ddc":["000"],"publisher":"Institute of Science and Technology Austria","OA_place":"publisher","has_accepted_license":"1","related_material":{"record":[{"status":"public","id":"15039","relation":"part_of_dissertation"},{"status":"public","id":"18874","relation":"part_of_dissertation"},{"status":"public","id":"17426","relation":"part_of_dissertation"},{"relation":"part_of_dissertation","id":"11839","status":"public"}]},"department":[{"_id":"GradSch"},{"_id":"ChLa"}],"alternative_title":["ISTA Thesis"],"file":[{"file_size":3578077,"content_type":"application/pdf","file_id":"19829","checksum":"e5108e759014e2a9020c973c778fafc9","access_level":"open_access","creator":"bprach","file_name":"ThesisFinal.pdf","date_updated":"2025-06-10T18:11:05Z","date_created":"2025-06-10T18:11:05Z","relation":"main_file"},{"access_level":"closed","creator":"bprach","checksum":"51bf6c11fb6d8a9f8010b458c600a83f","file_name":"ThesisFinal.zip","date_updated":"2025-06-10T18:14:03Z","date_created":"2025-06-10T18:14:03Z","relation":"source_file","content_type":"application/x-zip-compressed","file_size":74894357,"file_id":"19830"}],"corr_author":"1","publication_status":"published","type":"dissertation","supervisor":[{"orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph","first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}],"doi":"10.15479/10.15479/at-ista-19759","day":"30","citation":{"ieee":"B. Prach, “Robust image classification with 1-Lipschitz networks,” Institute of Science and Technology Austria, 2025.","short":"B. Prach, Robust Image Classification with 1-Lipschitz Networks, Institute of Science and Technology Austria, 2025.","ama":"Prach B. Robust image classification with 1-Lipschitz networks. 2025. doi:<a href=\"https://doi.org/10.15479/10.15479/at-ista-19759\">10.15479/10.15479/at-ista-19759</a>","apa":"Prach, B. (2025). <i>Robust image classification with 1-Lipschitz networks</i>. Institute of Science and Technology Austria. <a href=\"https://doi.org/10.15479/10.15479/at-ista-19759\">https://doi.org/10.15479/10.15479/at-ista-19759</a>","chicago":"Prach, Bernd. “Robust Image Classification with 1-Lipschitz Networks.” Institute of Science and Technology Austria, 2025. <a href=\"https://doi.org/10.15479/10.15479/at-ista-19759\">https://doi.org/10.15479/10.15479/at-ista-19759</a>.","ista":"Prach B. 2025. Robust image classification with 1-Lipschitz networks. Institute of Science and Technology Austria.","mla":"Prach, Bernd. <i>Robust Image Classification with 1-Lipschitz Networks</i>. Institute of Science and Technology Austria, 2025, doi:<a href=\"https://doi.org/10.15479/10.15479/at-ista-19759\">10.15479/10.15479/at-ista-19759</a>."},"file_date_updated":"2025-06-10T18:14:03Z","page":"84","abstract":[{"text":"Despite generating remarkable results in various computer vision tasks, deep learning comes\r\nwith some surprising shortcomings. For example, tiny perturbations, often imperceptible to\r\nthe human eye, can completely change the predictions of image classifiers. Despite a decade\r\nof research, the field has made limited progress in developing image classifiers that are both\r\naccurate and robust. This thesis aims to address this gap.\r\nAs our first contribution, we aim to simplify the process of training certifiably robust image\r\nclassifiers. 
We do this by designing a convolutional layer that does not require executing an\r\niterative procedure in every forward pass, but relies on an explicit bound instead. We also\r\npropose a loss function that allows optimizing for a particular margin more precisely.\r\nNext, we provide an overview and comparison of various methods that create robust image\r\nclassifiers by constraining the Lipschitz constant. This is important since generally longer\r\ntraining times and more parameters improve the performance of robust classifiers, making it\r\nchallenging to determine the most practical and effective methods from existing literature.\r\nIn 1-Lipschitz classification, the performance of current methods is still much worse than what\r\nwe expect on the simple tasks we consider. Therefore, we next investigate potential causes of\r\nthis shortcoming. We first consider the role of the activation function. We prove a theoretical\r\nshortcoming of the commonly used activation function, and provide an alternative that avoids it.\r\nHowever, this theoretical improvement barely translates into the empirical performance of\r\nrobust classifiers, suggesting a different bottleneck.\r\nTherefore, in the final chapter, we study how the performance depends on the amount of\r\ntraining data. We prove that in the worst case, we might require far more data to train a\r\nrobust classifier compared to a normal one. We furthermore find that the amount of training\r\ndata is a key determinant of the performance current methods achieve on popular datasets.\r\nAdditionally, we show that linear subspaces exist with tiny data variance, and yet we can\r\nstill train very accurate classifiers after projecting into those subspaces. This shows that on\r\nthe datasets considered, enforcing robustness in classification makes the task strictly more\r\nchallenging.\r\n\r\n-----------------“In reference to IEEE copyrighted material which is used with permission in this thesis, the IEEE does not endorse any of [name of university or educational entity]’s products or services. Internal or personal use of this material is permitted. If interested in reprinting/republishing IEEE copyrighted material for advertising or promotional purposes or for creating new collective works for resale or redistribution, please go to http://www.ieee.org/publications_standards/publications/rights/rights_link.html to learn how to obtain a License from RightsLink. If applicable, University Microfilms and/or ProQuest Library, or the Archives of Canada may supply single copies of the dissertation.”\r\n","lang":"eng"}],"oa_version":"Published Version","user_id":"ba8df636-2132-11f1-aed0-ed93e2281fdd","oa":1,"publication_identifier":{"issn":["2663-337X"]},"date_updated":"2026-04-07T11:49:52Z","date_created":"2025-05-28T16:20:48Z","author":[{"full_name":"Prach, Bernd","last_name":"Prach","first_name":"Bernd","id":"2D561D42-C427-11E9-89B4-9C1AE6697425"}],"date_published":"2025-05-30T00:00:00Z","month":"05","title":"Robust image classification with 1-Lipschitz networks","degree_awarded":"PhD","year":"2025","_id":"19759","language":[{"iso":"eng"}],"article_processing_charge":"No","status":"public"},{"publisher":"ML Research Press","publication":"Proceedings of the 41st International Conference on Machine Learning","quality_controlled":"1","type":"conference","publication_status":"published","corr_author":"1","alternative_title":["PMLR"],"department":[{"_id":"ChLa"}],"citation":{"mla":"Zakerinia, Hossein, et al. 
“More Flexible PAC-Bayesian Meta-Learning by Learning Learning Algorithms.” <i>Proceedings of the 41st International Conference on Machine Learning</i>, vol. 235, ML Research Press, 2024, pp. 58122–39.","ama":"Zakerinia H, Behjati A, Lampert C. More flexible PAC-Bayesian meta-learning by learning learning algorithms. In: <i>Proceedings of the 41st International Conference on Machine Learning</i>. Vol 235. ML Research Press; 2024:58122-58139.","apa":"Zakerinia, H., Behjati, A., &#38; Lampert, C. (2024). More flexible PAC-Bayesian meta-learning by learning learning algorithms. In <i>Proceedings of the 41st International Conference on Machine Learning</i> (Vol. 235, pp. 58122–58139). Vienna, Austria: ML Research Press.","ista":"Zakerinia H, Behjati A, Lampert C. 2024. More flexible PAC-Bayesian meta-learning by learning learning algorithms. Proceedings of the 41st International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 235, 58122–58139.","chicago":"Zakerinia, Hossein, Amin Behjati, and Christoph Lampert. “More Flexible PAC-Bayesian Meta-Learning by Learning Learning Algorithms.” In <i>Proceedings of the 41st International Conference on Machine Learning</i>, 235:58122–39. ML Research Press, 2024.","ieee":"H. Zakerinia, A. Behjati, and C. Lampert, “More flexible PAC-Bayesian meta-learning by learning learning algorithms,” in <i>Proceedings of the 41st International Conference on Machine Learning</i>, Vienna, Austria, 2024, vol. 235, pp. 58122–58139.","short":"H. Zakerinia, A. Behjati, C. Lampert, in:, Proceedings of the 41st International Conference on Machine Learning, ML Research Press, 2024, pp. 58122–58139."},"main_file_link":[{"open_access":"1","url":" https://doi.org/10.48550/arXiv.2402.04054"}],"day":"01","page":"58122-58139","conference":{"start_date":"2024-07-21","name":"ICML: International Conference on Machine Learning","end_date":"2024-07-27","location":"Vienna, Austria"},"oa":1,"volume":235,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","external_id":{"arxiv":["2402.04054"]},"arxiv":1,"oa_version":"Published Version","abstract":[{"lang":"eng","text":"We introduce a new framework for studying meta-learning methods using PAC-Bayesian theory. Its main advantage over previous work is that it allows for more flexibility in how the transfer of knowledge between tasks is realized. For previous approaches, this could only happen indirectly, by means of learning prior distributions over models. In contrast, the new generalization bounds that we prove express the process of meta-learning much more directly as learning the learning algorithm that should be used for future tasks. The flexibility of our framework makes it suitable to analyze a wide range of meta-learning mechanisms and even design new mechanisms. 
Beyond our theoretical contributions, we also show empirically that our framework improves the prediction quality in practical meta-learning mechanisms."}],"author":[{"first_name":"Hossein","last_name":"Zakerinia","id":"653bd8b6-f394-11eb-9cf6-c0bbf6cd78d4","full_name":"Zakerinia, Hossein"},{"last_name":"Behjati","first_name":"Amin","full_name":"Behjati, Amin"},{"full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph"}],"date_created":"2024-09-22T22:01:45Z","date_updated":"2024-10-01T09:30:03Z","publication_identifier":{"eissn":["2640-3498"]},"_id":"18118","year":"2024","intvolume":"       235","title":"More flexible PAC-Bayesian meta-learning by learning learning algorithms","month":"09","scopus_import":"1","date_published":"2024-09-01T00:00:00Z","status":"public","article_processing_charge":"No","language":[{"iso":"eng"}]},{"page":"69-99","file_date_updated":"2025-01-20T08:41:10Z","day":"26","citation":{"mla":"Lutsai, Kateryna, and Christoph Lampert. “Predicting the Geolocation of Tweets Using Transformer Models on Customized Data.” <i>Journal of Spatial Information Science</i>, no. 29, University of Maine, 2024, pp. 69–99, doi:<a href=\"https://doi.org/10.5311/JOSIS.2024.29.295\">10.5311/JOSIS.2024.29.295</a>.","ista":"Lutsai K, Lampert C. 2024. Predicting the geolocation of tweets using transformer models on customized data. Journal of Spatial Information Science. (29), 69–99.","chicago":"Lutsai, Kateryna, and Christoph Lampert. “Predicting the Geolocation of Tweets Using Transformer Models on Customized Data.” <i>Journal of Spatial Information Science</i>. University of Maine, 2024. <a href=\"https://doi.org/10.5311/JOSIS.2024.29.295\">https://doi.org/10.5311/JOSIS.2024.29.295</a>.","apa":"Lutsai, K., &#38; Lampert, C. (2024). Predicting the geolocation of tweets using transformer models on customized data. <i>Journal of Spatial Information Science</i>. University of Maine. <a href=\"https://doi.org/10.5311/JOSIS.2024.29.295\">https://doi.org/10.5311/JOSIS.2024.29.295</a>","ama":"Lutsai K, Lampert C. Predicting the geolocation of tweets using transformer models on customized data. <i>Journal of Spatial Information Science</i>. 2024;(29):69-99. doi:<a href=\"https://doi.org/10.5311/JOSIS.2024.29.295\">10.5311/JOSIS.2024.29.295</a>","short":"K. Lutsai, C. Lampert, Journal of Spatial Information Science (2024) 69–99.","ieee":"K. Lutsai and C. Lampert, “Predicting the geolocation of tweets using transformer models on customized data,” <i>Journal of Spatial Information Science</i>, no. 29. University of Maine, pp. 
69–99, 2024."},"license":"https://creativecommons.org/licenses/by/3.0/","issue":"29","doi":"10.5311/JOSIS.2024.29.295","acknowledgement":"The authors acknowledge the Institute of Science and Technology (ISTA) for their material support and for granting access to the Twitter database archive, which was essential for the research.","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/3.0/legalcode","name":"Creative Commons Attribution 3.0 Unported (CC BY 3.0)","short":"CC BY (3.0)","image":"/images/cc_by.png"},"publication_status":"published","type":"journal_article","quality_controlled":"1","department":[{"_id":"ChLa"}],"corr_author":"1","file":[{"date_created":"2025-01-20T08:41:10Z","relation":"main_file","access_level":"open_access","creator":"dernst","checksum":"b82413f00398ffb5168e8e747571a98d","file_name":"2024_JourSpatialInfoScience_Lutsai.pdf","date_updated":"2025-01-20T08:41:10Z","success":1,"file_id":"18857","content_type":"application/pdf","file_size":7250655}],"publisher":"University of Maine","OA_place":"publisher","related_material":{"link":[{"relation":"software","url":"https://github.com/K4TEL/geo-twitter.git"}]},"has_accepted_license":"1","ddc":["500"],"publication":"Journal of Spatial Information Science","status":"public","DOAJ_listed":"1","language":[{"iso":"eng"}],"article_processing_charge":"Yes","article_type":"original","_id":"18856","year":"2024","date_published":"2024-12-26T00:00:00Z","month":"12","scopus_import":"1","title":"Predicting the geolocation of tweets using transformer models on customized data","date_created":"2025-01-19T23:01:53Z","author":[{"full_name":"Lutsai, Kateryna","last_name":"Lutsai","first_name":"Kateryna"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","first_name":"Christoph","last_name":"Lampert","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"}],"publication_identifier":{"eissn":["1948-660X"]},"date_updated":"2025-06-05T13:47:12Z","user_id":"68b8ca59-c5b3-11ee-8790-cd641c68093d","OA_type":"gold","oa":1,"abstract":[{"text":"This research aims to solve the tweet/user geolocation prediction task and to provide a flexible methodology for the geo-tagging of textual big data. The suggested approach uses neural networks for natural language processing (NLP) to estimate locations as coordinate pairs (longitude, latitude) and as two-dimensional Gaussian Mixture Models (GMMs). The proposed models were finetuned on a Twitter dataset using pretrained Bidirectional Encoder Representations from Transformers (BERT) as base models. Performance metrics show a median error of less than 30 km on the worldwide-level and less than 15 km on the US-level datasets for models trained and evaluated on text features of tweets' content and metadata context. Our source code and data are available at https://github.com/K4TEL/geo-twitter.git.","lang":"eng"}],"oa_version":"Published Version"},{"oa_version":"Published Version","arxiv":1,"abstract":[{"lang":"eng","text":"Current state-of-the-art methods for differentially private model training are based on matrix factorization techniques. However, these methods suffer from high computational overhead because they require numerically solving a demanding optimization problem to determine an approximately optimal factorization prior to the actual model training. In this work, we present a new matrix factorization approach, BSR, which overcomes this computational bottleneck. 
By exploiting properties of the standard matrix square root, BSR allows to efficiently handle also large-scale problems. For the key scenario of stochastic gradient descent with momentum and weight decay, we even derive analytical expressions for BSR that render the computational overhead negligible. We prove bounds on the approximation quality that hold both in the centralized and in the federated learning setting. Our numerical experiments demonstrate that models trained using BSR perform on par with the best existing methods, while completely avoiding their computational overhead."}],"oa":1,"OA_type":"gold","volume":37,"external_id":{"arxiv":["2405.13763"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_updated":"2025-05-14T11:34:20Z","publication_identifier":{"eissn":["1049-5258"]},"author":[{"id":"4b14526e-14d2-11ed-ba64-c14c9553d137","last_name":"Kalinin","first_name":"Nikita","full_name":"Kalinin, Nikita"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph","full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887"}],"date_created":"2025-01-24T17:58:16Z","title":"Banded square root matrix factorization for differentially private model training","month":"12","scopus_import":"1","date_published":"2024-12-01T00:00:00Z","year":"2024","_id":"18875","intvolume":"        37","article_processing_charge":"No","language":[{"iso":"eng"}],"status":"public","ddc":["000"],"publication":"38th Annual Conference on Neural Information Processing Systems","OA_place":"publisher","has_accepted_license":"1","publisher":"Neural Information Processing Systems Foundation","corr_author":"1","alternative_title":["Advances in Neural Information Processing Systems"],"file":[{"file_size":1144656,"content_type":"application/pdf","file_id":"18888","success":1,"checksum":"a216cab8eddc1fe7840aede0e2c0d41e","access_level":"open_access","creator":"dernst","file_name":"2024_NeurIPS_Nikita.pdf","date_updated":"2025-01-27T09:52:15Z","date_created":"2025-01-27T09:52:15Z","relation":"main_file"}],"department":[{"_id":"GradSch"},{"_id":"ChLa"}],"quality_controlled":"1","type":"conference","publication_status":"published","tmp":{"image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"citation":{"short":"N. Kalinin, C. Lampert, in:, 38th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ieee":"N. Kalinin and C. Lampert, “Banded square root matrix factorization for differentially private model training,” in <i>38th Annual Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 37.","chicago":"Kalinin, Nikita, and Christoph Lampert. “Banded Square Root Matrix Factorization for Differentially Private Model Training.” In <i>38th Annual Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","ista":"Kalinin N, Lampert C. 2024. Banded square root matrix factorization for differentially private model training. 38th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37.","apa":"Kalinin, N., &#38; Lampert, C. (2024). Banded square root matrix factorization for differentially private model training. 
In <i>38th Annual Conference on Neural Information Processing Systems</i> (Vol. 37). Vancouver, Canada: Neural Information Processing Systems Foundation.","ama":"Kalinin N, Lampert C. Banded square root matrix factorization for differentially private model training. In: <i>38th Annual Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","mla":"Kalinin, Nikita, and Christoph Lampert. “Banded Square Root Matrix Factorization for Differentially Private Model Training.” <i>38th Annual Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024."},"day":"01","file_date_updated":"2025-01-27T09:52:15Z","conference":{"start_date":"2024-12-16","location":"Vancouver, Canada","name":"NeurIPS: Neural Information Processing Systems","end_date":"2024-12-16"}},{"conference":{"location":"Vancouver, Canada","name":"NeurIPS: Neural Information Processing Systems","end_date":"2024-12-16","start_date":"2024-12-16"},"file_date_updated":"2025-02-04T08:11:25Z","day":"01","citation":{"ieee":"P. Súkeník, C. Lampert, and M. Mondelli, “Neural collapse versus low-rank bias: Is deep neural collapse really optimal?,” in <i>38th Annual Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 37.","short":"P. Súkeník, C. Lampert, M. Mondelli, in:, 38th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ama":"Súkeník P, Lampert C, Mondelli M. Neural collapse versus low-rank bias: Is deep neural collapse really optimal? In: <i>38th Annual Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","apa":"Súkeník, P., Lampert, C., &#38; Mondelli, M. (2024). Neural collapse versus low-rank bias: Is deep neural collapse really optimal? In <i>38th Annual Conference on Neural Information Processing Systems</i> (Vol. 37). Vancouver, Canada: Neural Information Processing Systems Foundation.","ista":"Súkeník P, Lampert C, Mondelli M. 2024. Neural collapse versus low-rank bias: Is deep neural collapse really optimal? 38th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37.","chicago":"Súkeník, Peter, Christoph Lampert, and Marco Mondelli. “Neural Collapse versus Low-Rank Bias: Is Deep Neural Collapse Really Optimal?” In <i>38th Annual Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","mla":"Súkeník, Peter, et al. “Neural Collapse versus Low-Rank Bias: Is Deep Neural Collapse Really Optimal?” <i>38th Annual Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024."},"acknowledged_ssus":[{"_id":"ScienComp"}],"tmp":{"image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"acknowledgement":"Marco Mondelli is partially supported by the 2019 Lopez-Loreta prize. 
This research was supported by the Scientific Service Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp).","publication_status":"published","type":"conference","quality_controlled":"1","department":[{"_id":"GradSch"},{"_id":"MaMo"},{"_id":"ChLa"}],"corr_author":"1","file":[{"content_type":"application/pdf","file_size":1784118,"success":1,"file_id":"18989","creator":"dernst","checksum":"b7b79f1ea3ac1e9e11b3d91faaeb0780","access_level":"open_access","file_name":"2024_NeurIPS_Sukenik.pdf","date_updated":"2025-02-04T08:11:25Z","date_created":"2025-02-04T08:11:25Z","relation":"main_file"}],"alternative_title":["Advances in Neural Information Processing Systems"],"publisher":"Neural Information Processing Systems Foundation","has_accepted_license":"1","OA_place":"publisher","ddc":["000"],"publication":"38th Annual Conference on Neural Information Processing Systems","status":"public","language":[{"iso":"eng"}],"article_processing_charge":"No","intvolume":"        37","year":"2024","_id":"18891","date_published":"2024-12-01T00:00:00Z","month":"12","title":"Neural collapse versus low-rank bias: Is deep neural collapse really optimal?","project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"date_created":"2025-01-27T11:15:18Z","author":[{"full_name":"Súkeník, Peter","first_name":"Peter","last_name":"Súkeník","id":"d64d6a8d-eb8e-11eb-b029-96fd216dec3c"},{"first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"},{"id":"27EB676C-8706-11E9-9510-7717E6697425","last_name":"Mondelli","first_name":"Marco","full_name":"Mondelli, Marco","orcid":"0000-0002-3242-7020"}],"date_updated":"2025-06-04T07:19:21Z","external_id":{"arxiv":["2405.14468"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","OA_type":"gold","volume":37,"oa":1,"abstract":[{"text":"Deep neural networks (DNNs) exhibit a surprising structure in their final layer\r\nknown as neural collapse (NC), and a growing body of works has currently investigated the propagation of neural collapse to earlier layers of DNNs – a phenomenon\r\ncalled deep neural collapse (DNC). However, existing theoretical results are restricted to special cases: linear models, only two layers or binary classification.\r\nIn contrast, we focus on non-linear models of arbitrary depth in multi-class classification and reveal a surprising qualitative shift. As soon as we go beyond two\r\nlayers or two classes, DNC stops being optimal for the deep unconstrained features\r\nmodel (DUFM) – the standard theoretical framework for the analysis of collapse.\r\nThe main culprit is a low-rank bias of multi-layer regularization schemes: this bias\r\nleads to optimal solutions of even lower rank than the neural collapse. 
We support\r\nour theoretical findings with experiments on both DUFM and real data, which show\r\nthe emergence of the low-rank structure in the solution found by gradient descent.","lang":"eng"}],"arxiv":1,"oa_version":"Published Version"},{"date_updated":"2025-02-24T12:52:23Z","date_created":"2025-02-20T10:13:42Z","author":[{"full_name":"Zverev, Egor","first_name":"Egor","last_name":"Zverev","id":"05162b19-1340-11ed-8f02-fa94e0e8c3bc"},{"full_name":"Abdelnabi, Sahar","first_name":"Sahar","last_name":"Abdelnabi"},{"orcid":"0009-0003-4119-6281","full_name":"Tabesh, Soroush","first_name":"Soroush","last_name":"Tabesh","id":"06000900-6068-11ef-8d61-c2472ef2e752"},{"first_name":"Mario","last_name":"Fritz","full_name":"Fritz, Mario"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","first_name":"Christoph","last_name":"Lampert","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"}],"abstract":[{"text":"Instruction-tuned Large Language Models (LLMs) show impressive results in numerous practical applications, but they lack essential safety features that are common in other areas of computer science, particularly an explicit separation of instructions and data. This makes them vulnerable to manipulations such as indirect prompt injections and generally unsuitable for safety-critical tasks. Surprisingly, there is currently no established definition or benchmark to quantify this phenomenon. In this work, we close this gap by introducing a formal measure for instruction-data separation and an empirical variant that is calculable from a model's outputs. We also present a new dataset, SEP, that allows estimating the measure for real-world models. Our results on various LLMs show that the problem of instruction-data separation is real: all models fail to achieve high separation, and canonical mitigation techniques, such as prompt engineering and fine-tuning, either fail to substantially improve separation or reduce model utility. The source code and SEP dataset are openly accessible at https://github.com/egozverev/Shold-It-Be-Executed-Or-Processed.\r\n","lang":"eng"}],"oa_version":"Preprint","arxiv":1,"external_id":{"arxiv":["2403.06833"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","OA_type":"green","oa":1,"language":[{"iso":"eng"}],"article_processing_charge":"No","status":"public","date_published":"2024-03-01T00:00:00Z","month":"03","title":"Can LLMs separate instructions from data? 
And what do we even mean by that?","year":"2024","_id":"19063","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"corr_author":"1","file":[{"file_id":"19064","success":1,"content_type":"application/pdf","file_size":530972,"relation":"main_file","date_created":"2025-02-20T10:11:45Z","date_updated":"2025-02-20T10:11:45Z","file_name":"2403.06833v3.pdf","creator":"ezverev","access_level":"open_access","checksum":"35eb43968684b87be59144603ef10af0"}],"publication_status":"published","type":"preprint","publication":"arXiv","ddc":["000"],"has_accepted_license":"1","OA_place":"repository","related_material":{"link":[{"relation":"software","url":" https://github.com/egozverev/Shold-It-Be-Executed-Or-Processed"}]},"file_date_updated":"2025-02-20T10:11:45Z","license":"https://creativecommons.org/licenses/by-sa/4.0/","article_number":"2403.06833","doi":"10.48550/arXiv.2403.06833","acknowledged_ssus":[{"_id":"ScienComp"}],"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by-sa/4.0/legalcode","image":"/images/cc_by_sa.png","name":"Creative Commons Attribution-ShareAlike 4.0 International Public License (CC BY-SA 4.0)","short":"CC BY-SA (4.0)"},"acknowledgement":"The authors would like to sincerely thank Juan Rocamonde for valuable feedback to our manuscript. We acknowledge the support from the Scientific Service Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp). We thank Dan Alistarh for providing us with computational resources. This work was partially funded by the German Federal Ministry of Education and Research (BMBF) under the grant AIgenCY (16KIS2012) and ELSA – European Lighthouse on Secure and Safe AI funded by the European Union under grant agreement No. 101070617. Views and opinions expressed are however those of the authors only and do not necessarily reflect those of the European Union or European Commission. Neither the European Union nor the European Commission can be held responsible for them.","day":"01","citation":{"ieee":"E. Zverev, S. Abdelnabi, S. Tabesh, M. Fritz, and C. Lampert, “Can LLMs separate instructions from data? And what do we even mean by that?,” <i>arXiv</i>. 2024.","short":"E. Zverev, S. Abdelnabi, S. Tabesh, M. Fritz, C. Lampert, ArXiv (2024).","ama":"Zverev E, Abdelnabi S, Tabesh S, Fritz M, Lampert C. Can LLMs separate instructions from data? And what do we even mean by that? <i>arXiv</i>. 2024. doi:<a href=\"https://doi.org/10.48550/arXiv.2403.06833\">10.48550/arXiv.2403.06833</a>","apa":"Zverev, E., Abdelnabi, S., Tabesh, S., Fritz, M., &#38; Lampert, C. (2024). Can LLMs separate instructions from data? And what do we even mean by that? <i>arXiv</i>. <a href=\"https://doi.org/10.48550/arXiv.2403.06833\">https://doi.org/10.48550/arXiv.2403.06833</a>","ista":"Zverev E, Abdelnabi S, Tabesh S, Fritz M, Lampert C. 2024. Can LLMs separate instructions from data? And what do we even mean by that? arXiv, 2403.06833.","chicago":"Zverev, Egor, Sahar Abdelnabi, Soroush Tabesh, Mario Fritz, and Christoph Lampert. “Can LLMs Separate Instructions from Data? And What Do We Even Mean by That?” <i>ArXiv</i>, 2024. <a href=\"https://doi.org/10.48550/arXiv.2403.06833\">https://doi.org/10.48550/arXiv.2403.06833</a>.","mla":"Zverev, Egor, et al. “Can LLMs Separate Instructions from Data? 
And What Do We Even Mean by That?” <i>ArXiv</i>, 2403.06833, 2024, doi:<a href=\"https://doi.org/10.48550/arXiv.2403.06833\">10.48550/arXiv.2403.06833</a>."},"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2403.06833"}]},{"quality_controlled":"1","type":"journal_article","publication_status":"published","file":[{"success":1,"file_id":"19426","content_type":"application/pdf","file_size":1367966,"relation":"main_file","date_created":"2025-03-20T09:02:18Z","date_updated":"2025-03-20T09:02:18Z","file_name":"2024_TMLR_Verwimp.pdf","access_level":"open_access","creator":"dernst","checksum":"0714e12f7423cd098976ed9974561155"}],"alternative_title":["TMLR"],"department":[{"_id":"ChLa"}],"OA_place":"publisher","has_accepted_license":"1","publisher":"Transactions on Machine Learning Research","ddc":["000"],"publication":"Transactions on Machine Learning Research","file_date_updated":"2025-03-20T09:02:18Z","citation":{"short":"E. Verwimp, R. Aljundi, S. Ben-David, M. Bethge, A. Cossu, A. Gepperth, T.L. Hayes, E. Hüllermeier, C. Kanan, D. Kudithipudi, C. Lampert, M. Mundt, R. Pascanu, A. Popescu, A.S. Tolias, J. Van De Weijer, B. Liu, V. Lomonaco, T. Tuytelaars, G.M. Van De Ven, Transactions on Machine Learning Research 2024 (2024).","ieee":"E. Verwimp <i>et al.</i>, “Continual learning: Applications and the road forward,” <i>Transactions on Machine Learning Research</i>, vol. 2024. Transactions on Machine Learning Research, 2024.","mla":"Verwimp, Eli, et al. “Continual Learning: Applications and the Road Forward.” <i>Transactions on Machine Learning Research</i>, vol. 2024, Transactions on Machine Learning Research, 2024.","ista":"Verwimp E, Aljundi R, Ben-David S, Bethge M, Cossu A, Gepperth A, Hayes TL, Hüllermeier E, Kanan C, Kudithipudi D, Lampert C, Mundt M, Pascanu R, Popescu A, Tolias AS, Van De Weijer J, Liu B, Lomonaco V, Tuytelaars T, Van De Ven GM. 2024. Continual learning: Applications and the road forward. Transactions on Machine Learning Research. 2024.","chicago":"Verwimp, Eli, Rahaf Aljundi, Shai Ben-David, Matthias Bethge, Andrea Cossu, Alexander Gepperth, Tyler L. Hayes, et al. “Continual Learning: Applications and the Road Forward.” <i>Transactions on Machine Learning Research</i>. Transactions on Machine Learning Research, 2024.","ama":"Verwimp E, Aljundi R, Ben-David S, et al. Continual learning: Applications and the road forward. <i>Transactions on Machine Learning Research</i>. 2024;2024.","apa":"Verwimp, E., Aljundi, R., Ben-David, S., Bethge, M., Cossu, A., Gepperth, A., … Van De Ven, G. M. (2024). Continual learning: Applications and the road forward. <i>Transactions on Machine Learning Research</i>. 
Transactions on Machine Learning Research."},"day":"12","tmp":{"image":"/images/cc_by.png","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"author":[{"full_name":"Verwimp, Eli","first_name":"Eli","last_name":"Verwimp"},{"full_name":"Aljundi, Rahaf","last_name":"Aljundi","first_name":"Rahaf"},{"full_name":"Ben-David, Shai","last_name":"Ben-David","first_name":"Shai"},{"full_name":"Bethge, Matthias","first_name":"Matthias","last_name":"Bethge"},{"last_name":"Cossu","first_name":"Andrea","full_name":"Cossu, Andrea"},{"full_name":"Gepperth, Alexander","first_name":"Alexander","last_name":"Gepperth"},{"full_name":"Hayes, Tyler L.","last_name":"Hayes","first_name":"Tyler L."},{"first_name":"Eyke","last_name":"Hüllermeier","full_name":"Hüllermeier, Eyke"},{"last_name":"Kanan","first_name":"Christopher","full_name":"Kanan, Christopher"},{"first_name":"Dhireesha","last_name":"Kudithipudi","full_name":"Kudithipudi, Dhireesha"},{"first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"},{"first_name":"Martin","last_name":"Mundt","full_name":"Mundt, Martin"},{"last_name":"Pascanu","first_name":"Razvan","full_name":"Pascanu, Razvan"},{"first_name":"Adrian","last_name":"Popescu","full_name":"Popescu, Adrian"},{"last_name":"Tolias","first_name":"Andreas S.","full_name":"Tolias, Andreas S."},{"last_name":"Van De Weijer","first_name":"Joost","full_name":"Van De Weijer, Joost"},{"first_name":"Bing","last_name":"Liu","full_name":"Liu, Bing"},{"first_name":"Vincenzo","last_name":"Lomonaco","full_name":"Lomonaco, Vincenzo"},{"last_name":"Tuytelaars","first_name":"Tinne","full_name":"Tuytelaars, Tinne"},{"full_name":"Van De Ven, Gido M.","last_name":"Van De Ven","first_name":"Gido M."}],"date_created":"2025-03-16T23:01:25Z","date_updated":"2025-03-20T09:21:02Z","publication_identifier":{"eissn":["2835-8856"]},"oa":1,"OA_type":"diamond","volume":2024,"external_id":{"arxiv":["2311.11908"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","arxiv":1,"oa_version":"Published Version","abstract":[{"text":"Continual learning is a subfield of machine learning, which aims to allow machine learning models to continuously learn on new data, by accumulating knowledge without forgetting what was learned in the past. In this work, we take a step back, and ask: \"Why should one care about continual learning in the first place?\". We set the stage by examining recent continual learning papers published at four major machine learning conferences, and show that memory-constrained settings dominate the field. Then, we discuss five open problems in machine learning, and even though they might seem unrelated to continual learning at first sight, we show that continual learning will inevitably be part of their solution. These problems are model editing, personalization and specialization, on-device learning, faster (re-)training and reinforcement learning. Finally, by comparing the desiderata from these unsolved problems and the current assumptions in continual learning, we highlight and discuss four future directions for continual learning research. We hope that this work offers an interesting perspective on the future of continual learning, while displaying its potential value and the paths we have to pursue in order to make it successful. 
This work is the result of the many discussions the authors had at the Dagstuhl seminar on Deep Continual Learning, in March 2023.","lang":"eng"}],"status":"public","article_type":"original","article_processing_charge":"No","language":[{"iso":"eng"}],"_id":"19408","year":"2024","intvolume":"      2024","title":"Continual learning: Applications and the road forward","scopus_import":"1","month":"04","date_published":"2024-04-12T00:00:00Z"},{"scopus_import":"1","month":"05","title":"Communication-efficient federated learning with data and client heterogeneity","date_published":"2024-05-01T00:00:00Z","_id":"17093","year":"2024","intvolume":"       238","article_processing_charge":"No","language":[{"iso":"eng"}],"status":"public","arxiv":1,"oa_version":"Preprint","abstract":[{"lang":"eng","text":"Federated Learning (FL) enables large-scale distributed training of machine learning models, while still allowing individual nodes to maintain data locally. However, executing FL at scale comes with inherent practical challenges: 1) heterogeneity of the local node data distributions, 2) heterogeneity of node computational speeds (asynchrony), but also 3) constraints in the amount of communication between the clients and the server. In this work, we present the first variant of the classic federated averaging (FedAvg) algorithm which, at the same time, supports data heterogeneity, partial client asynchrony, and communication compression. Our algorithm comes with a novel, rigorous analysis showing that, in spite of these system relaxations, it can provide similar convergence to FedAvg in interesting parameter regimes. Experimental results in the rigorous LEAF benchmark on setups of up to 300 nodes show that our algorithm ensures fast convergence for standard federated tasks, improving upon prior quantized and asynchronous approaches."}],"oa":1,"volume":238,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","external_id":{"arxiv":["2206.10032"]},"date_updated":"2024-10-09T21:08:57Z","publication_identifier":{"eissn":["2640-3498"]},"date_created":"2024-06-02T22:00:57Z","author":[{"first_name":"Hossein","last_name":"Zakerinia","id":"653bd8b6-f394-11eb-9cf6-c0bbf6cd78d4","full_name":"Zakerinia, Hossein"},{"full_name":"Talaei, Shayan","last_name":"Talaei","first_name":"Shayan"},{"first_name":"Giorgi","last_name":"Nadiradze","id":"3279A00C-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-5634-0731","full_name":"Nadiradze, Giorgi"},{"last_name":"Alistarh","first_name":"Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X"}],"day":"01","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2206.10032","open_access":"1"}],"citation":{"short":"H. Zakerinia, S. Talaei, G. Nadiradze, D.-A. Alistarh, in:, Proceedings of the 27th International Conference on Artificial Intelligence and Statistics, ML Research Press, 2024, pp. 3448–3456.","ieee":"H. Zakerinia, S. Talaei, G. Nadiradze, and D.-A. Alistarh, “Communication-efficient federated learning with data and client heterogeneity,” in <i>Proceedings of the 27th International Conference on Artificial Intelligence and Statistics</i>, Valencia, Spain, 2024, vol. 238, pp. 3448–3456.","ista":"Zakerinia H, Talaei S, Nadiradze G, Alistarh D-A. 2024. Communication-efficient federated learning with data and client heterogeneity. Proceedings of the 27th International Conference on Artificial Intelligence and Statistics. AISTATS: Conference on Artificial Intelligence and Statistics, PMLR, vol. 
238, 3448–3456.","chicago":"Zakerinia, Hossein, Shayan Talaei, Giorgi Nadiradze, and Dan-Adrian Alistarh. “Communication-Efficient Federated Learning with Data and Client Heterogeneity.” In <i>Proceedings of the 27th International Conference on Artificial Intelligence and Statistics</i>, 238:3448–56. ML Research Press, 2024.","ama":"Zakerinia H, Talaei S, Nadiradze G, Alistarh D-A. Communication-efficient federated learning with data and client heterogeneity. In: <i>Proceedings of the 27th International Conference on Artificial Intelligence and Statistics</i>. Vol 238. ML Research Press; 2024:3448-3456.","apa":"Zakerinia, H., Talaei, S., Nadiradze, G., &#38; Alistarh, D.-A. (2024). Communication-efficient federated learning with data and client heterogeneity. In <i>Proceedings of the 27th International Conference on Artificial Intelligence and Statistics</i> (Vol. 238, pp. 3448–3456). Valencia, Spain: ML Research Press.","mla":"Zakerinia, Hossein, et al. “Communication-Efficient Federated Learning with Data and Client Heterogeneity.” <i>Proceedings of the 27th International Conference on Artificial Intelligence and Statistics</i>, vol. 238, ML Research Press, 2024, pp. 3448–56."},"page":"3448-3456","conference":{"start_date":"2024-05-02","location":"Valencia, Spain","name":"AISTATS: Conference on Artificial Intelligence and Statistics","end_date":"2024-05-04"},"publication":"Proceedings of the 27th International Conference on Artificial Intelligence and Statistics","publisher":"ML Research Press","alternative_title":["PMLR"],"corr_author":"1","department":[{"_id":"DaAl"},{"_id":"ChLa"}],"quality_controlled":"1","publication_status":"published","type":"conference"},{"conference":{"start_date":"2024-03-07","location":"Vienna, Austria","end_date":"2024-03-07","name":"ICLR: International Conference on Learning Representations"},"file_date_updated":"2024-08-12T07:38:06Z","day":"07","citation":{"apa":"Scott, J. A., Zakerinia, H., &#38; Lampert, C. (2024). PEFLL: Personalized federated learning by learning to learn. In <i>12th International Conference on Learning Representations</i>. Vienna, Austria: OpenReview.","ama":"Scott JA, Zakerinia H, Lampert C. PEFLL: Personalized federated learning by learning to learn. In: <i>12th International Conference on Learning Representations</i>. OpenReview; 2024.","ista":"Scott JA, Zakerinia H, Lampert C. 2024. PEFLL: Personalized federated learning by learning to learn. 12th International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","chicago":"Scott, Jonathan A, Hossein Zakerinia, and Christoph Lampert. “PEFLL: Personalized Federated Learning by Learning to Learn.” In <i>12th International Conference on Learning Representations</i>. OpenReview, 2024.","mla":"Scott, Jonathan A., et al. “PEFLL: Personalized Federated Learning by Learning to Learn.” <i>12th International Conference on Learning Representations</i>, OpenReview, 2024.","ieee":"J. A. Scott, H. Zakerinia, and C. Lampert, “PEFLL: Personalized federated learning by learning to learn,” in <i>12th International Conference on Learning Representations</i>, Vienna, Austria, 2024.","short":"J.A. Scott, H. Zakerinia, C. 
Lampert, in:, 12th International Conference on Learning Representations, OpenReview, 2024."},"acknowledged_ssus":[{"_id":"ScienComp"}],"acknowledgement":"This research was supported by the Scientific Service Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp).\r\n","quality_controlled":"1","publication_status":"published","type":"conference","file":[{"date_created":"2024-08-12T07:38:06Z","relation":"main_file","checksum":"81b7ea2e667adaf9c7a7b6b376b1f251","access_level":"open_access","creator":"dernst","file_name":"2024_ICLR_Scott.pdf","date_updated":"2024-08-12T07:38:06Z","file_id":"17415","success":1,"file_size":1029219,"content_type":"application/pdf"}],"corr_author":"1","department":[{"_id":"ChLa"}],"related_material":{"record":[{"id":"21198","status":"public","relation":"dissertation_contains"}]},"has_accepted_license":"1","publisher":"OpenReview","publication":"12th International Conference on Learning Representations","ddc":["000"],"status":"public","article_processing_charge":"No","language":[{"iso":"eng"}],"_id":"17411","year":"2024","month":"03","scopus_import":"1","title":"PEFLL: Personalized federated learning by learning to learn","date_published":"2024-03-07T00:00:00Z","date_created":"2024-08-11T22:01:12Z","author":[{"id":"e499926b-f6e0-11ea-865d-9c63db0031e8","first_name":"Jonathan A","last_name":"Scott","full_name":"Scott, Jonathan A"},{"id":"653bd8b6-f394-11eb-9cf6-c0bbf6cd78d4","last_name":"Zakerinia","first_name":"Hossein","full_name":"Zakerinia, Hossein","orcid":"0009-0007-3977-6462"},{"first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"}],"date_updated":"2026-04-07T11:46:11Z","oa":1,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","external_id":{"arxiv":["2306.05515"]},"arxiv":1,"oa_version":"Published Version","abstract":[{"text":"We present PeFLL, a new personalized federated learning algorithm that improves\r\nover the state-of-the-art in three aspects: 1) it produces more accurate models,\r\nespecially in the low-data regime, and not only for clients present during its\r\ntraining phase, but also for any that may emerge in the future; 2) it reduces the\r\namount of on-client computation and client-server communication by providing\r\nfuture clients with ready-to-use personalized models that require no additional\r\nfinetuning or optimization; 3) it comes with theoretical guarantees that establish\r\ngeneralization from the observed clients to future ones.\r\nAt the core of PeFLL lies a learning-to-learn approach that jointly trains an\r\nembedding network and a hypernetwork. The embedding network is used to\r\nrepresent clients in a latent descriptor space in a way that reflects their similarity\r\nto each other. The hypernetwork takes as input such descriptors and outputs the\r\nparameters of fully personalized client models. 
In combination, both networks\r\nconstitute a learning algorithm that achieves state-of-the-art performance in several\r\npersonalized federated learning benchmarks","lang":"eng"}]},{"date_published":"2024-09-01T00:00:00Z","title":"Improved modelling of federated datasets using mixtures-of-Dirichlet-multinomials","scopus_import":"1","month":"09","intvolume":"       235","_id":"18120","year":"2024","language":[{"iso":"eng"}],"article_processing_charge":"No","status":"public","abstract":[{"lang":"eng","text":"In practice, training using federated learning can be orders of magnitude slower than standard centralized training. This severely limits the amount of experimentation and tuning that can be done, making it challenging to obtain good performance on a given task. Server-side proxy data can be used to run training simulations, for instance for hyperparameter tuning. This can greatly speed up the training pipeline by reducing the number of tuning runs to be performed overall on the true clients. However, it is challenging to ensure that these simulations accurately reflect the dynamics of the real federated training. In particular, the proxy data used for simulations often comes as a single centralized dataset without a partition into distinct clients, and partitioning this data in a naive way can lead to simulations that poorly reflect real federated training. In this paper we address the challenge of how to partition centralized data in a way that reflects the statistical heterogeneity of the true federated clients. We propose a fully federated, theoretically justified, algorithm that efficiently learns the distribution of the true clients and observe improved server-side simulations when using the inferred distribution to create simulated clients from the centralized data."}],"arxiv":1,"oa_version":"Preprint","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","external_id":{"arxiv":["2406.02416"]},"volume":235,"oa":1,"publication_identifier":{"eissn":["2640-3498"]},"date_updated":"2026-04-07T11:46:11Z","author":[{"full_name":"Scott, Jonathan A","first_name":"Jonathan A","last_name":"Scott","id":"e499926b-f6e0-11ea-865d-9c63db0031e8"},{"full_name":"Cahill, Áine","last_name":"Cahill","first_name":"Áine"}],"date_created":"2024-09-22T22:01:45Z","acknowledgement":"We would like to thank: Mona Chitnis and everyone in the Private Federated Learning team at Apple for their help and support throughout the entire project; Audra McMillan, Martin Pelikan, Anosh Raj and Barry Theobold for feedback on the initial versions of the paper; and Christoph Lampert for valuable feedback on the paper structure and suggestions for additional experiments.","citation":{"short":"J.A. Scott, Á. Cahill, in:, Proceedings of the 41st International Conference on Machine Learning, ML Research Press, 2024, pp. 44012–44037.","ieee":"J. A. Scott and Á. Cahill, “Improved modelling of federated datasets using mixtures-of-Dirichlet-multinomials,” in <i>Proceedings of the 41st International Conference on Machine Learning</i>, Vienna, Austria, 2024, vol. 235, pp. 44012–44037.","chicago":"Scott, Jonathan A, and Áine Cahill. “Improved Modelling of Federated Datasets Using Mixtures-of-Dirichlet-Multinomials.” In <i>Proceedings of the 41st International Conference on Machine Learning</i>, 235:44012–37. ML Research Press, 2024.","ista":"Scott JA, Cahill Á. 2024. Improved modelling of federated datasets using mixtures-of-Dirichlet-multinomials. Proceedings of the 41st International Conference on Machine Learning. 
ICML: International Conference on Machine Learning, PMLR, vol. 235, 44012–44037.","apa":"Scott, J. A., &#38; Cahill, Á. (2024). Improved modelling of federated datasets using mixtures-of-Dirichlet-multinomials. In <i>Proceedings of the 41st International Conference on Machine Learning</i> (Vol. 235, pp. 44012–44037). Vienna, Austria: ML Research Press.","ama":"Scott JA, Cahill Á. Improved modelling of federated datasets using mixtures-of-Dirichlet-multinomials. In: <i>Proceedings of the 41st International Conference on Machine Learning</i>. Vol 235. ML Research Press; 2024:44012-44037.","mla":"Scott, Jonathan A., and Áine Cahill. “Improved Modelling of Federated Datasets Using Mixtures-of-Dirichlet-Multinomials.” <i>Proceedings of the 41st International Conference on Machine Learning</i>, vol. 235, ML Research Press, 2024, pp. 44012–37."},"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2406.02416"}],"day":"01","conference":{"start_date":"2024-07-21","location":"Vienna, Austria","end_date":"2024-07-27","name":"ICML: International Conference on Machine Learning"},"page":"44012-44037","publication":"Proceedings of the 41st International Conference on Machine Learning","publisher":"ML Research Press","related_material":{"record":[{"id":"21198","status":"public","relation":"dissertation_contains"}]},"department":[{"_id":"ChLa"}],"alternative_title":["PMLR"],"corr_author":"1","type":"conference","publication_status":"published","quality_controlled":"1"},{"oa":1,"OA_type":"green","external_id":{"arxiv":["2311.16833"],"isi":["001344387500055"]},"user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","oa_version":"Preprint","arxiv":1,"abstract":[{"text":"The robustness of neural networks against input perturbations with bounded\r\nmagnitude represents a serious concern in the deployment of deep learning\r\nmodels in safety-critical systems. Recently, the scientific community has\r\nfocused on enhancing certifiable robustness guarantees by crafting 1-Lipschitz\r\nneural networks that leverage Lipschitz bounded dense and convolutional layers.\r\nAlthough different methods have been proposed in the literature to achieve this\r\ngoal, understanding the performance of such methods is not straightforward,\r\nsince different metrics can be relevant (e.g., training time, memory usage,\r\naccuracy, certifiable robustness) for different applications. For this reason,\r\nthis work provides a thorough theoretical and empirical comparison between\r\nmethods by evaluating them in terms of memory usage, speed, and certifiable\r\nrobust accuracy. The paper also provides some guidelines and recommendations to\r\nsupport the user in selecting the methods that work best depending on the\r\navailable resources. 
We provide code at\r\nhttps://github.com/berndprach/1LipschitzLayersCompared.","lang":"eng"}],"date_created":"2024-08-14T08:42:32Z","author":[{"full_name":"Prach, Bernd","id":"2D561D42-C427-11E9-89B4-9C1AE6697425","first_name":"Bernd","last_name":"Prach"},{"full_name":"Brau, Fabio","first_name":"Fabio","last_name":"Brau"},{"full_name":"Buttazzo, Giorgio","last_name":"Buttazzo","first_name":"Giorgio"},{"full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph"}],"date_updated":"2026-04-07T11:49:51Z","_id":"17426","year":"2024","month":"06","title":"1-Lipschitz layers compared: Memory, speed, and certifiable robustness","date_published":"2024-06-01T00:00:00Z","status":"public","article_processing_charge":"No","language":[{"iso":"eng"}],"related_material":{"link":[{"url":"https://github.com/berndprach/1LipschitzLayersCompared","relation":"software"}],"record":[{"relation":"dissertation_contains","status":"public","id":"19759"}]},"has_accepted_license":"1","OA_place":"repository","publisher":"Computer Vision Foundation","publication":"Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition","quality_controlled":"1","publication_status":"published","type":"conference","corr_author":"1","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"day":"01","isi":1,"citation":{"short":"B. Prach, F. Brau, G. Buttazzo, C. Lampert, in:, Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, Computer Vision Foundation, 2024, pp. 24574–24583.","ieee":"B. Prach, F. Brau, G. Buttazzo, and C. Lampert, “1-Lipschitz layers compared: Memory, speed, and certifiable robustness,” in <i>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, Seattle, WA, United States, 2024, pp. 24574–24583.","chicago":"Prach, Bernd, Fabio Brau, Giorgio Buttazzo, and Christoph Lampert. “1-Lipschitz Layers Compared: Memory, Speed, and Certifiable Robustness.” In <i>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, 24574–83. Computer Vision Foundation, 2024. <a href=\"https://doi.org/10.1109/CVPR52733.2024.02320\">https://doi.org/10.1109/CVPR52733.2024.02320</a>.","ista":"Prach B, Brau F, Buttazzo G, Lampert C. 2024. 1-Lipschitz layers compared: Memory, speed, and certifiable robustness. Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. CVPR: Conference on Computer Vision and Pattern Recognition, 24574–24583.","ama":"Prach B, Brau F, Buttazzo G, Lampert C. 1-Lipschitz layers compared: Memory, speed, and certifiable robustness. In: <i>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>. Computer Vision Foundation; 2024:24574-24583. doi:<a href=\"https://doi.org/10.1109/CVPR52733.2024.02320\">10.1109/CVPR52733.2024.02320</a>","apa":"Prach, B., Brau, F., Buttazzo, G., &#38; Lampert, C. (2024). 1-Lipschitz layers compared: Memory, speed, and certifiable robustness. In <i>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</i> (pp. 24574–24583). Seattle, WA, United States: Computer Vision Foundation. <a href=\"https://doi.org/10.1109/CVPR52733.2024.02320\">https://doi.org/10.1109/CVPR52733.2024.02320</a>","mla":"Prach, Bernd, et al. “1-Lipschitz Layers Compared: Memory, Speed, and Certifiable Robustness.” <i>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, Computer Vision Foundation, 2024, pp. 
24574–83, doi:<a href=\"https://doi.org/10.1109/CVPR52733.2024.02320\">10.1109/CVPR52733.2024.02320</a>."},"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2311.16833"}],"doi":"10.1109/CVPR52733.2024.02320","acknowledgement":"This work was partially supported by project SERICS (PE00000014) under the MUR National Recovery and Resilience Plan funded by the European Union - NextGenerationEU.\r\n","conference":{"end_date":"2024-06-22","name":"CVPR: Conference on Computer Vision and Pattern Recognition","location":"Seattle, WA, United States","start_date":"2024-06-16"},"page":"24574-24583"},{"corr_author":"1","date_updated":"2026-04-07T11:49:51Z","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"date_created":"2025-01-24T16:57:29Z","author":[{"full_name":"Prach, Bernd","last_name":"Prach","first_name":"Bernd","id":"2D561D42-C427-11E9-89B4-9C1AE6697425"},{"full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887","last_name":"Lampert","first_name":"Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}],"publication_status":"draft","type":"preprint","arxiv":1,"oa_version":"Preprint","publication":"arXiv","abstract":[{"text":"Despite extensive research since the community learned about adversarial\r\nexamples 10 years ago, we still do not know how to train high-accuracy\r\nclassifiers that are guaranteed to be robust to small perturbations of their\r\ninputs. Previous works often argued that this might be because no classifier\r\nexists that is robust and accurate at the same time. However, in computer\r\nvision this assumption does not match reality where humans are usually accurate\r\nand robust on most tasks of interest. We offer an alternative explanation and\r\nshow that in certain settings robust generalization is only possible with\r\nunrealistically large amounts of data. More precisely we find a setting where a\r\nrobust classifier exists, it is easy to learn an accurate classifier, yet it\r\nrequires an exponential amount of data to learn a robust classifier. Based on\r\nthis theoretical result, we explore how well robust classifiers generalize on\r\ndatasets such as CIFAR-10. We come to the conclusion that on this datasets, the\r\nlimitation of current robust models also lies in the generalization, and that\r\nthey require a lot of data to do well on the test set. We also show that the\r\nproblem is not in the expressiveness or generalization capabilities of current\r\narchitectures, and that there are low magnitude features in the data which are\r\nuseful for non-robust generalization but are not available for robust\r\nclassifiers.","lang":"eng"}],"OA_place":"repository","related_material":{"record":[{"relation":"later_version","status":"public","id":"20455"},{"relation":"dissertation_contains","id":"19759","status":"public"}]},"oa":1,"user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","external_id":{"arxiv":["2412.04245"]},"article_processing_charge":"No","language":[{"iso":"eng"}],"status":"public","month":"12","doi":"10.48550/arXiv.2412.04245","title":"Intriguing properties of robust classification","article_number":"2412.04245","date_published":"2024-12-05T00:00:00Z","day":"05","citation":{"short":"B. Prach, C. Lampert, ArXiv (n.d.).","ieee":"B. Prach and C. Lampert, “Intriguing properties of robust classification,” <i>arXiv</i>. .","mla":"Prach, Bernd, and Christoph Lampert. 
“Intriguing Properties of Robust Classification.” <i>ArXiv</i>, 2412.04245, doi:<a href=\"https://doi.org/10.48550/arXiv.2412.04245\">10.48550/arXiv.2412.04245</a>.","ista":"Prach B, Lampert C. Intriguing properties of robust classification. arXiv, 2412.04245.","chicago":"Prach, Bernd, and Christoph Lampert. “Intriguing Properties of Robust Classification.” <i>ArXiv</i>, n.d. <a href=\"https://doi.org/10.48550/arXiv.2412.04245\">https://doi.org/10.48550/arXiv.2412.04245</a>.","ama":"Prach B, Lampert C. Intriguing properties of robust classification. <i>arXiv</i>. doi:<a href=\"https://doi.org/10.48550/arXiv.2412.04245\">10.48550/arXiv.2412.04245</a>","apa":"Prach, B., &#38; Lampert, C. (n.d.). Intriguing properties of robust classification. <i>arXiv</i>. <a href=\"https://doi.org/10.48550/arXiv.2412.04245\">https://doi.org/10.48550/arXiv.2412.04245</a>"},"_id":"18874","year":"2024","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2412.04245"}]}]