[{"intvolume":"       267","volume":267,"alternative_title":["PMLR"],"_id":"20819","external_id":{"arxiv":["2506.05408"]},"author":[{"full_name":"Scott, Jonathan A","id":"e499926b-f6e0-11ea-865d-9c63db0031e8","last_name":"Scott","first_name":"Jonathan A"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph","last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887"},{"last_name":"Saulpic","id":"f8e48cf0-b0ff-11ed-b0e9-b4c35598f964","full_name":"Saulpic, David","first_name":"David"}],"publisher":"ML Research Press","ddc":["000"],"year":"2025","date_published":"2025-05-01T00:00:00Z","article_processing_charge":"No","page":"53757-53790","abstract":[{"text":"Clustering is a cornerstone of data analysis that is particularly suited to identifying coherent subgroups or substructures in unlabeled data, as are generated continuously in large amounts these days. However, in many cases traditional clustering methods are not applicable, because data are increasingly being produced and stored in a distributed way, e.g. on edge devices, and privacy concerns prevent it from being transferred to a central server. To address this challenge, we present FedDP-KMeans, a new algorithm for \r\n-means clustering that is fully-federated as well as differentially private. Our approach leverages (potentially small and out-of-distribution) server-side data to overcome the primary challenge of differentially private clustering methods: the need for a good initialization. Combining our initialization with a simple federated DP-Lloyds algorithm we obtain an algorithm that achieves excellent results on synthetic and real-world benchmark tasks. We also provide a theoretical analysis of our method that provides bounds on the convergence speed and cluster identification success.","lang":"eng"}],"publication":"42nd International Conference on Machine Learning","date_created":"2025-12-14T23:02:05Z","publication_identifier":{"eissn":["2640-3498"]},"file_date_updated":"2025-12-16T12:38:29Z","type":"conference","date_updated":"2026-04-07T11:46:11Z","OA_place":"publisher","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","title":"Differentially private federated k-means clustering with server-side data","OA_type":"gold","conference":{"end_date":"2025-07-19","location":"Vancouver, Canada","start_date":"2025-07-13","name":"ICML: International Conference on Machine Learning"},"has_accepted_license":"1","oa":1,"status":"public","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png"},"citation":{"ieee":"J. A. Scott, C. Lampert, and D. Saulpic, “Differentially private federated k-means clustering with server-side data,” in <i>42nd International Conference on Machine Learning</i>, Vancouver, Canada, 2025, vol. 267, pp. 53757–53790.","apa":"Scott, J. A., Lampert, C., &#38; Saulpic, D. (2025). Differentially private federated k-means clustering with server-side data. In <i>42nd International Conference on Machine Learning</i> (Vol. 267, pp. 53757–53790). Vancouver, Canada: ML Research Press.","ama":"Scott JA, Lampert C, Saulpic D. Differentially private federated k-means clustering with server-side data. In: <i>42nd International Conference on Machine Learning</i>. Vol 267. ML Research Press; 2025:53757-53790.","chicago":"Scott, Jonathan A, Christoph Lampert, and David Saulpic. “Differentially Private Federated K-Means Clustering with Server-Side Data.” In <i>42nd International Conference on Machine Learning</i>, 267:53757–90. ML Research Press, 2025.","short":"J.A. Scott, C. Lampert, D. Saulpic, in:, 42nd International Conference on Machine Learning, ML Research Press, 2025, pp. 53757–53790.","mla":"Scott, Jonathan A., et al. “Differentially Private Federated K-Means Clustering with Server-Side Data.” <i>42nd International Conference on Machine Learning</i>, vol. 267, ML Research Press, 2025, pp. 53757–90.","ista":"Scott JA, Lampert C, Saulpic D. 2025. Differentially private federated k-means clustering with server-side data. 42nd International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 267, 53757–53790."},"file":[{"content_type":"application/pdf","relation":"main_file","checksum":"815b32b463023ca21e569c2158745c15","success":1,"access_level":"open_access","date_updated":"2025-12-16T12:38:29Z","file_size":746612,"creator":"dernst","file_id":"20829","date_created":"2025-12-16T12:38:29Z","file_name":"2025_ICML_Scott.pdf"}],"scopus_import":"1","acknowledgement":"This research was funded in part by the Austrian Science Fund (FWF) [10.55776/COE12] and supported by the Scientific Service Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp).\r\n","related_material":{"record":[{"relation":"dissertation_contains","id":"21198","status":"public"}]},"corr_author":"1","month":"05","publication_status":"published","department":[{"_id":"ChLa"},{"_id":"MoHe"}],"day":"01","arxiv":1,"acknowledged_ssus":[{"_id":"ScienComp"}],"oa_version":"Published Version","quality_controlled":"1","language":[{"iso":"eng"}]},{"oa_version":"Preprint","quality_controlled":"1","language":[{"iso":"eng"}],"arxiv":1,"publication_status":"published","department":[{"_id":"MoHe"}],"day":"04","ec_funded":1,"doi":"10.1137/1.9781611977929.17","month":"01","corr_author":"1","scopus_import":"1","acknowledgement":"This   project   has   received   funding   from   the   Euro-pean  Research  Council  (ERC)  under  the  EuropeanUnion’s  Horizon  2020  research  and  innovation  programme  (Grant  agreement  No.   101019564  “The  De-sign  of  Modern  Fully  Dynamic  Data  Structures  (Mo-DynStruct)”  and  the  Austrian  Science  Fund  (FWF)project Z 422-N, project “Static and Dynamic Hierar-chical  Graph  Decompositions”,  I  5982-N,  and  project“Fast  Algorithms  for  a  Reactive  Network  Layer  (Re-actNet)”, P 33775-N, with additional funding from thenetidee SCIENCE Stiftung, 2020–2024.D.  Sauplic  has  received  funding  from  the  Euro-pean  Union’s  Horizon  2020  research  and  innovation programme under the Marie Sklodowska-Curie    grant    agreementNo 101034413.","citation":{"ieee":"M. Henzinger, D. Saulpic, and L. Sidl, “Experimental evaluation of fully dynamic k-means via coresets,” in <i>2024 Proceedings of the Symposium on Algorithm Engineering and Experiments</i>, Alexandria, VA, United States, 2024, pp. 220–233.","ama":"Henzinger M, Saulpic D, Sidl L. Experimental evaluation of fully dynamic k-means via coresets. In: <i>2024 Proceedings of the Symposium on Algorithm Engineering and Experiments</i>. Society for Industrial and Applied Mathematics; 2024:220-233. doi:<a href=\"https://doi.org/10.1137/1.9781611977929.17\">10.1137/1.9781611977929.17</a>","apa":"Henzinger, M., Saulpic, D., &#38; Sidl, L. (2024). Experimental evaluation of fully dynamic k-means via coresets. In <i>2024 Proceedings of the Symposium on Algorithm Engineering and Experiments</i> (pp. 220–233). Alexandria, VA, United States: Society for Industrial and Applied Mathematics. <a href=\"https://doi.org/10.1137/1.9781611977929.17\">https://doi.org/10.1137/1.9781611977929.17</a>","chicago":"Henzinger, Monika, David Saulpic, and Leonhard Sidl. “Experimental Evaluation of Fully Dynamic K-Means via Coresets.” In <i>2024 Proceedings of the Symposium on Algorithm Engineering and Experiments</i>, 220–33. Society for Industrial and Applied Mathematics, 2024. <a href=\"https://doi.org/10.1137/1.9781611977929.17\">https://doi.org/10.1137/1.9781611977929.17</a>.","short":"M. Henzinger, D. Saulpic, L. Sidl, in:, 2024 Proceedings of the Symposium on Algorithm Engineering and Experiments, Society for Industrial and Applied Mathematics, 2024, pp. 220–233.","mla":"Henzinger, Monika, et al. “Experimental Evaluation of Fully Dynamic K-Means via Coresets.” <i>2024 Proceedings of the Symposium on Algorithm Engineering and Experiments</i>, Society for Industrial and Applied Mathematics, 2024, pp. 220–33, doi:<a href=\"https://doi.org/10.1137/1.9781611977929.17\">10.1137/1.9781611977929.17</a>.","ista":"Henzinger M, Saulpic D, Sidl L. 2024. Experimental evaluation of fully dynamic k-means via coresets. 2024 Proceedings of the Symposium on Algorithm Engineering and Experiments. ALENEX: Workshop on Algorithm Engineering and Experiments, 220–233."},"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2310.18034"}],"status":"public","oa":1,"conference":{"end_date":"2024-01-08","location":"Alexandria, VA, United States","start_date":"2024-01-07","name":"ALENEX: Workshop on Algorithm Engineering and Experiments"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","title":"Experimental evaluation of fully dynamic k-means via coresets","type":"conference","date_updated":"2025-04-14T13:50:50Z","publication":"2024 Proceedings of the Symposium on Algorithm Engineering and Experiments","date_created":"2024-01-09T16:22:47Z","publication_identifier":{"eisbn":["9781611977929"]},"abstract":[{"lang":"eng","text":"For a set of points in Rd, the Euclidean k-means problems consists of finding k centers such that the sum of distances squared from each data point to its closest center is minimized. Coresets are one the main tools developed recently to solve this problem in a big data context. They allow to compress the initial dataset while preserving its structure: running any algorithm on the coreset provides a guarantee almost equivalent to running it on the full data. In this work, we study coresets in a fully-dynamic setting: points are added and deleted with the goal to efficiently maintain a coreset with which a k-means solution can be computed. Based on an algorithm from Henzinger and Kale [ESA'20], we present an efficient and practical implementation of a fully dynamic coreset algorithm, that improves the running time by up to a factor of 20 compared to our non-optimized implementation of the algorithm by Henzinger and Kale, without sacrificing more than 7% on the quality of the k-means solution."}],"page":"220-233","article_processing_charge":"No","date_published":"2024-01-04T00:00:00Z","year":"2024","project":[{"_id":"bd9ca328-d553-11ed-ba76-dc4f890cfe62","name":"The design and evaluation of modern fully dynamic data structures","call_identifier":"H2020","grant_number":"101019564"},{"_id":"34def286-11ca-11ed-8bc3-da5948e1613c","name":"Efficient algorithms","grant_number":"Z00422"},{"_id":"bda196b2-d553-11ed-ba76-8e8ee6c21103","name":"Static and Dynamic Hierarchical Graph Decompositions","grant_number":"I05982"},{"_id":"bd9e3a2e-d553-11ed-ba76-8aa684ce17fe","grant_number":"P33775","name":"Fast Algorithms for a Reactive Network Layer"},{"name":"IST-BRIDGE: International postdoctoral program","call_identifier":"H2020","grant_number":"101034413","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c"}],"publisher":"Society for Industrial and Applied Mathematics","author":[{"orcid":"0000-0002-5008-6530","first_name":"Monika H","full_name":"Henzinger, Monika H","id":"540c9bbd-f2de-11ec-812d-d04a5be85630","last_name":"Henzinger"},{"first_name":"David","id":"f8e48cf0-b0ff-11ed-b0e9-b4c35598f964","full_name":"Saulpic, David","last_name":"Saulpic"},{"first_name":"Leonhard","last_name":"Sidl","full_name":"Sidl, Leonhard","id":"8b563fd0-b441-11ee-9101-a3891c61efa6"}],"external_id":{"arxiv":["2310.18034"]},"_id":"14769"},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","title":"Data-efficient learning via clustering-based sensitivity sampling: Foundation models and beyond","conference":{"location":"Vienna, Austria","end_date":"2024-07-27","name":"ICML: International Conference on Machine Learning","start_date":"2024-07-21"},"oa":1,"abstract":[{"lang":"eng","text":"We study the data selection problem, whose aim is to select a small representative subset of data that can be used to efficiently train a machine learning model. We present a new data selection approach based on k-means clustering and sensitivity sampling. Assuming access to an embedding representation of the data with respect to which the model loss is Holder continuous, our approach provably allows selecting a set of “typical” k+1/ε2 elements whose average loss corresponds to the average loss of the whole dataset, up to a multiplicative (1±ε)\r\n factor and an additive ελΦk, where Φk represents the k-means cost for the input embeddings and λ is the Holder constant. We furthermore demonstrate the performance and scalability of our approach on fine-tuning foundation models and show that it outperforms state-of-the-art methods. We also show how it can be applied on linear regression, leading to a new sampling strategy that surprisingly matches the performance of leverage score sampling, while being conceptually simpler and more scalable."}],"publication_identifier":{"eissn":["2640-3498"]},"publication":"Proceedings of the 41st International Conference on Machine Learning","date_created":"2024-09-22T22:01:44Z","date_updated":"2026-06-18T18:00:19Z","type":"conference","month":"09","day":"01","ec_funded":1,"department":[{"_id":"MoHe"}],"publication_status":"published","arxiv":1,"language":[{"iso":"eng"}],"quality_controlled":"1","oa_version":"Published Version","status":"public","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2402.17327"}],"citation":{"ieee":"K. Axiotis <i>et al.</i>, “Data-efficient learning via clustering-based sensitivity sampling: Foundation models and beyond,” in <i>Proceedings of the 41st International Conference on Machine Learning</i>, Vienna, Austria, 2024, vol. 235, pp. 2086–2107.","apa":"Axiotis, K., Cohen-Addad, V., Henzinger, M., Jerome, S., Mirrokni, V., Saulpic, D., … Wunder, M. (2024). Data-efficient learning via clustering-based sensitivity sampling: Foundation models and beyond. In <i>Proceedings of the 41st International Conference on Machine Learning</i> (Vol. 235, pp. 2086–2107). Vienna, Austria: ML Research Press.","ama":"Axiotis K, Cohen-Addad V, Henzinger M, et al. Data-efficient learning via clustering-based sensitivity sampling: Foundation models and beyond. In: <i>Proceedings of the 41st International Conference on Machine Learning</i>. Vol 235. ML Research Press; 2024:2086-2107.","chicago":"Axiotis, Kyriakos, Vincent Cohen-Addad, Monika Henzinger, Sammy Jerome, Vahab Mirrokni, David Saulpic, David P. Woodruff, and Michael Wunder. “Data-Efficient Learning via Clustering-Based Sensitivity Sampling: Foundation Models and Beyond.” In <i>Proceedings of the 41st International Conference on Machine Learning</i>, 235:2086–2107. ML Research Press, 2024.","mla":"Axiotis, Kyriakos, et al. “Data-Efficient Learning via Clustering-Based Sensitivity Sampling: Foundation Models and Beyond.” <i>Proceedings of the 41st International Conference on Machine Learning</i>, vol. 235, ML Research Press, 2024, pp. 2086–107.","short":"K. Axiotis, V. Cohen-Addad, M. Henzinger, S. Jerome, V. Mirrokni, D. Saulpic, D.P. Woodruff, M. Wunder, in:, Proceedings of the 41st International Conference on Machine Learning, ML Research Press, 2024, pp. 2086–2107.","ista":"Axiotis K, Cohen-Addad V, Henzinger M, Jerome S, Mirrokni V, Saulpic D, Woodruff DP, Wunder M. 2024. Data-efficient learning via clustering-based sensitivity sampling: Foundation models and beyond. Proceedings of the 41st International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 235, 2086–2107."},"acknowledgement":"Monika Henzinger: This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (Grant agreement No. 101019564) and the Austrian Science Fund (FWF) grant DOI 10.55776/Z422, grant DOI 10.55776/I5982, and grant DOI 10.55776/P33775 with additional funding from the netidee SCIENCE Stiftung, 2020–2024. This work was partially done while David Saulpic was at the Institute for Science and Technology, Austria (ISTA). David Sauplic has received funding from the European Union’s Horizon 2020 research and innovation programme under the\r\nMarie Sklodowska-Curie grant agreement No 101034413. Work was done while David Woodruff was visiting Google Research.","scopus_import":"1","_id":"18115","external_id":{"arxiv":["2402.17327"]},"alternative_title":["PMLR"],"author":[{"first_name":"Kyriakos","full_name":"Axiotis, Kyriakos","last_name":"Axiotis"},{"full_name":"Cohen-Addad, Vincent","last_name":"Cohen-Addad","first_name":"Vincent"},{"orcid":"0000-0002-5008-6530","first_name":"Monika H","full_name":"Henzinger, Monika H","id":"540c9bbd-f2de-11ec-812d-d04a5be85630","last_name":"Henzinger"},{"first_name":"Sammy","full_name":"Jerome, Sammy","last_name":"Jerome"},{"last_name":"Mirrokni","full_name":"Mirrokni, Vahab","first_name":"Vahab"},{"first_name":"David","last_name":"Saulpic","id":"f8e48cf0-b0ff-11ed-b0e9-b4c35598f964","full_name":"Saulpic, David"},{"last_name":"Woodruff","full_name":"Woodruff, David P.","first_name":"David P."},{"first_name":"Michael","full_name":"Wunder, Michael","last_name":"Wunder"}],"intvolume":"       235","volume":235,"date_published":"2024-09-01T00:00:00Z","article_processing_charge":"No","page":"2086-2107","publisher":"ML Research Press","project":[{"_id":"bd9ca328-d553-11ed-ba76-dc4f890cfe62","grant_number":"101019564","name":"The design and evaluation of modern fully dynamic data structures","call_identifier":"H2020"},{"name":"Efficient algorithms","grant_number":"Z00422","_id":"34def286-11ca-11ed-8bc3-da5948e1613c"},{"name":"Static and Dynamic Hierarchical Graph Decompositions","grant_number":"I05982","_id":"bda196b2-d553-11ed-ba76-8e8ee6c21103"},{"_id":"bd9e3a2e-d553-11ed-ba76-8aa684ce17fe","name":"Fast Algorithms for a Reactive Network Layer","grant_number":"P33775"},{"grant_number":"101034413","call_identifier":"H2020","name":"IST-BRIDGE: International postdoctoral program","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c"}],"ddc":["000"],"year":"2024"},{"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2406.11649"}],"status":"public","citation":{"chicago":"La Tour, Max Dupré, Monika Henzinger, and David Saulpic. “Making Old Things New: A Unified Algorithm for Differentially Private Clustering.” In <i>Proceedings of the 41st International Conference on Machine Learning</i>, 235:12046–86. ML Research Press, 2024.","mla":"La Tour, Max Dupré, et al. “Making Old Things New: A Unified Algorithm for Differentially Private Clustering.” <i>Proceedings of the 41st International Conference on Machine Learning</i>, vol. 235, ML Research Press, 2024, pp. 12046–86.","short":"M.D. La Tour, M. Henzinger, D. Saulpic, in:, Proceedings of the 41st International Conference on Machine Learning, ML Research Press, 2024, pp. 12046–12086.","ista":"La Tour MD, Henzinger M, Saulpic D. 2024. Making old things new: A unified algorithm for differentially private clustering. Proceedings of the 41st International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 235, 12046–12086.","ieee":"M. D. La Tour, M. Henzinger, and D. Saulpic, “Making old things new: A unified algorithm for differentially private clustering,” in <i>Proceedings of the 41st International Conference on Machine Learning</i>, Vienna, Austria, 2024, vol. 235, pp. 12046–12086.","apa":"La Tour, M. D., Henzinger, M., &#38; Saulpic, D. (2024). Making old things new: A unified algorithm for differentially private clustering. In <i>Proceedings of the 41st International Conference on Machine Learning</i> (Vol. 235, pp. 12046–12086). Vienna, Austria: ML Research Press.","ama":"La Tour MD, Henzinger M, Saulpic D. Making old things new: A unified algorithm for differentially private clustering. In: <i>Proceedings of the 41st International Conference on Machine Learning</i>. Vol 235. ML Research Press; 2024:12046-12086."},"acknowledgement":"Monika Henzinger: This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (Grant agreement No. 101019564) and the Austrian Science Fund (FWF) grant DOI 10.55776/Z422, grant DOI 10.55776/I5982, and grant DOI 10.55776/P33775 with additional funding from the netidee SCIENCE Stiftung, 2020–2024.This work was partially done while David Saulpic was at the Institute for Science and Technology, Austria (ISTA). David Sauplic has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Sklodowska-Curie grant agreement No 101034413.","scopus_import":"1","month":"09","corr_author":"1","publication_status":"published","department":[{"_id":"MoHe"}],"ec_funded":1,"day":"01","arxiv":1,"quality_controlled":"1","language":[{"iso":"eng"}],"oa_version":"Published Version","abstract":[{"lang":"eng","text":"As a staple of data analysis and unsupervised learning, the problem of private clustering has been widely studied, under various privacy models. Centralized differential privacy is the first of them, and the problem has also been studied for the local and the shuffle variation. In each case, the goal is to design an algorithm that computes privately a clustering, with the smallest possible error. The study of each variation gave rise to new algorithm: the landscape of private clustering algorithm is therefore quite intricate. In this paper, we show that a 20 year-old algorithm can be slightly modified to work for any of those models. This provides a unified picture: while matching almost all previously known results, it allows us to improve some of them, and extend to a new privacy model, the continual observation setting, where the input is changing over time and the algorithm must output a new solution at each time step."}],"publication":"Proceedings of the 41st International Conference on Machine Learning","date_created":"2024-09-22T22:01:44Z","publication_identifier":{"eissn":["2640-3498"]},"type":"conference","date_updated":"2026-06-18T18:01:02Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","title":"Making old things new: A unified algorithm for differentially private clustering","conference":{"name":"ICML: International Conference on Machine Learning","start_date":"2024-07-21","location":"Vienna, Austria","end_date":"2024-07-27"},"oa":1,"project":[{"_id":"bd9ca328-d553-11ed-ba76-dc4f890cfe62","grant_number":"101019564","name":"The design and evaluation of modern fully dynamic data structures","call_identifier":"H2020"},{"_id":"34def286-11ca-11ed-8bc3-da5948e1613c","grant_number":"Z00422","name":"Efficient algorithms"},{"_id":"bda196b2-d553-11ed-ba76-8e8ee6c21103","name":"Static and Dynamic Hierarchical Graph Decompositions","grant_number":"I05982"},{"grant_number":"P33775","name":"Fast Algorithms for a Reactive Network Layer","_id":"bd9e3a2e-d553-11ed-ba76-8aa684ce17fe"},{"grant_number":"101034413","name":"IST-BRIDGE: International postdoctoral program","call_identifier":"H2020","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c"}],"publisher":"ML Research Press","ddc":["000"],"year":"2024","date_published":"2024-09-01T00:00:00Z","article_processing_charge":"No","page":"12046-12086","intvolume":"       235","volume":235,"alternative_title":["PMLR"],"external_id":{"arxiv":["2406.11649"]},"_id":"18116","author":[{"full_name":"La Tour, Max Dupré","last_name":"La Tour","first_name":"Max Dupré"},{"orcid":"0000-0002-5008-6530","first_name":"Monika H","last_name":"Henzinger","full_name":"Henzinger, Monika H","id":"540c9bbd-f2de-11ec-812d-d04a5be85630"},{"first_name":"David","last_name":"Saulpic","full_name":"Saulpic, David","id":"f8e48cf0-b0ff-11ed-b0e9-b4c35598f964"}]},{"project":[{"_id":"bd9ca328-d553-11ed-ba76-dc4f890cfe62","name":"The design and evaluation of modern fully dynamic data structures","call_identifier":"H2020","grant_number":"101019564"},{"name":"Efficient algorithms","grant_number":"Z00422","_id":"34def286-11ca-11ed-8bc3-da5948e1613c"},{"_id":"bda196b2-d553-11ed-ba76-8e8ee6c21103","name":"Static and Dynamic Hierarchical Graph Decompositions","grant_number":"I05982"},{"_id":"bd9e3a2e-d553-11ed-ba76-8aa684ce17fe","grant_number":"P33775","name":"Fast Algorithms for a Reactive Network Layer"},{"call_identifier":"H2020","name":"IST-BRIDGE: International postdoctoral program","grant_number":"101034413","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c"}],"publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik","ddc":["000"],"year":"2024","article_number":"100","date_published":"2024-09-23T00:00:00Z","article_processing_charge":"Yes","intvolume":"       308","volume":308,"alternative_title":["LIPIcs"],"external_id":{"arxiv":["2406.19926"],"isi":["001545622400100"]},"_id":"18308","isi":1,"author":[{"last_name":"La Tour","full_name":"La Tour, Max Dupré","first_name":"Max Dupré"},{"last_name":"Henzinger","full_name":"Henzinger, Monika H","id":"540c9bbd-f2de-11ec-812d-d04a5be85630","orcid":"0000-0002-5008-6530","first_name":"Monika H"},{"id":"f8e48cf0-b0ff-11ed-b0e9-b4c35598f964","full_name":"Saulpic, David","last_name":"Saulpic","first_name":"David"}],"status":"public","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png"},"citation":{"ista":"La Tour MD, Henzinger M, Saulpic D. 2024. Fully dynamic k-means coreset in near-optimal update time. 32nd Annual European Symposium on Algorithms. ESA: European Symposium on Algorithms, LIPIcs, vol. 308, 100.","mla":"La Tour, Max Dupré, et al. “Fully Dynamic K-Means Coreset in near-Optimal Update Time.” <i>32nd Annual European Symposium on Algorithms</i>, vol. 308, 100, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2024, doi:<a href=\"https://doi.org/10.4230/LIPIcs.ESA.2024.100\">10.4230/LIPIcs.ESA.2024.100</a>.","short":"M.D. La Tour, M. Henzinger, D. Saulpic, in:, 32nd Annual European Symposium on Algorithms, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2024.","chicago":"La Tour, Max Dupré, Monika Henzinger, and David Saulpic. “Fully Dynamic K-Means Coreset in near-Optimal Update Time.” In <i>32nd Annual European Symposium on Algorithms</i>, Vol. 308. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2024. <a href=\"https://doi.org/10.4230/LIPIcs.ESA.2024.100\">https://doi.org/10.4230/LIPIcs.ESA.2024.100</a>.","ama":"La Tour MD, Henzinger M, Saulpic D. Fully dynamic k-means coreset in near-optimal update time. In: <i>32nd Annual European Symposium on Algorithms</i>. Vol 308. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2024. doi:<a href=\"https://doi.org/10.4230/LIPIcs.ESA.2024.100\">10.4230/LIPIcs.ESA.2024.100</a>","apa":"La Tour, M. D., Henzinger, M., &#38; Saulpic, D. (2024). Fully dynamic k-means coreset in near-optimal update time. In <i>32nd Annual European Symposium on Algorithms</i> (Vol. 308). London, United Kingdom: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.ESA.2024.100\">https://doi.org/10.4230/LIPIcs.ESA.2024.100</a>","ieee":"M. D. La Tour, M. Henzinger, and D. Saulpic, “Fully dynamic k-means coreset in near-optimal update time,” in <i>32nd Annual European Symposium on Algorithms</i>, London, United Kingdom, 2024, vol. 308."},"file":[{"checksum":"8e8c0b13049f11bb0133dfac22e32718","relation":"main_file","content_type":"application/pdf","access_level":"open_access","success":1,"creator":"dernst","file_size":873561,"date_updated":"2024-10-21T09:41:48Z","file_name":"2024_LIPICs_DuprelaTour.pdf","file_id":"18454","date_created":"2024-10-21T09:41:48Z"}],"acknowledgement":"Monika Henzinger: This project has received funding from the European Research Council\r\n(ERC) under the European Union’s Horizon 2020 research and innovation programme (MoDynStruct Grant agreement No. 101019564) and the Austrian Science Fund (FWF) grant DOI 10.55776/Z422, grant DOI 10.55776/I5982, and grant DOI 10.55776/P33775 with additional funding from the netidee SCIENCE Stiftung, 2020–2024.\r\nDavid Saulpic: Work partially done while at ISTA. Received funding from the European Union’s\r\nHorizon 2020 research and innovation programme under the Marie Sklodowska-Curie grant agreement No 101034413. This work was partially funded by the grant ANR-19-CE48-0016 from the French National Research Agency (ANR).","scopus_import":"1","month":"09","corr_author":"1","publication_status":"published","department":[{"_id":"MoHe"}],"day":"23","ec_funded":1,"doi":"10.4230/LIPIcs.ESA.2024.100","arxiv":1,"language":[{"iso":"eng"}],"oa_version":"Published Version","quality_controlled":"1","abstract":[{"text":"We study in this paper the problem of maintaining a solution to k-median and k-means clustering in a fully dynamic setting. To do so, we present an algorithm to efficiently maintain a coreset, a compressed version of the dataset, that allows easy computation of a clustering solution at query time. Our coreset algorithm has near-optimal update time of Õ(k) in general metric spaces, which reduces to Õ(d) in the Euclidean space ℝ^d. The query time is O(k²) in general metrics, and O(kd) in ℝ^d. To maintain a constant-factor approximation for k-median and k-means clustering in Euclidean space, this directly leads to an algorithm with update time Õ(d), and query time Õ(kd + k²). To maintain a O(polylog k)-approximation, the query time is reduced to Õ(kd).","lang":"eng"}],"publication":"32nd Annual European Symposium on Algorithms","date_created":"2024-10-13T22:01:50Z","publication_identifier":{"issn":["1868-8969"],"isbn":["9783959773386"]},"file_date_updated":"2024-10-21T09:41:48Z","type":"conference","date_updated":"2025-12-02T13:49:11Z","OA_place":"publisher","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","title":"Fully dynamic k-means coreset in near-optimal update time","OA_type":"gold","conference":{"name":"ESA: European Symposium on Algorithms","start_date":"2024-09-02","location":"London, United Kingdom","end_date":"2024-09-04"},"has_accepted_license":"1","oa":1},{"year":"2023","publisher":"IEEE","project":[{"name":"IST-BRIDGE: International postdoctoral program","call_identifier":"H2020","grant_number":"101034413","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c"},{"grant_number":"101019564","name":"The design and evaluation of modern fully dynamic data structures","call_identifier":"H2020","_id":"bd9ca328-d553-11ed-ba76-dc4f890cfe62"}],"page":"1105-1130","article_processing_charge":"No","date_published":"2023-12-22T00:00:00Z","author":[{"first_name":"Vincent","last_name":"Cohen-Addad","full_name":"Cohen-Addad, Vincent"},{"full_name":"Saulpic, David","id":"f8e48cf0-b0ff-11ed-b0e9-b4c35598f964","last_name":"Saulpic","first_name":"David"},{"first_name":"Chris","full_name":"Schwiegelshohn, Chris","last_name":"Schwiegelshohn"}],"isi":1,"_id":"14768","external_id":{"arxiv":["2310.04076"],"isi":["001137125900060"]},"scopus_import":"1","acknowledgement":"D. Sauplic has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie grant agreement No 101034413, and Grant agreement No. 101019564 “The Design of Modern Fully Dynamic Data Structures (MoDynStruct)”.\r\nC. Schwiegelshohn acknowledges the support of the Independent Research Fund Denmark (DFF) under a Sapere Aude Research Leader grant No 1051-00106B.","citation":{"ista":"Cohen-Addad V, Saulpic D, Schwiegelshohn C. 2023. Deterministic clustering in high dimensional spaces: Sketches and approximation. 2023 IEEE 64th Annual Symposium on Foundations of Computer Science. FOCS: Foundations of Computer Science, 1105–1130.","short":"V. Cohen-Addad, D. Saulpic, C. Schwiegelshohn, in:, 2023 IEEE 64th Annual Symposium on Foundations of Computer Science, IEEE, 2023, pp. 1105–1130.","chicago":"Cohen-Addad, Vincent, David Saulpic, and Chris Schwiegelshohn. “Deterministic Clustering in High Dimensional Spaces: Sketches and Approximation.” In <i>2023 IEEE 64th Annual Symposium on Foundations of Computer Science</i>, 1105–30. IEEE, 2023. <a href=\"https://doi.org/10.1109/focs57990.2023.00066\">https://doi.org/10.1109/focs57990.2023.00066</a>.","mla":"Cohen-Addad, Vincent, et al. “Deterministic Clustering in High Dimensional Spaces: Sketches and Approximation.” <i>2023 IEEE 64th Annual Symposium on Foundations of Computer Science</i>, IEEE, 2023, pp. 1105–30, doi:<a href=\"https://doi.org/10.1109/focs57990.2023.00066\">10.1109/focs57990.2023.00066</a>.","ama":"Cohen-Addad V, Saulpic D, Schwiegelshohn C. Deterministic clustering in high dimensional spaces: Sketches and approximation. In: <i>2023 IEEE 64th Annual Symposium on Foundations of Computer Science</i>. IEEE; 2023:1105-1130. doi:<a href=\"https://doi.org/10.1109/focs57990.2023.00066\">10.1109/focs57990.2023.00066</a>","apa":"Cohen-Addad, V., Saulpic, D., &#38; Schwiegelshohn, C. (2023). Deterministic clustering in high dimensional spaces: Sketches and approximation. In <i>2023 IEEE 64th Annual Symposium on Foundations of Computer Science</i> (pp. 1105–1130). Santa Cruz, CA, United States: IEEE. <a href=\"https://doi.org/10.1109/focs57990.2023.00066\">https://doi.org/10.1109/focs57990.2023.00066</a>","ieee":"V. Cohen-Addad, D. Saulpic, and C. Schwiegelshohn, “Deterministic clustering in high dimensional spaces: Sketches and approximation,” in <i>2023 IEEE 64th Annual Symposium on Foundations of Computer Science</i>, Santa Cruz, CA, United States, 2023, pp. 1105–1130."},"status":"public","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2310.04076","open_access":"1"}],"oa_version":"Preprint","quality_controlled":"1","language":[{"iso":"eng"}],"arxiv":1,"doi":"10.1109/focs57990.2023.00066","day":"22","ec_funded":1,"publication_status":"published","department":[{"_id":"MoHe"}],"month":"12","date_updated":"2025-09-09T14:17:59Z","type":"conference","publication_identifier":{"eisbn":["9798350318944"]},"publication":"2023 IEEE 64th Annual Symposium on Foundations of Computer Science","date_created":"2024-01-09T16:20:09Z","abstract":[{"text":"In all state-of-the-art sketching and coreset techniques for clustering, as well as in the best known fixed-parameter tractable approximation algorithms, randomness plays a key role. For the classic k-median and k-means problems, there are no known deterministic dimensionality reduction procedure or coreset construction that avoid an exponential dependency on the input dimension d, the precision parameter $\\varepsilon^{-1}$ or k. Furthermore, there is no coreset construction that succeeds with probability $1-1/n$ and whose size does not depend on the number of input points, n. This has led researchers in the area to ask what is the power of randomness for clustering sketches [Feldman WIREs Data Mining Knowl. Discov’20].Similarly, the best approximation ratio achievable deterministically without a complexity exponential in the dimension are $1+\\sqrt{2}$ for k-median [Cohen-Addad, Esfandiari, Mirrokni, Narayanan, STOC’22] and 6.12903 for k-means [Grandoni, Ostrovsky, Rabani, Schulman, Venkat, Inf. Process. Lett.’22]. Those are the best results, even when allowing a complexity FPT in the number of clusters k: this stands in sharp contrast with the $(1+\\varepsilon)$-approximation achievable in that case, when allowing randomization.In this paper, we provide deterministic sketches constructions for clustering, whose size bounds are close to the best-known randomized ones. We show how to compute a dimension reduction onto $\\varepsilon^{-O(1)} \\log k$ dimensions in time $k^{O\\left(\\varepsilon^{-O(1)}+\\log \\log k\\right)}$ poly $(n d)$, and how to build a coreset of size $O\\left(k^{2} \\log ^{3} k \\varepsilon^{-O(1)}\\right)$ in time $2^{\\varepsilon^{O(1)} k \\log ^{3} k}+k^{O\\left(\\varepsilon^{-O(1)}+\\log \\log k\\right)}$ poly $(n d)$. In the case where k is small, this answers an open question of [Feldman WIDM’20] and [Munteanu and Schwiegelshohn, Künstliche Intell. ’18] on whether it is possible to efficiently compute coresets deterministically.We also construct a deterministic algorithm for computing $(1+$ $\\varepsilon)$-approximation to k-median and k-means in high dimensional Euclidean spaces in time $2^{k^{2} \\log ^{3} k / \\varepsilon^{O(1)}}$ poly $(n d)$, close to the best randomized complexity of $2^{(k / \\varepsilon)^{O(1)}}$ nd (see [Kumar, Sabharwal, Sen, JACM 10] and [Bhattacharya, Jaiswal, Kumar, TCS’18]).Furthermore, our new insights on sketches also yield a randomized coreset construction that uses uniform sampling, that immediately improves over the recent results of [Braverman et al. FOCS ’22] by a factor k.","lang":"eng"}],"oa":1,"conference":{"name":"FOCS: Foundations of Computer Science","start_date":"2023-11-06","location":"Santa Cruz, CA, United States","end_date":"2023-11-09"},"title":"Deterministic clustering in high dimensional spaces: Sketches and approximation","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345"}]