[{"quality_controlled":"1","file_date_updated":"2026-07-01T06:22:15Z","file":[{"access_level":"open_access","success":1,"date_created":"2026-07-01T06:22:15Z","file_name":"2026_SIAMJourmathDataScience_Zhang.pdf","creator":"dernst","date_updated":"2026-07-01T06:22:15Z","file_size":1210346,"checksum":"5cfd350dc64d1476063e959316dbff65","relation":"main_file","content_type":"application/pdf","file_id":"22230"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","doi":"10.1137/24m1702854","article_type":"original","citation":{"ieee":"Y. Zhang, M. Mondelli, and R. Venkataramanan, “Precise asymptotics for spectral methods in mixed generalized linear models,” <i>SIAM Journal on Mathematics of Data Science</i>, vol. 8, no. 2. Society for Industrial &#38; Applied Mathematics, pp. 411–439, 2026.","chicago":"Zhang, Yihan, Marco Mondelli, and Ramji Venkataramanan. “Precise Asymptotics for Spectral Methods in Mixed Generalized Linear Models.” <i>SIAM Journal on Mathematics of Data Science</i>. Society for Industrial &#38; Applied Mathematics, 2026. <a href=\"https://doi.org/10.1137/24m1702854\">https://doi.org/10.1137/24m1702854</a>.","mla":"Zhang, Yihan, et al. “Precise Asymptotics for Spectral Methods in Mixed Generalized Linear Models.” <i>SIAM Journal on Mathematics of Data Science</i>, vol. 8, no. 2, Society for Industrial &#38; Applied Mathematics, 2026, pp. 411–39, doi:<a href=\"https://doi.org/10.1137/24m1702854\">10.1137/24m1702854</a>.","ista":"Zhang Y, Mondelli M, Venkataramanan R. 2026. Precise asymptotics for spectral methods in mixed generalized linear models. SIAM Journal on Mathematics of Data Science. 8(2), 411–439.","short":"Y. Zhang, M. Mondelli, R. Venkataramanan, SIAM Journal on Mathematics of Data Science 8 (2026) 411–439.","apa":"Zhang, Y., Mondelli, M., &#38; Venkataramanan, R. (2026). Precise asymptotics for spectral methods in mixed generalized linear models. <i>SIAM Journal on Mathematics of Data Science</i>. 
Society for Industrial &#38; Applied Mathematics. <a href=\"https://doi.org/10.1137/24m1702854\">https://doi.org/10.1137/24m1702854</a>","ama":"Zhang Y, Mondelli M, Venkataramanan R. Precise asymptotics for spectral methods in mixed generalized linear models. <i>SIAM Journal on Mathematics of Data Science</i>. 2026;8(2):411-439. doi:<a href=\"https://doi.org/10.1137/24m1702854\">10.1137/24m1702854</a>"},"volume":8,"_id":"22228","publication_identifier":{"eissn":["2577-0187"]},"publication_status":"published","OA_place":"publisher","intvolume":"         8","day":"01","acknowledgement":"The first and second authors were partially supported by the 2019 Lopez-Loreta prize.","OA_type":"hybrid","has_accepted_license":"1","ddc":["000"],"scopus_import":"1","corr_author":"1","arxiv":1,"date_created":"2026-06-30T13:03:41Z","type":"journal_article","publication":"SIAM Journal on Mathematics of Data Science","oa_version":"Published Version","status":"public","language":[{"iso":"eng"}],"year":"2026","month":"06","page":"411-439","mathsc":["62E20","62J05","62J12"],"oa":1,"project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"PlanS_conform":"1","publisher":"Society for Industrial & Applied Mathematics","abstract":[{"lang":"eng","text":"In a mixed generalized linear model, the goal is to learn multiple signals from unlabeled observations: each sample comes from exactly one signal, but it is not known which one. We consider the prototypical problem of estimating two statistically independent signals in a mixed generalized linear model with Gaussian covariates. Spectral methods are a popular class of estimators which output the top two eigenvectors of a suitable data-dependent matrix. However, despite the wide applicability, their design is still obtained via heuristic considerations, and the number of samples 𝑛 needed to guarantee recovery is superlinear in the signal dimension 𝑑. 
In this paper, we develop exact asymptotics on spectral methods in the challenging proportional regime in which 𝑛,𝑑 grow large and their ratio converges to a finite constant. This allows us to optimize the design of the spectral method, and combine it with a simple linear estimator, to minimize the estimation error. Our characterization exploits a mix of tools from random matrices, free probability, and the theory of approximate message passing algorithms. Numerical simulations for mixed linear regression and phase retrieval demonstrate the advantage enabled by our analysis over existing designs of spectral methods."}],"article_processing_charge":"Yes (in subscription journal)","researchdata_availability":"no","external_id":{"arxiv":["2211.11368"]},"department":[{"_id":"MaMo"}],"date_published":"2026-06-01T00:00:00Z","supplementarymaterial":"no","author":[{"first_name":"Yihan","last_name":"Zhang","full_name":"Zhang, Yihan"},{"full_name":"Mondelli, Marco","orcid":"0000-0002-3242-7020","last_name":"Mondelli","first_name":"Marco","id":"27EB676C-8706-11E9-9510-7717E6697425"},{"full_name":"Venkataramanan, Ramji","last_name":"Venkataramanan","first_name":"Ramji"}],"title":"Precise asymptotics for spectral methods in mixed generalized linear models","keyword":["spectral estimator","generalized linear models","mixed regression","high-dimensional asymptotics","random matrix theory","approximate message passing (AMP)"],"issue":"2","date_updated":"2026-07-01T06:29:52Z","das_tickbox":"0","tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"}},{"language":[{"iso":"eng"}],"page":"169","month":"07","year":"2026","oa":1,"project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"},{"name":"Inference in High Dimensions: Light-speed Algorithms and Information 
Limits","grant_number":"101161364","_id":"911e6d1f-16d5-11f0-9cad-c5c68c6a1cdf"},{"name":"Improving estimation and prediction of common complex disease risk","grant_number":"PCEGP3_181181","_id":"9B8D11D6-BA93-11EA-9121-9846C619BF3A"}],"degree_awarded":"PhD","publisher":"Institute of Science and Technology Austria","abstract":[{"lang":"eng","text":"Uncovering the genetic architecture of complex traits and pinpointing causal molecular drivers require the ability to distinguish true signals from noise within massive, high-dimensional omics datasets. To extract meaningful biological insights from these datasets, such as identifying causal genetic variants and proteins, scalable and accurate inference methods are essential. To this end, this thesis develops novel Bayesian inference frameworks based on Vector Approximate Message Passing and demonstrates their effectiveness in the modeling of disease onset times and quantitative physical and clinical measures.\r\n\r\nFirst, we introduce gVAMP, a Bayesian framework tailored for Genome-Wide Association Studies that enables the joint modeling of quantitative complex traits across millions of genetic variants. gVAMP demonstrates superior accuracy in variable selection and out-of-sample polygenic risk prediction compared to state-of-the-art approaches. We model human height using 17 million whole-genome sequence variants from the UK Biobank, incorporating a vast number of rare variants and revealing novel associations. gVAMP achieves a prediction accuracy of approximately 46% for human height, representing the highest reported performance for this trait to date. \r\n\r\nSecond, we present vampW, a Bayesian framework for survival analysis applied to proteomic data. By effectively handling right-censoring and complex protein dependencies within the UK Biobank Pharma Proteomics Project dataset, vampW identifies 219 protein associations across 24 disease outcomes, the majority of which are not among the top marginal discoveries. 
We further adjust protein levels for exponential age effects, yielding 1,308 associations and highlighting the sensitivity of the analysis to the chosen age-correction methodology. Finally, vampW improves upon the variable selection capabilities of the commonly used (penalized) variants of the Cox proportional hazards model and delivers state-of-the-art out-of-sample prediction of disease onset times.\r\n\r\nCollectively, these methods provide powerful tools for dissecting the genetic architecture of complex traits and the proteomic drivers of disease onset. Furthermore, by delivering accurate polygenic risk scores and precise predictions of onset times, this work advances the capabilities of personalized medicine and clinical risk stratification."}],"article_processing_charge":"No","department":[{"_id":"GradSch"},{"_id":"MaRo"},{"_id":"MaMo"}],"author":[{"full_name":"Depope, Al","last_name":"Depope","id":"0b77531d-dbcd-11ea-9d1d-a8eee0bf3830","first_name":"Al"}],"date_published":"2026-07-11T00:00:00Z","date_updated":"2026-07-22T12:47:00Z","title":"From sparse selection to risk prediction : Approximate message passing for proteomic survival models and large-scale genomics","keyword":["Approximate Message Passing","GWAS","Genomics","Proteomics","Survival 
modeling"],"acknowledged_ssus":[{"_id":"ScienComp"}],"file_date_updated":"2026-07-13T14:56:41Z","user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","file":[{"access_level":"open_access","date_created":"2026-07-13T14:52:19Z","creator":"adepope","file_name":"2026_Depope_Al_Thesis.pdf","file_size":25109878,"date_updated":"2026-07-13T14:52:19Z","checksum":"9ab386790515628d957a194f30a7ccb4","content_type":"application/pdf","relation":"main_file","file_id":"22316"},{"checksum":"8ed8fb63f76a695d5b6fec35343f4b90","file_size":1203199939,"date_updated":"2026-07-13T14:56:41Z","file_id":"22317","content_type":"application/zip","relation":"source_file","access_level":"closed","creator":"adepope","file_name":"2026_Depope_Al_Thesis.zip","date_created":"2026-07-13T14:56:41Z"}],"doi":"10.15479/AT-ISTA-22258","citation":{"ieee":"A. Depope, “From sparse selection to risk prediction : Approximate message passing for proteomic survival models and large-scale genomics,” Institute of Science and Technology Austria, 2026.","chicago":"Depope, Al. “From Sparse Selection to Risk Prediction : Approximate Message Passing for Proteomic Survival Models and Large-Scale Genomics.” Institute of Science and Technology Austria, 2026. <a href=\"https://doi.org/10.15479/AT-ISTA-22258\">https://doi.org/10.15479/AT-ISTA-22258</a>.","mla":"Depope, Al. <i>From Sparse Selection to Risk Prediction : Approximate Message Passing for Proteomic Survival Models and Large-Scale Genomics</i>. Institute of Science and Technology Austria, 2026, doi:<a href=\"https://doi.org/10.15479/AT-ISTA-22258\">10.15479/AT-ISTA-22258</a>.","short":"A. Depope, From Sparse Selection to Risk Prediction : Approximate Message Passing for Proteomic Survival Models and Large-Scale Genomics, Institute of Science and Technology Austria, 2026.","apa":"Depope, A. (2026). <i>From sparse selection to risk prediction : Approximate message passing for proteomic survival models and large-scale genomics</i>. 
Institute of Science and Technology Austria. <a href=\"https://doi.org/10.15479/AT-ISTA-22258\">https://doi.org/10.15479/AT-ISTA-22258</a>","ista":"Depope A. 2026. From sparse selection to risk prediction : Approximate message passing for proteomic survival models and large-scale genomics. Institute of Science and Technology Austria.","ama":"Depope A. From sparse selection to risk prediction : Approximate message passing for proteomic survival models and large-scale genomics. 2026. doi:<a href=\"https://doi.org/10.15479/AT-ISTA-22258\">10.15479/AT-ISTA-22258</a>"},"publication_identifier":{"issn":["2663-337X"]},"_id":"22258","day":"11","OA_place":"publisher","publication_status":"published","doi_confirm":"1","supervisor":[{"full_name":"Robinson, Matthew Richard","orcid":"0000-0001-8982-8813","last_name":"Robinson","first_name":"Matthew Richard","id":"E5D42276-F5DA-11E9-8E24-6303E6697425"},{"full_name":"Mondelli, Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","last_name":"Mondelli","orcid":"0000-0002-3242-7020"}],"ddc":["576","610","006"],"acknowledgement":"This work was supported in part by the Swiss National Science Foundation through the\r\nEccellenza Grant \"Improving estimation and prediction of common complex disease risk\"\r\n(grant number PCEGP3_181181); the European Research Council through the grant\r\n\"Inference in High Dimensions: Light-speed Algorithms and Information Limits\" (grant\r\nnumber 101161364); and the Fondation Jean-Jacques et Felicia Lopez-Loreta through the\r\nPrix Lopez-Loretta 2019.\r\n","has_accepted_license":"1","related_material":{"record":[{"relation":"part_of_dissertation","id":"21488","status":"public"}]},"corr_author":"1","date_created":"2026-07-10T13:27:20Z","type":"dissertation","alternative_title":["ISTA Thesis"],"oa_version":"Published Version","status":"public"},{"DOAJ_listed":"1","tmp":{"short":"CC BY-NC-ND (4.0)","image":"/images/cc_by_nc_nd.png","name":"Creative Commons 
Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0)","legal_code_url":"https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode"},"article_processing_charge":"Yes","department":[{"_id":"MaMo"},{"_id":"MaRo"}],"main_file_link":[{"url":"https://doi.org/10.1016/j.xgen.2026.101162","open_access":"1"}],"author":[{"first_name":"Al","id":"0b77531d-dbcd-11ea-9d1d-a8eee0bf3830","last_name":"Depope","full_name":"Depope, Al"},{"id":"b995e25b-8c4b-11ed-a6d8-f71b7bcd6122","first_name":"Jakub","last_name":"Bajzik","full_name":"Bajzik, Jakub"},{"first_name":"Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","last_name":"Mondelli","orcid":"0000-0002-3242-7020","full_name":"Mondelli, Marco"},{"full_name":"Robinson, Matthew Richard","last_name":"Robinson","orcid":"0000-0001-8982-8813","first_name":"Matthew Richard","id":"E5D42276-F5DA-11E9-8E24-6303E6697425"}],"date_published":"2026-02-18T00:00:00Z","title":"Joint modeling of whole-genome sequencing data for human height via approximate message passing","date_updated":"2026-07-22T12:47:00Z","project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"},{"name":"Inference in High Dimensions: Light-speed Algorithms and Information Limits","grant_number":"101161364","_id":"911e6d1f-16d5-11f0-9cad-c5c68c6a1cdf"},{"name":"Improving estimation and prediction of common complex disease risk","grant_number":"PCEGP3_181181","_id":"9B8D11D6-BA93-11EA-9121-9846C619BF3A"}],"publisher":"Elsevier","abstract":[{"text":"Human height is a model for the genetic analysis of complex traits, and recent studies suggest the presence of thousands of common genetic variant associations and hundreds of low-frequency/rare variants. 
Here, we develop a new algorithmic paradigm based on approximate message passing (genomic vector approximate message passing [gVAMP]) for identifying DNA sequence variants associated with complex traits and common diseases in large-scale whole-genome sequencing (WGS) data. We show that gVAMP accurately localizes associations to variants with the correct frequency and position in the DNA, outperforming existing fine-mapping methods in selecting the appropriate genetic variants within WGS data. We then apply gVAMP to jointly model the relationship of tens of millions of WGS variants with human height in hundreds of thousands of UK Biobank individuals. We identify 59 rare variants and gene burden scores alongside many hundreds of DNA regions containing common variant associations and show that understanding the genetic basis of complex traits will require the joint analysis of hundreds of millions of variables measured on millions of people. The polygenic risk scores obtained from gVAMP have high accuracy (including a prediction accuracy of ∼46% for human height) and outperform current methods for downstream tasks such as mixed linear model association testing across 13 UK Biobank traits. 
In conclusion, gVAMP offers a scalable foundation for a wider range of analyses in WGS data.","lang":"eng"}],"language":[{"iso":"eng"}],"month":"02","year":"2026","oa":1,"related_material":{"record":[{"status":"public","id":"22258","relation":"dissertation_contains"}],"link":[{"relation":"press_release","description":"News on ISTA website","url":"https://ista.ac.at/en/news/big-data-and-human-height/"}]},"corr_author":"1","date_created":"2026-03-23T15:10:03Z","type":"journal_article","status":"public","oa_version":"Published Version","publication":"Cell Genomics","publication_identifier":{"eissn":["2666-979X"]},"_id":"21488","OA_place":"publisher","day":"18","publication_status":"epub_ahead","ddc":["000","570"],"acknowledgement":"We thank Malgorzata Borczyk for creating the gene burden scores. We thank Robin Beaumont, Amedeo Roberto Esposito, Gareth Hawkes, Philip Schniter, Matthew Stephens, Pragya Sur, Peter Visscher, Michael Weedon, and Harry Wright for providing valuable suggestions and comments on earlier versions of the work. This project was funded by a Lopez-Loreta Prize to M.M., an SNSF Eccellenza Grant to M.R.R. (PCEGP3-181181), an ERC Starting Grant to M.M. (INF2, project number 101161364), and core funding from ISTA. High-performance computing was supported by the Scientific Service Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp). We would like to acknowledge the participants and investigators of the UK Biobank study. We gratefully acknowledge the All of Us participants for their contributions, without whom this research would not have been possible. 
We also thank the National Institutes of Health All of Us Research Program for making available the participant data (and/or samples and/or cohort) examined in this study.","has_accepted_license":"1","OA_type":"gold","doi":"10.1016/j.xgen.2026.101162","article_number":"101162","article_type":"original","license":"https://creativecommons.org/licenses/by-nc-nd/4.0/","citation":{"chicago":"Depope, Al, Jakub Bajzik, Marco Mondelli, and Matthew Richard Robinson. “Joint Modeling of Whole-Genome Sequencing Data for Human Height via Approximate Message Passing.” <i>Cell Genomics</i>. Elsevier, 2026. <a href=\"https://doi.org/10.1016/j.xgen.2026.101162\">https://doi.org/10.1016/j.xgen.2026.101162</a>.","ieee":"A. Depope, J. Bajzik, M. Mondelli, and M. R. Robinson, “Joint modeling of whole-genome sequencing data for human height via approximate message passing,” <i>Cell Genomics</i>. Elsevier, 2026.","ama":"Depope A, Bajzik J, Mondelli M, Robinson MR. Joint modeling of whole-genome sequencing data for human height via approximate message passing. <i>Cell Genomics</i>. 2026. doi:<a href=\"https://doi.org/10.1016/j.xgen.2026.101162\">10.1016/j.xgen.2026.101162</a>","apa":"Depope, A., Bajzik, J., Mondelli, M., &#38; Robinson, M. R. (2026). Joint modeling of whole-genome sequencing data for human height via approximate message passing. <i>Cell Genomics</i>. Elsevier. <a href=\"https://doi.org/10.1016/j.xgen.2026.101162\">https://doi.org/10.1016/j.xgen.2026.101162</a>","ista":"Depope A, Bajzik J, Mondelli M, Robinson MR. 2026. Joint modeling of whole-genome sequencing data for human height via approximate message passing. Cell Genomics., 101162.","short":"A. Depope, J. Bajzik, M. Mondelli, M.R. Robinson, Cell Genomics (2026).","mla":"Depope, Al, et al. 
“Joint Modeling of Whole-Genome Sequencing Data for Human Height via Approximate Message Passing.” <i>Cell Genomics</i>, 101162, Elsevier, 2026, doi:<a href=\"https://doi.org/10.1016/j.xgen.2026.101162\">10.1016/j.xgen.2026.101162</a>."},"quality_controlled":"1","user_id":"ba8df636-2132-11f1-aed0-ed93e2281fdd"},{"tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"date_updated":"2026-02-19T08:08:55Z","title":"Spurious correlations in high dimensional regression: The roles of regularization, simplicity bias and over-parameterization","date_published":"2025-07-30T00:00:00Z","author":[{"full_name":"Bombari, Simone","last_name":"Bombari","id":"ca726dda-de17-11ea-bc14-f9da834f63aa","first_name":"Simone"},{"full_name":"Mondelli, Marco","last_name":"Mondelli","orcid":"0000-0002-3242-7020","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco"}],"department":[{"_id":"MaMo"}],"article_processing_charge":"No","external_id":{"arxiv":["2502.01347"]},"abstract":[{"text":"Learning models have been shown to rely on spurious correlations between non-predictive features and the associated labels in the training data, with negative implications on robustness, bias and fairness. In this work, we provide a statistical characterization of this phenomenon for high-dimensional regression, when the data contains a predictive core feature x and a spurious feature y. Specifically, we quantify the amount of spurious correlations C learned via linear regression, in terms of the data covariance and the strength λ of the ridge regularization. As a consequence, we first capture the simplicity of y through the spectrum of its covariance, and its correlation with x through the Schur complement of the full data covariance. 
Next, we prove a trade-off between C and the in-distribution test loss L, by showing that the value of λ that minimizes L lies in an interval where C is increasing. Finally, we investigate the effects of over-parameterization via the random features model, by showing its equivalence to regularized linear regression. Our theoretical results are supported by numerical experiments on Gaussian, Color-MNIST, and CIFAR-10 datasets.","lang":"eng"}],"publisher":"ML Research Press","project":[{"_id":"911e6d1f-16d5-11f0-9cad-c5c68c6a1cdf","grant_number":"101161364","name":"Inference in High Dimensions: Light-speed Algorithms and Information Limits"},{"name":"Trustworthy Deep Learning Theory: Private Over-Parameterized Models and Robust LLMs","_id":"92099302-16d5-11f0-9cad-f9a785f54fbd"}],"oa":1,"year":"2025","page":"4839-4873","month":"07","language":[{"iso":"eng"}],"oa_version":"Published Version","publication":"Proceedings of the 42nd International Conference on Machine Learning","status":"public","type":"conference","alternative_title":["PMLR"],"conference":{"location":"Vancouver, Canada","start_date":"2025-07-13","name":"ICML: International Conference on Machine Learning","end_date":"2025-07-19"},"date_created":"2026-02-18T11:58:00Z","arxiv":1,"corr_author":"1","acknowledgement":"Marco Mondelli is funded by the European Union (ERC, INF2, project number 101161364). Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency. Neither the European Union nor the granting authority can be held responsible for them. Simone Bombari is supported by a Google PhD fellowship. 
The authors would like to thank GuanWen Qiu for helpful discussions.","has_accepted_license":"1","OA_type":"gold","ddc":["000"],"publication_status":"published","OA_place":"publisher","intvolume":"       267","day":"30","_id":"21324","publication_identifier":{"eissn":["2640-3498"]},"volume":267,"citation":{"mla":"Bombari, Simone, and Marco Mondelli. “Spurious Correlations in High Dimensional Regression: The Roles of Regularization, Simplicity Bias and over-Parameterization.” <i>Proceedings of the 42nd International Conference on Machine Learning</i>, vol. 267, ML Research Press, 2025, pp. 4839–73.","apa":"Bombari, S., &#38; Mondelli, M. (2025). Spurious correlations in high dimensional regression: The roles of regularization, simplicity bias and over-parameterization. In <i>Proceedings of the 42nd International Conference on Machine Learning</i> (Vol. 267, pp. 4839–4873). Vancouver, Canada: ML Research Press.","ista":"Bombari S, Mondelli M. 2025. Spurious correlations in high dimensional regression: The roles of regularization, simplicity bias and over-parameterization. Proceedings of the 42nd International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 267, 4839–4873.","short":"S. Bombari, M. Mondelli, in:, Proceedings of the 42nd International Conference on Machine Learning, ML Research Press, 2025, pp. 4839–4873.","ama":"Bombari S, Mondelli M. Spurious correlations in high dimensional regression: The roles of regularization, simplicity bias and over-parameterization. In: <i>Proceedings of the 42nd International Conference on Machine Learning</i>. Vol 267. ML Research Press; 2025:4839-4873.","ieee":"S. Bombari and M. Mondelli, “Spurious correlations in high dimensional regression: The roles of regularization, simplicity bias and over-parameterization,” in <i>Proceedings of the 42nd International Conference on Machine Learning</i>, Vancouver, Canada, 2025, vol. 267, pp. 
4839–4873.","chicago":"Bombari, Simone, and Marco Mondelli. “Spurious Correlations in High Dimensional Regression: The Roles of Regularization, Simplicity Bias and over-Parameterization.” In <i>Proceedings of the 42nd International Conference on Machine Learning</i>, 267:4839–73. ML Research Press, 2025."},"file":[{"file_id":"21335","relation":"main_file","content_type":"application/pdf","checksum":"d4ba4f7717b362ca38878f45e57bd643","date_updated":"2026-02-19T08:04:38Z","file_size":887526,"file_name":"2025_ICML_Bombari.pdf","creator":"dernst","date_created":"2026-02-19T08:04:38Z","success":1,"access_level":"open_access"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file_date_updated":"2026-02-19T08:04:38Z","quality_controlled":"1"},{"external_id":{"pmid":["41321376"]},"article_processing_charge":"No","department":[{"_id":"MaMo"}],"author":[{"last_name":"Gozeten","first_name":"Halil Alperen","full_name":"Gozeten, Halil Alperen"},{"full_name":"Ildiz, Muhammed Emrullah","last_name":"Ildiz","first_name":"Muhammed Emrullah"},{"first_name":"Xuechen","last_name":"Zhang","full_name":"Zhang, Xuechen"},{"first_name":"Mahdi","last_name":"Soltanolkotabi","full_name":"Soltanolkotabi, Mahdi"},{"id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","last_name":"Mondelli","orcid":"0000-0002-3242-7020","full_name":"Mondelli, Marco"},{"full_name":"Oymak, Samet","first_name":"Samet","last_name":"Oymak"}],"date_published":"2025-11-30T00:00:00Z","title":"Test-time training provably improves transformers as in-context learners","date_updated":"2026-02-19T08:18:24Z","tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"language":[{"iso":"eng"}],"year":"2025","page":"20266-20295","month":"11","oa":1,"project":[{"_id":"911e6d1f-16d5-11f0-9cad-c5c68c6a1cdf","name":"Inference in High Dimensions: Light-speed Algorithms 
and Information Limits","grant_number":"101161364"}],"publisher":"ML Research Press","abstract":[{"text":"Test-time training (TTT) methods explicitly update the weights of a model to adapt to the specific test instance, and they have found success in a variety of settings, including most recently language modeling and reasoning. To demystify this success, we investigate a gradient-based TTT algorithm for in-context learning, where we train a transformer model on the in-context demonstrations provided in the test prompt. Specifically, we provide a comprehensive theoretical characterization of linear transformers when the update rule is a single gradient step. Our theory (i) delineates the role of alignment between pretraining distribution and target task, (ii) demystifies how TTT can alleviate distribution shift, and (iii) quantifies the sample complexity of TTT including how it can significantly reduce the eventual sample size required for in-context learning. As our empirical contribution, we study the benefits of TTT for TabPFN, a tabular foundation model. In line with our theory, we demonstrate that TTT significantly reduces the required sample size for tabular classification (3 to 5 times fewer) unlocking substantial inference efficiency with a negligible training cost.","lang":"eng"}],"publication_identifier":{"eissn":["2640-3498"]},"_id":"21325","day":"30","intvolume":"       267","OA_place":"publisher","publication_status":"published","ddc":["000"],"has_accepted_license":"1","OA_type":"gold","acknowledgement":"H.A.G., M.E.I., X.Z., and S.O. were supported in part by the NSF grants CCF2046816, CCF-2403075, CCF-2008020, and the Office of Naval Research grant N000142412289.\r\nM. M. is funded by the European Union (ERC, INF2, project number 101161364). Views and opinions expressed are, however, those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency. 
Neither the European Union nor the granting authority can be held responsible for them. M.S. is supported by the Packard Fellowship in Science and Engineering, a Sloan Research Fellowship in Mathematics, an NSF-CAREER under award #1846369, DARPA FastNICS program, and NSF-CIF awards #1813877 and #2008443, and NIH DP2LM014564-01. The authors also\r\nacknowledge further support from Open Philanthropy, OpenAI, Amazon Research, Google Research, and Microsoft Research.","pmid":1,"date_created":"2026-02-18T12:00:44Z","conference":{"location":"Vancouver, Canada","name":"ICML: International Conference on Machine Learning","start_date":"2025-07-13","end_date":"2025-07-19"},"alternative_title":["PMLR"],"type":"conference","publication":"Proceedings of the 42nd International Conference on Machine Learning","status":"public","oa_version":"Published Version","quality_controlled":"1","file_date_updated":"2026-02-19T08:15:48Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file":[{"access_level":"open_access","success":1,"date_created":"2026-02-19T08:15:48Z","file_name":"2025_ICML_Gozeten.pdf","creator":"dernst","date_updated":"2026-02-19T08:15:48Z","file_size":471176,"checksum":"f774f8619a0d72f3975d9cb23942a1e9","relation":"main_file","content_type":"application/pdf","file_id":"21336"}],"volume":267,"citation":{"ieee":"H. A. Gozeten, M. E. Ildiz, X. Zhang, M. Soltanolkotabi, M. Mondelli, and S. Oymak, “Test-time training provably improves transformers as in-context learners,” in <i>Proceedings of the 42nd International Conference on Machine Learning</i>, Vancouver, Canada, 2025, vol. 267, pp. 20266–20295.","chicago":"Gozeten, Halil Alperen, Muhammed Emrullah Ildiz, Xuechen Zhang, Mahdi Soltanolkotabi, Marco Mondelli, and Samet Oymak. “Test-Time Training Provably Improves Transformers as in-Context Learners.” In <i>Proceedings of the 42nd International Conference on Machine Learning</i>, 267:20266–95. ML Research Press, 2025.","mla":"Gozeten, Halil Alperen, et al. 
“Test-Time Training Provably Improves Transformers as in-Context Learners.” <i>Proceedings of the 42nd International Conference on Machine Learning</i>, vol. 267, ML Research Press, 2025, pp. 20266–95.","short":"H.A. Gozeten, M.E. Ildiz, X. Zhang, M. Soltanolkotabi, M. Mondelli, S. Oymak, in:, Proceedings of the 42nd International Conference on Machine Learning, ML Research Press, 2025, pp. 20266–20295.","apa":"Gozeten, H. A., Ildiz, M. E., Zhang, X., Soltanolkotabi, M., Mondelli, M., &#38; Oymak, S. (2025). Test-time training provably improves transformers as in-context learners. In <i>Proceedings of the 42nd International Conference on Machine Learning</i> (Vol. 267, pp. 20266–20295). Vancouver, Canada: ML Research Press.","ista":"Gozeten HA, Ildiz ME, Zhang X, Soltanolkotabi M, Mondelli M, Oymak S. 2025. Test-time training provably improves transformers as in-context learners. Proceedings of the 42nd International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 267, 20266–20295.","ama":"Gozeten HA, Ildiz ME, Zhang X, Soltanolkotabi M, Mondelli M, Oymak S. Test-time training provably improves transformers as in-context learners. In: <i>Proceedings of the 42nd International Conference on Machine Learning</i>. Vol 267. ML Research Press; 2025:20266-20295."}},{"oa_version":"Published Version","status":"public","publication":"Proceedings of the 42nd International Conference on Machine Learning","conference":{"end_date":"2025-07-19","location":"Vancouver, Canada","start_date":"2025-07-13","name":"ICML: International Conference on Machine Learning"},"type":"conference","alternative_title":["PMLR"],"arxiv":1,"date_created":"2026-02-18T12:02:45Z","corr_author":"1","ddc":["000"],"OA_type":"gold","acknowledgement":"This research was funded in whole or in part by the Austrian Science Fund (FWF) 10.55776/COE12. 
For the purpose of open access, the authors have applied a CC BY public copyright license to any Author Accepted Manuscript version arising from this submission. The authors would like to thank Peter Súkeník for general helpful discussions and for pointing out that all the stationary points are approximately proportional in the case without entropic regularization. ","has_accepted_license":"1","OA_place":"publisher","day":"30","intvolume":"       267","publication_status":"published","publication_identifier":{"eissn":["2640-3498"]},"_id":"21326","volume":267,"citation":{"chicago":"Wu, Diyuan, and Marco Mondelli. “Neural Collapse beyond the Unconstrained Features Model: Landscape, Dynamics, and Generalization in the Mean-Field Regime.” In <i>Proceedings of the 42nd International Conference on Machine Learning</i>, 267:67499–536. ML Research Press, 2025.","ieee":"D. Wu and M. Mondelli, “Neural collapse beyond the unconstrained features model: Landscape, dynamics, and generalization in the mean-field regime,” in <i>Proceedings of the 42nd International Conference on Machine Learning</i>, Vancouver, Canada, 2025, vol. 267, pp. 67499–67536.","mla":"Wu, Diyuan, and Marco Mondelli. “Neural Collapse beyond the Unconstrained Features Model: Landscape, Dynamics, and Generalization in the Mean-Field Regime.” <i>Proceedings of the 42nd International Conference on Machine Learning</i>, vol. 267, ML Research Press, 2025, pp. 67499–536.","ama":"Wu D, Mondelli M. Neural collapse beyond the unconstrained features model: Landscape, dynamics, and generalization in the mean-field regime. In: <i>Proceedings of the 42nd International Conference on Machine Learning</i>. Vol 267. ML Research Press; 2025:67499-67536.","short":"D. Wu, M. Mondelli, in:, Proceedings of the 42nd International Conference on Machine Learning, ML Research Press, 2025, pp. 67499–67536.","ista":"Wu D, Mondelli M. 2025. 
Neural collapse beyond the unconstrained features model: Landscape, dynamics, and generalization in the mean-field regime. Proceedings of the 42nd International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 267, 67499–67536.","apa":"Wu, D., &#38; Mondelli, M. (2025). Neural collapse beyond the unconstrained features model: Landscape, dynamics, and generalization in the mean-field regime. In <i>Proceedings of the 42nd International Conference on Machine Learning</i> (Vol. 267, pp. 67499–67536). Vancouver, Canada: ML Research Press."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file":[{"access_level":"open_access","date_created":"2026-02-19T08:28:22Z","success":1,"file_name":"2025_ICML_Wu.pdf","creator":"dernst","date_updated":"2026-02-19T08:28:22Z","file_size":3994385,"checksum":"c5ce8b1c83e33dc3a11122f4910deb67","relation":"main_file","content_type":"application/pdf","file_id":"21337"}],"file_date_updated":"2026-02-19T08:28:22Z","quality_controlled":"1","tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"date_updated":"2026-02-19T08:30:42Z","title":"Neural collapse beyond the unconstrained features model: Landscape, dynamics, and generalization in the mean-field regime","author":[{"full_name":"Wu, Diyuan","first_name":"Diyuan","id":"1a5914c2-896a-11ed-bdf8-fb80621a0635","last_name":"Wu"},{"full_name":"Mondelli, Marco","first_name":"Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","last_name":"Mondelli","orcid":"0000-0002-3242-7020"}],"date_published":"2025-07-30T00:00:00Z","department":[{"_id":"MaMo"}],"external_id":{"arxiv":["2501.19104"]},"article_processing_charge":"No","abstract":[{"text":"Neural Collapse is a phenomenon where the last-layer representations of a well-trained neural network converge to a highly structured geometry. 
In this paper, we focus on its first (and most basic) property, known as NC1: the within-class variability vanishes. While prior theoretical studies establish the occurrence of NC1 via the data-agnostic unconstrained features model, our work adopts a data-specific perspective, analyzing NC1 in a three-layer neural network, with the first two layers operating in the mean-field regime and followed by a linear layer. In particular, we establish a fundamental connection between NC1 and the loss landscape: we prove that points with small empirical loss and gradient norm (thus, close to being stationary) approximately satisfy NC1, and the closeness to NC1 is controlled by the residual loss and gradient norm. We then show that (i) gradient flow on the mean squared error converges to NC1 solutions with small empirical loss, and (ii) for well-separated data distributions, both NC1 and vanishing test loss are achieved simultaneously. This aligns with the empirical observation that NC1 emerges during training while models attain near-zero test error. 
Overall, our results demonstrate that NC1 arises from gradient training due to the properties of the loss landscape, and they show the co-occurrence of NC1 and small test error for certain data distributions.","lang":"eng"}],"publisher":"ML Research Press","oa":1,"page":"67499-67536","month":"07","year":"2025","language":[{"iso":"eng"}]},{"tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"date_published":"2025-07-01T00:00:00Z","author":[{"id":"d0258e7b-50b8-11ef-ad56-8b9f537b6b1b","first_name":"Filip","last_name":"Kovačević","full_name":"Kovačević, Filip"},{"full_name":"Yihan, Zhang","last_name":"Yihan","first_name":"Zhang"},{"full_name":"Mondelli, Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","orcid":"0000-0002-3242-7020","last_name":"Mondelli"}],"date_updated":"2026-02-19T09:03:53Z","title":"Spectral estimators for multi-index models: Precise asymptotics and optimal weak recovery","article_processing_charge":"No","external_id":{"arxiv":["2502.01583"]},"department":[{"_id":"MaMo"}],"publisher":"ML Research Press","abstract":[{"text":"Multi-index models provide a popular framework to investigate the learnability of functions with low-dimensional structure and, also due to their connections with neural networks, they have been object of recent intensive study. In this paper, we focus on recovering the subspace spanned by the signals via spectral estimators – a family of methods routinely used in practice, often as a warm-start for iterative algorithms. Our main technical contribution is a precise asymptotic characterization of the performance of spectral methods, when sample size and input dimension grow proportionally and the dimension p of the space to recover is fixed. 
Specifically, we locate the top-p eigenvalues of the spectral matrix and establish the overlaps between the corresponding eigenvectors (which give the spectral estimators) and a basis of the signal subspace. Our analysis unveils a phase transition phenomenon in which, as the sample complexity grows, eigenvalues escape from the bulk of the spectrum and, when that happens, eigenvectors recover directions of the desired subspace. The precise characterization we put forward enables the optimization of the data preprocessing, thus allowing to identify the spectral estimator that requires the minimal sample size for weak recovery.","lang":"eng"}],"project":[{"grant_number":"101161364","name":"Inference in High Dimensions: Light-speed Algorithms and Information Limits","_id":"911e6d1f-16d5-11f0-9cad-c5c68c6a1cdf"}],"oa":1,"language":[{"iso":"eng"}],"year":"2025","page":"3354-3404","month":"07","type":"conference","alternative_title":["PMLR"],"conference":{"start_date":"2025-06-30","name":"COLT: Conference on Learning Theory","location":"Lyon, France","end_date":"2025-07-04"},"status":"public","publication":"Proceedings of 38th Conference on Learning Theory","oa_version":"Published Version","corr_author":"1","date_created":"2026-02-18T12:12:47Z","arxiv":1,"has_accepted_license":"1","acknowledgement":"This work was done when Y. Z. was at the Institute of Science and Technology Austria. Y. Z. and\r\nM. M. are funded by the European Union (ERC, INF2, project number 101161364). Views and\r\nopinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency. Neither the European Union nor the granting authority can be held responsible for them. 
The authors would like to acknowledge (in alphabetical order) discussions with Yatin Dandi, Leonardo Defilippis and Bruno Loureiro concerning their parallel work (Defilippis et al., 2025).","OA_type":"gold","ddc":["000"],"scopus_import":"1","_id":"21328","publication_identifier":{"eissn":["2640-3498"]},"publication_status":"published","intvolume":"       291","OA_place":"publisher","day":"01","volume":291,"citation":{"ieee":"F. Kovačević, Z. Yihan, and M. Mondelli, “Spectral estimators for multi-index models: Precise asymptotics and optimal weak recovery,” in <i>Proceedings of 38th Conference on Learning Theory</i>, Lyon, France, 2025, vol. 291, pp. 3354–3404.","chicago":"Kovačević, Filip, Zhang Yihan, and Marco Mondelli. “Spectral Estimators for Multi-Index Models: Precise Asymptotics and Optimal Weak Recovery.” In <i>Proceedings of 38th Conference on Learning Theory</i>, 291:3354–3404. ML Research Press, 2025.","short":"F. Kovačević, Z. Yihan, M. Mondelli, in:, Proceedings of 38th Conference on Learning Theory, ML Research Press, 2025, pp. 3354–3404.","apa":"Kovačević, F., Yihan, Z., &#38; Mondelli, M. (2025). Spectral estimators for multi-index models: Precise asymptotics and optimal weak recovery. In <i>Proceedings of 38th Conference on Learning Theory</i> (Vol. 291, pp. 3354–3404). Lyon, France: ML Research Press.","ista":"Kovačević F, Yihan Z, Mondelli M. 2025. Spectral estimators for multi-index models: Precise asymptotics and optimal weak recovery. Proceedings of 38th Conference on Learning Theory. COLT: Conference on Learning Theory, PMLR, vol. 291, 3354–3404.","ama":"Kovačević F, Yihan Z, Mondelli M. Spectral estimators for multi-index models: Precise asymptotics and optimal weak recovery. In: <i>Proceedings of 38th Conference on Learning Theory</i>. Vol 291. ML Research Press; 2025:3354-3404.","mla":"Kovačević, Filip, et al. 
“Spectral Estimators for Multi-Index Models: Precise Asymptotics and Optimal Weak Recovery.” <i>Proceedings of 38th Conference on Learning Theory</i>, vol. 291, ML Research Press, 2025, pp. 3354–404."},"file":[{"file_id":"21339","relation":"main_file","content_type":"application/pdf","checksum":"19aa70ab4f57fb9067b6ebb99a5fd6f0","date_updated":"2026-02-19T09:03:43Z","file_size":844611,"file_name":"2025_LearningTheory_Kovacevic.pdf","creator":"dernst","success":1,"date_created":"2026-02-19T09:03:43Z","access_level":"open_access"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","quality_controlled":"1","file_date_updated":"2026-02-19T09:03:43Z"},{"project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"}],"abstract":[{"text":"We consider a prototypical problem of Bayesian inference for a structured spiked model: a low-rank signal is corrupted by additive noise. While both information-theoretic and algorithmic limits are well understood when the noise is a Gaussian Wigner matrix, the more realistic case of structured noise still remains challenging. To capture the structure while maintaining mathematical tractability, a line of work has focused on rotationally invariant noise. However, existing studies either provide suboptimal algorithms or are limited to a special class of noise ensembles. In this paper, using tools from statistical physics (replica method) and random matrix theory (generalized spherical integrals) we establish the characterization of the information-theoretic limits for a noise matrix drawn from a general trace ensemble. Remarkably, our analysis unveils the asymptotic equivalence between the rotationally invariant model and a surrogate Gaussian one. 
Finally, we show how to saturate the predicted statistical limits using an efficient algorithm inspired by the theory of adaptive Thouless-Anderson-Palmer (TAP) equations.","lang":"eng"}],"publisher":"American Physical Society","year":"2025","month":"01","language":[{"iso":"eng"}],"oa":1,"tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"DOAJ_listed":"1","department":[{"_id":"MaMo"}],"article_processing_charge":"Yes","external_id":{"arxiv":["2405.20993"]},"date_updated":"2026-05-06T12:57:36Z","title":"Information limits and Thouless-Anderson-Palmer equations for spiked matrix models with structured noise","date_published":"2025-01-22T00:00:00Z","author":[{"last_name":"Barbier","first_name":"Jean","full_name":"Barbier, Jean"},{"first_name":"Francesco","last_name":"Camilli","full_name":"Camilli, Francesco"},{"last_name":"Xu","first_name":"Yizhou","full_name":"Xu, Yizhou"},{"first_name":"Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","last_name":"Mondelli","orcid":"0000-0002-3242-7020","full_name":"Mondelli, Marco"}],"doi":"10.1103/PhysRevResearch.7.013081","citation":{"mla":"Barbier, Jean, et al. “Information Limits and Thouless-Anderson-Palmer Equations for Spiked Matrix Models with Structured Noise.” <i>Physical Review Research</i>, vol. 7, 013081, American Physical Society, 2025, doi:<a href=\"https://doi.org/10.1103/PhysRevResearch.7.013081\">10.1103/PhysRevResearch.7.013081</a>.","ista":"Barbier J, Camilli F, Xu Y, Mondelli M. 2025. Information limits and Thouless-Anderson-Palmer equations for spiked matrix models with structured noise. Physical Review Research. 7, 013081.","short":"J. Barbier, F. Camilli, Y. Xu, M. Mondelli, Physical Review Research 7 (2025).","apa":"Barbier, J., Camilli, F., Xu, Y., &#38; Mondelli, M. (2025). 
Information limits and Thouless-Anderson-Palmer equations for spiked matrix models with structured noise. <i>Physical Review Research</i>. American Physical Society. <a href=\"https://doi.org/10.1103/PhysRevResearch.7.013081\">https://doi.org/10.1103/PhysRevResearch.7.013081</a>","ama":"Barbier J, Camilli F, Xu Y, Mondelli M. Information limits and Thouless-Anderson-Palmer equations for spiked matrix models with structured noise. <i>Physical Review Research</i>. 2025;7. doi:<a href=\"https://doi.org/10.1103/PhysRevResearch.7.013081\">10.1103/PhysRevResearch.7.013081</a>","ieee":"J. Barbier, F. Camilli, Y. Xu, and M. Mondelli, “Information limits and Thouless-Anderson-Palmer equations for spiked matrix models with structured noise,” <i>Physical Review Research</i>, vol. 7. American Physical Society, 2025.","chicago":"Barbier, Jean, Francesco Camilli, Yizhou Xu, and Marco Mondelli. “Information Limits and Thouless-Anderson-Palmer Equations for Spiked Matrix Models with Structured Noise.” <i>Physical Review Research</i>. American Physical Society, 2025. 
<a href=\"https://doi.org/10.1103/PhysRevResearch.7.013081\">https://doi.org/10.1103/PhysRevResearch.7.013081</a>."},"volume":7,"article_type":"original","article_number":"013081","file_date_updated":"2025-02-03T08:27:59Z","quality_controlled":"1","file":[{"file_id":"18988","relation":"main_file","content_type":"application/pdf","checksum":"52c5f72d80ffc928542469114fcdb62b","date_updated":"2025-02-03T08:27:59Z","file_size":702543,"file_name":"2025_PhysReviewResearch_Barbier.pdf","creator":"dernst","success":1,"date_created":"2025-02-03T08:27:59Z","access_level":"open_access"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","arxiv":1,"APC_amount":"3272,21 EUR","date_created":"2025-02-02T23:01:54Z","related_material":{"link":[{"url":"https://github.com/xu-yz19/spiked-matrix-models-with-structured-noise","relation":"software"}]},"status":"public","oa_version":"Published Version","publication":"Physical Review Research","type":"journal_article","publication_status":"published","OA_place":"publisher","day":"22","intvolume":"         7","_id":"18986","publication_identifier":{"issn":["2643-1564"]},"scopus_import":"1","acknowledgement":"J.B., F.C., and Y.X. were funded by the European Union (ERC, CHORAL, Project No. 101039794). Views and opinions expressed are however those of the authors only and do not necessarily reflect those of the European Union or the European Research Council. Neither the European Union nor the granting authority can be held responsible for them. M.M. was supported by the 2019 Lopez-Loreta Prize. J.B. 
acknowledges discussions with TianQi Hou at the initial stage of the project, as well as with Antoine Bodin.","OA_type":"gold","has_accepted_license":"1","ddc":["530"]},{"publication":"Applied and Computational Harmonic Analysis","status":"public","oa_version":"Published Version","type":"journal_article","date_created":"2025-02-23T23:01:54Z","corr_author":"1","scopus_import":"1","has_accepted_license":"1","OA_type":"hybrid","ddc":["000"],"publication_status":"published","intvolume":"        77","OA_place":"publisher","day":"01","_id":"19065","publication_identifier":{"eissn":["1096-603X"],"issn":["1063-5203"]},"citation":{"short":"M. Fornasier, T. Klock, M. Mondelli, M. Rauchensteiner, Applied and Computational Harmonic Analysis 77 (2025).","apa":"Fornasier, M., Klock, T., Mondelli, M., &#38; Rauchensteiner, M. (2025). Efficient identification of wide shallow neural networks with biases. <i>Applied and Computational Harmonic Analysis</i>. Elsevier. <a href=\"https://doi.org/10.1016/j.acha.2025.101749\">https://doi.org/10.1016/j.acha.2025.101749</a>","ista":"Fornasier M, Klock T, Mondelli M, Rauchensteiner M. 2025. Efficient identification of wide shallow neural networks with biases. Applied and Computational Harmonic Analysis. 77, 101749.","ama":"Fornasier M, Klock T, Mondelli M, Rauchensteiner M. Efficient identification of wide shallow neural networks with biases. <i>Applied and Computational Harmonic Analysis</i>. 2025;77. doi:<a href=\"https://doi.org/10.1016/j.acha.2025.101749\">10.1016/j.acha.2025.101749</a>","mla":"Fornasier, Massimo, et al. “Efficient Identification of Wide Shallow Neural Networks with Biases.” <i>Applied and Computational Harmonic Analysis</i>, vol. 77, 101749, Elsevier, 2025, doi:<a href=\"https://doi.org/10.1016/j.acha.2025.101749\">10.1016/j.acha.2025.101749</a>.","ieee":"M. Fornasier, T. Klock, M. Mondelli, and M. 
Rauchensteiner, “Efficient identification of wide shallow neural networks with biases,” <i>Applied and Computational Harmonic Analysis</i>, vol. 77. Elsevier, 2025.","chicago":"Fornasier, Massimo, Timo Klock, Marco Mondelli, and Michael Rauchensteiner. “Efficient Identification of Wide Shallow Neural Networks with Biases.” <i>Applied and Computational Harmonic Analysis</i>. Elsevier, 2025. <a href=\"https://doi.org/10.1016/j.acha.2025.101749\">https://doi.org/10.1016/j.acha.2025.101749</a>."},"volume":77,"article_type":"original","article_number":"101749","doi":"10.1016/j.acha.2025.101749","file":[{"access_level":"open_access","file_name":"2025_ApplCompAnalysis_Fornasier.pdf","creator":"dernst","date_created":"2025-08-05T12:22:04Z","success":1,"checksum":"657f258af0f7ca135e69959fd13e2d63","date_updated":"2025-08-05T12:22:04Z","file_size":2223350,"file_id":"20131","relation":"main_file","content_type":"application/pdf"}],"user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","file_date_updated":"2025-08-05T12:22:04Z","quality_controlled":"1","tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"title":"Efficient identification of wide shallow neural networks with biases","date_updated":"2025-09-30T10:35:09Z","date_published":"2025-06-01T00:00:00Z","author":[{"last_name":"Fornasier","first_name":"Massimo","full_name":"Fornasier, Massimo"},{"full_name":"Klock, Timo","last_name":"Klock","first_name":"Timo"},{"full_name":"Mondelli, Marco","orcid":"0000-0002-3242-7020","last_name":"Mondelli","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco"},{"full_name":"Rauchensteiner, Michael","last_name":"Rauchensteiner","first_name":"Michael"}],"department":[{"_id":"MaMo"}],"article_processing_charge":"No","external_id":{"isi":["001430202700001"]},"abstract":[{"text":"The identification of the parameters of a neural 
network from finite samples of input-output pairs is often referred to as the teacher-student model, and this model has represented a popular framework for understanding training and generalization. Even if the problem is NP-complete in the worst case, a rapidly growing literature – after adding suitable distributional assumptions – has established finite sample identification of two-layer networks with a number of neurons (math. formula), D being the input dimension. For the range (math. formula) the problem becomes harder, and truly little is known for networks parametrized by biases as well. This paper fills the gap by providing efficient algorithms and rigorous theoretical guarantees of finite sample identification for such wider shallow networks with biases. Our approach is based on a two-step pipeline: first, we recover the direction of the weights, by exploiting second order information; next, we identify the signs by suitable algebraic evaluations, and we recover the biases by empirical risk minimization via gradient descent. Numerical results demonstrate the effectiveness of our approach.","lang":"eng"}],"publisher":"Elsevier","PlanS_conform":"1","oa":1,"month":"06","year":"2025","isi":1,"language":[{"iso":"eng"}]},{"user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","file":[{"content_type":"application/pdf","relation":"main_file","file_id":"19286","file_size":898601,"date_updated":"2025-03-04T09:35:57Z","checksum":"df3921ddf1b360b07f43d427fea51242","date_created":"2025-03-04T09:35:57Z","success":1,"creator":"dernst","file_name":"2025_LIPIcs_Resch.pdf","access_level":"open_access"}],"file_date_updated":"2025-03-04T09:35:57Z","quality_controlled":"1","citation":{"chicago":"Resch, Nicolas, Chen Yuan, and Yihan Zhang. “Tight Bounds on List-Decodable and List-Recoverable Zero-Rate Codes.” In <i>16th Innovations in Theoretical Computer Science Conference</i>, Vol. 325. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2025. 
<a href=\"https://doi.org/10.4230/LIPIcs.ITCS.2025.82\">https://doi.org/10.4230/LIPIcs.ITCS.2025.82</a>.","ieee":"N. Resch, C. Yuan, and Y. Zhang, “Tight bounds on list-decodable and list-recoverable zero-rate codes,” in <i>16th Innovations in Theoretical Computer Science Conference</i>, New York, NY, United States, 2025, vol. 325.","mla":"Resch, Nicolas, et al. “Tight Bounds on List-Decodable and List-Recoverable Zero-Rate Codes.” <i>16th Innovations in Theoretical Computer Science Conference</i>, vol. 325, 82, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2025, doi:<a href=\"https://doi.org/10.4230/LIPIcs.ITCS.2025.82\">10.4230/LIPIcs.ITCS.2025.82</a>.","ama":"Resch N, Yuan C, Zhang Y. Tight bounds on list-decodable and list-recoverable zero-rate codes. In: <i>16th Innovations in Theoretical Computer Science Conference</i>. Vol 325. Schloss Dagstuhl - Leibniz-Zentrum für Informatik; 2025. doi:<a href=\"https://doi.org/10.4230/LIPIcs.ITCS.2025.82\">10.4230/LIPIcs.ITCS.2025.82</a>","apa":"Resch, N., Yuan, C., &#38; Zhang, Y. (2025). Tight bounds on list-decodable and list-recoverable zero-rate codes. In <i>16th Innovations in Theoretical Computer Science Conference</i> (Vol. 325). New York, NY, United States: Schloss Dagstuhl - Leibniz-Zentrum für Informatik. <a href=\"https://doi.org/10.4230/LIPIcs.ITCS.2025.82\">https://doi.org/10.4230/LIPIcs.ITCS.2025.82</a>","ista":"Resch N, Yuan C, Zhang Y. 2025. Tight bounds on list-decodable and list-recoverable zero-rate codes. 16th Innovations in Theoretical Computer Science Conference. ITCS: Innovations in Theoretical Computer Science, LIPIcs, vol. 325, 82.","short":"N. Resch, C. Yuan, Y. Zhang, in:, 16th Innovations in Theoretical Computer Science Conference, Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2025."},"volume":325,"article_number":"82","doi":"10.4230/LIPIcs.ITCS.2025.82","scopus_import":"1","ddc":["510","000"],"acknowledgement":"The research of C. 
Yuan was supported in part by the National Key R&D Program of China under Grant 2023YFE0123900 and Natural Science Foundation of Shanghai under the 2024 Shanghai Action Plan for Science, Technology and Innovation Grant 24BC3200700. The research of N. Resch is supported in part by an NWO (Dutch Research Council) grant with number C.2324.0590, and this work was done in part while he was visiting the Simons Institute for the Theory of Computing, supported by DOE grant #DE-SC0024124.","has_accepted_license":"1","OA_type":"gold","OA_place":"publisher","day":"11","intvolume":"       325","publication_status":"published","publication_identifier":{"issn":["1868-8969"],"isbn":["9783959773614"]},"_id":"19281","oa_version":"Published Version","publication":"16th Innovations in Theoretical Computer Science Conference","status":"public","conference":{"location":"New York, NY, United States","name":"ITCS: Innovations in Theoretical Computer Science","start_date":"2025-01-07","end_date":"2025-01-10"},"type":"conference","alternative_title":["LIPIcs"],"date_created":"2025-03-02T23:01:53Z","arxiv":1,"corr_author":"1","oa":1,"month":"02","year":"2025","language":[{"iso":"eng"}],"isi":1,"abstract":[{"lang":"eng","text":"In this work, we consider the list-decodability and list-recoverability of codes in the zero-rate regime. Briefly, a code 𝒞 ⊆ [q]ⁿ is (p,𝓁,L)-list-recoverable if for all tuples of input lists (Y₁,… ,Y_n) with each Y_i ⊆ [q] and |Y_i| = 𝓁, the number of codewords c ∈ 𝒞 such that c_i ∉ Y_i for at most pn choices of i ∈ [n] is less than L; list-decoding is the special case of 𝓁 = 1. In recent work by Resch, Yuan and Zhang (ICALP 2023) the zero-rate threshold for list-recovery was determined for all parameters: that is, the work explicitly computes p_*: = p_*(q,𝓁,L) with the property that for all ε > 0 (a) there exist positive-rate (p_*-ε,𝓁,L)-list-recoverable codes, and (b) any (p_*+ε,𝓁,L)-list-recoverable code has rate 0. 
In fact, in the latter case the code has constant size, independent of n. However, the constant size in their work is quite large in 1/ε, at least |𝒞| ≥ (1/(ε))^O(q^L).\r\nOur contribution in this work is to show that for all choices of q,𝓁 and L with q ≥ 3, any (p_*+ε,𝓁,L)-list-recoverable code must have size O_{q,𝓁,L}(1/ε), and furthermore this upper bound is complemented by a matching lower bound Ω_{q,𝓁,L}(1/ε). This greatly generalizes work by Alon, Bukh and Polyanskiy (IEEE Trans. Inf. Theory 2018) which focused only on the case of binary alphabet (and thus necessarily only list-decoding). We remark that we can in fact recover the same result for q = 2 and even L, as obtained by Alon, Bukh and Polyanskiy: we thus strictly generalize their work. \r\nOur main technical contribution is to (a) properly define a linear programming relaxation of the list-recovery condition over large alphabets; and (b) to demonstrate that a certain function defined on a q-ary probability simplex is maximized by the uniform distribution. This represents the core challenge in generalizing to larger q (as a binary simplex can be naturally identified with a one-dimensional interval). 
We can subsequently re-utilize certain Schur convexity and convexity properties established for a related function by Resch, Yuan and Zhang along with ideas of Alon, Bukh and Polyanskiy."}],"publisher":"Schloss Dagstuhl - Leibniz-Zentrum für Informatik","title":"Tight bounds on list-decodable and list-recoverable zero-rate codes","date_updated":"2025-09-30T10:42:35Z","author":[{"full_name":"Resch, Nicolas","last_name":"Resch","first_name":"Nicolas"},{"last_name":"Yuan","first_name":"Chen","full_name":"Yuan, Chen"},{"full_name":"Zhang, Yihan","first_name":"Yihan","id":"2ce5da42-b2ea-11eb-bba5-9f264e9d002c","last_name":"Zhang","orcid":"0000-0002-6465-6258"}],"date_published":"2025-02-11T00:00:00Z","department":[{"_id":"MaMo"}],"external_id":{"arxiv":["2309.01800"],"isi":["001532717300082"]},"article_processing_charge":"Yes","tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"}},{"project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"},{"name":"Trustworthy Deep Learning Theory: Private Over-Parameterized Models and Robust LLMs","_id":"92099302-16d5-11f0-9cad-f9a785f54fbd"}],"publisher":"National Academy of Sciences","abstract":[{"text":"Differentially private gradient descent (DP-GD) is a popular algorithm to train deep learning models with provable guarantees on the privacy of the training data. In the last decade, the problem of understanding its performance cost with respect to standard GD has received remarkable attention from the research community, which formally derived upper bounds on the excess population risk  RP  in different learning settings. 
However, existing bounds typically degrade with over-parameterization, i.e., as the number of parameters  p  gets larger than the number of training samples  n  -- a regime which is ubiquitous in current deep-learning practice. As a result, the lack of theoretical insights leaves practitioners without clear guidance, leading some to reduce the effective number of trainable parameters to improve performance, while others use larger models to achieve better results through scale. In this work, we show that in the popular random features model with quadratic loss, for any sufficiently large  p , privacy can be obtained for free, i.e.,  |RP|=o(1) , not only when the privacy parameter  ε  has constant order, but also in the strongly private setting  ε=o(1) . This challenges the common wisdom that over-parameterization inherently hinders performance in private learning.","lang":"eng"}],"isi":1,"language":[{"iso":"eng"}],"year":"2025","month":"04","oa":1,"tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"external_id":{"pmid":["40215275"],"isi":["001471214000001"],"arxiv":["2410.14787"]},"article_processing_charge":"Yes (in subscription journal)","department":[{"_id":"MaMo"}],"author":[{"last_name":"Bombari","id":"ca726dda-de17-11ea-bc14-f9da834f63aa","first_name":"Simone","full_name":"Bombari, Simone"},{"last_name":"Mondelli","orcid":"0000-0002-3242-7020","first_name":"Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","full_name":"Mondelli, Marco"}],"date_published":"2025-04-15T00:00:00Z","title":"Privacy for free in the overparameterized regime","issue":"15","date_updated":"2026-05-20T08:23:19Z","doi":"10.1073/pnas.2423072122","article_number":"e2423072122","article_type":"original","volume":122,"citation":{"chicago":"Bombari, Simone, and Marco Mondelli. 
“Privacy for Free in the Overparameterized Regime.” <i>Proceedings of the National Academy of Sciences</i>. National Academy of Sciences, 2025. <a href=\"https://doi.org/10.1073/pnas.2423072122\">https://doi.org/10.1073/pnas.2423072122</a>.","ieee":"S. Bombari and M. Mondelli, “Privacy for free in the overparameterized regime,” <i>Proceedings of the National Academy of Sciences</i>, vol. 122, no. 15. National Academy of Sciences, 2025.","ama":"Bombari S, Mondelli M. Privacy for free in the overparameterized regime. <i>Proceedings of the National Academy of Sciences</i>. 2025;122(15). doi:<a href=\"https://doi.org/10.1073/pnas.2423072122\">10.1073/pnas.2423072122</a>","short":"S. Bombari, M. Mondelli, Proceedings of the National Academy of Sciences 122 (2025).","apa":"Bombari, S., &#38; Mondelli, M. (2025). Privacy for free in the overparameterized regime. <i>Proceedings of the National Academy of Sciences</i>. National Academy of Sciences. <a href=\"https://doi.org/10.1073/pnas.2423072122\">https://doi.org/10.1073/pnas.2423072122</a>","ista":"Bombari S, Mondelli M. 2025. Privacy for free in the overparameterized regime. Proceedings of the National Academy of Sciences. 122(15), e2423072122.","mla":"Bombari, Simone, and Marco Mondelli. “Privacy for Free in the Overparameterized Regime.” <i>Proceedings of the National Academy of Sciences</i>, vol. 122, no. 
15, e2423072122, National Academy of Sciences, 2025, doi:<a href=\"https://doi.org/10.1073/pnas.2423072122\">10.1073/pnas.2423072122</a>."},"quality_controlled":"1","file_date_updated":"2025-05-05T07:27:54Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file":[{"file_id":"19648","content_type":"application/pdf","relation":"main_file","checksum":"1ac6f78e368d35a0cafb4d2d9bd63443","file_size":2328320,"date_updated":"2025-05-05T07:27:54Z","creator":"dernst","file_name":"2025_PNAS_Bombari.pdf","date_created":"2025-05-05T07:27:54Z","success":1,"access_level":"open_access"}],"corr_author":"1","pmid":1,"date_created":"2025-04-27T22:02:13Z","APC_amount":"2754,32 EUR","arxiv":1,"type":"journal_article","publication":"Proceedings of the National Academy of Sciences","oa_version":"Published Version","status":"public","publication_identifier":{"issn":["0027-8424"],"eissn":["1091-6490"]},"_id":"19627","OA_place":"publisher","day":"15","intvolume":"       122","publication_status":"published","ddc":["000"],"acknowledgement":"This research was funded in whole, or in part, by the Austrian Science Fund (FWF) Grant number COE 12. For the purpose of open access, the author has applied a CC BY public copyright license to any Author Accepted Manuscript version arising from this submission. The authors were also supported by the 2019 Lopez-Loreta prize, and Simone Bombari was supported by a Google PhD fellowship. We thank Diyuan Wu, Edwige Cyffers, Francesco Pedrotti, Inbar Seroussi, Nikita P. 
Kalinin, Pietro Pelliconi, Roodabeh Safavi, Yizhe Zhu, and Zhichao Wang for helpful discussions.","has_accepted_license":"1","OA_type":"hybrid","scopus_import":"1"},{"tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"external_id":{"arxiv":["2410.18837"]},"article_processing_charge":"No","department":[{"_id":"MaMo"}],"author":[{"full_name":"Emrullah Ildiz, M.","first_name":"M.","last_name":"Emrullah Ildiz"},{"last_name":"Gozeten","first_name":"Halil Alperen","full_name":"Gozeten, Halil Alperen"},{"full_name":"Taga, Ege Onur","last_name":"Taga","first_name":"Ege Onur"},{"orcid":"0000-0002-3242-7020","last_name":"Mondelli","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","full_name":"Mondelli, Marco"},{"full_name":"Oymak, Samet","last_name":"Oymak","first_name":"Samet"}],"date_published":"2025-04-01T00:00:00Z","date_updated":"2025-08-04T08:33:58Z","title":"High-dimensional analysis of knowledge distillation: Weak-to-Strong generalization and scaling laws","project":[{"_id":"911e6d1f-16d5-11f0-9cad-c5c68c6a1cdf","name":"Inference in High Dimensions: Light-speed Algorithms and Information Limits","grant_number":"101161364"}],"publisher":"ICLR","abstract":[{"lang":"eng","text":"A growing number of machine learning scenarios rely on knowledge distillation where one uses the output of a surrogate model as labels to supervise the training of a target model. In this work, we provide a sharp characterization of this process for ridgeless, high-dimensional regression, under two settings: (i) model shift, where the surrogate model is arbitrary, and (ii) distribution shift, where the surrogate model is the solution of empirical risk minimization with out-of-distribution data. 
In both cases, we characterize the precise risk of the target model through non-asymptotic bounds in terms of sample size and data distribution under mild conditions. As a consequence, we identify the form of the optimal surrogate model, which reveals the benefits and limitations of discarding weak features in a data-dependent fashion. In the context of weak-to-strong (W2S) generalization, this has the interpretation that (i) W2S training, with the surrogate as the weak model, can provably outperform training with strong labels under the same data budget, but (ii) it is unable to improve the data scaling law. We validate our results on numerical experiments both on ridgeless regression and on neural network architectures."}],"language":[{"iso":"eng"}],"year":"2025","month":"04","page":"2967-3006","oa":1,"date_created":"2025-07-20T22:02:02Z","arxiv":1,"conference":{"end_date":"2025-04-28","name":"ICLR: International Conference on Learning Representations","start_date":"2025-04-24","location":"Singapore, Singapore"},"type":"conference","oa_version":"Published Version","status":"public","publication":"13th International Conference on Learning Representations","publication_identifier":{"isbn":["9798331320850"]},"_id":"20033","OA_place":"publisher","day":"01","publication_status":"published","ddc":["000"],"has_accepted_license":"1","OA_type":"diamond","acknowledgement":"M.E.I., H.A.G., E.O.T., S.O. are supported by the NSF grants CCF-2046816, CCF-2403075, the Office of Naval Research grant N000142412289, an OpenAI Agentic AI Systems grant, and gifts by Open Philanthropy and Google Research. M. M. is funded by the European Union (ERC, INF2, project number 101161364). Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency. 
Neither the European Union nor the granting authority can be held responsible for them.","scopus_import":"1","citation":{"mla":"Emrullah Ildiz, M., et al. “High-Dimensional Analysis of Knowledge Distillation: Weak-to-Strong Generalization and Scaling Laws.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025, pp. 2967–3006.","short":"M. Emrullah Ildiz, H.A. Gozeten, E.O. Taga, M. Mondelli, S. Oymak, in:, 13th International Conference on Learning Representations, ICLR, 2025, pp. 2967–3006.","apa":"Emrullah Ildiz, M., Gozeten, H. A., Taga, E. O., Mondelli, M., &#38; Oymak, S. (2025). High-dimensional analysis of knowledge distillation: Weak-to-Strong generalization and scaling laws. In <i>13th International Conference on Learning Representations</i> (pp. 2967–3006). Singapore, Singapore: ICLR.","ista":"Emrullah Ildiz M, Gozeten HA, Taga EO, Mondelli M, Oymak S. 2025. High-dimensional analysis of knowledge distillation: Weak-to-Strong generalization and scaling laws. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations, 2967–3006.","ama":"Emrullah Ildiz M, Gozeten HA, Taga EO, Mondelli M, Oymak S. High-dimensional analysis of knowledge distillation: Weak-to-Strong generalization and scaling laws. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025:2967-3006.","ieee":"M. Emrullah Ildiz, H. A. Gozeten, E. O. Taga, M. Mondelli, and S. Oymak, “High-dimensional analysis of knowledge distillation: Weak-to-Strong generalization and scaling laws,” in <i>13th International Conference on Learning Representations</i>, Singapore, Singapore, 2025, pp. 2967–3006.","chicago":"Emrullah Ildiz, M., Halil Alperen Gozeten, Ege Onur Taga, Marco Mondelli, and Samet Oymak. “High-Dimensional Analysis of Knowledge Distillation: Weak-to-Strong Generalization and Scaling Laws.” In <i>13th International Conference on Learning Representations</i>, 2967–3006. 
ICLR, 2025."},"quality_controlled":"1","file_date_updated":"2025-08-04T08:32:38Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file":[{"file_id":"20112","content_type":"application/pdf","relation":"main_file","checksum":"5a38b093ebb4ee4eb662ea142621a5ca","file_size":528171,"date_updated":"2025-08-04T08:32:38Z","creator":"dernst","file_name":"2025_ICLR_Ildiz.pdf","date_created":"2025-08-04T08:32:38Z","success":1,"access_level":"open_access"}]},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file":[{"date_created":"2025-08-04T08:45:43Z","success":1,"creator":"dernst","file_name":"2025_ICLR_Jacot.pdf","access_level":"open_access","content_type":"application/pdf","relation":"main_file","file_id":"20114","file_size":1337236,"date_updated":"2025-08-04T08:45:43Z","checksum":"59c48c173887139647cc9839c0801136"}],"file_date_updated":"2025-08-04T08:45:43Z","quality_controlled":"1","citation":{"ista":"Jacot A, Súkeník P, Wang Z, Mondelli M. 2025. Wide neural networks trained with weight decay provably exhibit neural collapse. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations, 1905–1931.","short":"A. Jacot, P. Súkeník, Z. Wang, M. Mondelli, in:, 13th International Conference on Learning Representations, ICLR, 2025, pp. 1905–1931.","apa":"Jacot, A., Súkeník, P., Wang, Z., &#38; Mondelli, M. (2025). Wide neural networks trained with weight decay provably exhibit neural collapse. In <i>13th International Conference on Learning Representations</i> (pp. 1905–1931). Singapore, Singapore: ICLR.","ama":"Jacot A, Súkeník P, Wang Z, Mondelli M. Wide neural networks trained with weight decay provably exhibit neural collapse. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025:1905-1931.","mla":"Jacot, Arthur, et al. “Wide Neural Networks Trained with Weight Decay Provably Exhibit Neural Collapse.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025, pp. 
1905–31.","ieee":"A. Jacot, P. Súkeník, Z. Wang, and M. Mondelli, “Wide neural networks trained with weight decay provably exhibit neural collapse,” in <i>13th International Conference on Learning Representations</i>, Singapore, Singapore, 2025, pp. 1905–1931.","chicago":"Jacot, Arthur, Peter Súkeník, Zihan Wang, and Marco Mondelli. “Wide Neural Networks Trained with Weight Decay Provably Exhibit Neural Collapse.” In <i>13th International Conference on Learning Representations</i>, 1905–31. ICLR, 2025."},"scopus_import":"1","ddc":["000"],"acknowledgement":"M. M. and P. S. are funded by the European Union (ERC, INF2, project number 101161364). Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency. Neither the European Union nor the granting authority can be held responsible for them.","has_accepted_license":"1","OA_type":"diamond","OA_place":"publisher","day":"01","publication_status":"published","publication_identifier":{"isbn":["9798331320850"]},"_id":"20035","publication":"13th International Conference on Learning Representations","status":"public","oa_version":"Published Version","conference":{"start_date":"2025-04-24","name":"ICLR: International Conference on Learning Representations","location":"Singapore, Singapore","end_date":"2025-04-28"},"type":"conference","date_created":"2025-07-20T22:02:02Z","arxiv":1,"corr_author":"1","oa":1,"page":"1905-1931","year":"2025","month":"04","language":[{"iso":"eng"}],"abstract":[{"lang":"eng","text":"Deep neural networks (DNNs) at convergence consistently represent the training data in the last layer via a geometric structure referred to as neural collapse. This empirical evidence has spurred a line of theoretical research aimed at proving the emergence of neural collapse, mostly focusing on the unconstrained features model. 
Here, the features of the penultimate layer are free variables, which makes the model data-agnostic and puts into question its ability to capture DNN training. Our work addresses the issue, moving away from unconstrained features and studying DNNs that end with at least two linear layers. We first prove generic guarantees on neural collapse that assume (i) low training error and balancedness of linear layers (for within-class variability collapse), and (ii) bounded conditioning of the features before the linear part (for orthogonality of class-means, and their alignment with weight matrices). The balancedness refers to the fact that $W_{\\ell+1}^\\top W_{\\ell+1} \\approx W_\\ell W_\\ell^\\top$ for any pair of consecutive weight matrices of the linear part, and the bounded conditioning requires a well-behaved ratio between largest and smallest non-zero singular values of the features. We then show that such assumptions hold for gradient descent training with weight decay: (i) for networks with a wide first layer, we prove low training error and balancedness, and (ii) for solutions that are either nearly optimal or stable under large learning rates, we additionally prove the bounded conditioning. 
Taken together, our results are the first to show neural collapse in the end-to-end training of DNNs."}],"publisher":"ICLR","project":[{"name":"Inference in High Dimensions: Light-speed Algorithms and Information Limits","grant_number":"101161364","_id":"911e6d1f-16d5-11f0-9cad-c5c68c6a1cdf"}],"title":"Wide neural networks trained with weight decay provably exhibit neural collapse","date_updated":"2025-08-04T08:47:00Z","author":[{"full_name":"Jacot, Arthur","first_name":"Arthur","last_name":"Jacot"},{"full_name":"Súkeník, Peter","first_name":"Peter","id":"d64d6a8d-eb8e-11eb-b029-96fd216dec3c","last_name":"Súkeník"},{"last_name":"Wang","first_name":"Zihan","full_name":"Wang, Zihan"},{"full_name":"Mondelli, Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","orcid":"0000-0002-3242-7020","last_name":"Mondelli"}],"date_published":"2025-04-01T00:00:00Z","department":[{"_id":"MaMo"}],"external_id":{"arxiv":["2410.04887"]},"article_processing_charge":"No","tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"}},{"title":"Sibson α-mutual information and its variational representations","date_updated":"2026-02-16T11:49:40Z","date_published":"2025-07-11T00:00:00Z","author":[{"full_name":"Esposito, Amedeo Roberto","last_name":"Esposito","first_name":"Amedeo Roberto","id":"9583e921-e1ad-11ec-9862-cef099626dc9"},{"first_name":"Michael","last_name":"Gastpar","full_name":"Gastpar, Michael"},{"first_name":"Ibrahim","last_name":"Issa","full_name":"Issa, Ibrahim"}],"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2405.08352"}],"department":[{"_id":"MaMo"}],"article_processing_charge":"No","external_id":{"arxiv":["2405.08352"]},"oa":1,"year":"2025","month":"07","language":[{"iso":"eng"}],"abstract":[{"text":"Information measures can be constructed from Rényi divergences much like 
mutual information from Kullback-Leibler divergence. One such information measure is known as Sibson α-mutual information and has received renewed attention recently in several contexts: concentration of measure under dependence, statistical learning, hypothesis testing, and estimation theory. In this paper, we survey and extend the state of the art. In particular, we introduce variational representations for Sibson α-mutual information and employ them in each described context to derive novel results. Namely, we produce generalized Transportation-Cost inequalities and Fano-type inequalities. We also present an overview of known applications, spanning from learning theory and Bayesian risk to universal prediction.","lang":"eng"}],"publisher":"IEEE","scopus_import":"1","OA_type":"green","publication_status":"epub_ahead","day":"11","OA_place":"repository","_id":"20081","publication_identifier":{"issn":["0018-9448"],"eissn":["1557-9654"]},"status":"public","oa_version":"Preprint","publication":"IEEE Transactions on Information Theory","type":"journal_article","date_created":"2025-07-27T22:01:26Z","arxiv":1,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","quality_controlled":"1","citation":{"ieee":"A. R. Esposito, M. Gastpar, and I. Issa, “Sibson α-mutual information and its variational representations,” <i>IEEE Transactions on Information Theory</i>. IEEE, 2025.","chicago":"Esposito, Amedeo Roberto, Michael Gastpar, and Ibrahim Issa. “Sibson α-Mutual Information and Its Variational Representations.” <i>IEEE Transactions on Information Theory</i>. IEEE, 2025. <a href=\"https://doi.org/10.1109/TIT.2025.3587340\">https://doi.org/10.1109/TIT.2025.3587340</a>.","mla":"Esposito, Amedeo Roberto, et al. “Sibson α-Mutual Information and Its Variational Representations.” <i>IEEE Transactions on Information Theory</i>, IEEE, 2025, doi:<a href=\"https://doi.org/10.1109/TIT.2025.3587340\">10.1109/TIT.2025.3587340</a>.","short":"A.R. Esposito, M. Gastpar, I. 
Issa, IEEE Transactions on Information Theory (2025).","ista":"Esposito AR, Gastpar M, Issa I. 2025. Sibson α-mutual information and its variational representations. IEEE Transactions on Information Theory.","apa":"Esposito, A. R., Gastpar, M., &#38; Issa, I. (2025). Sibson α-mutual information and its variational representations. <i>IEEE Transactions on Information Theory</i>. IEEE. <a href=\"https://doi.org/10.1109/TIT.2025.3587340\">https://doi.org/10.1109/TIT.2025.3587340</a>","ama":"Esposito AR, Gastpar M, Issa I. Sibson α-mutual information and its variational representations. <i>IEEE Transactions on Information Theory</i>. 2025. doi:<a href=\"https://doi.org/10.1109/TIT.2025.3587340\">10.1109/TIT.2025.3587340</a>"},"article_type":"original","doi":"10.1109/TIT.2025.3587340"},{"date_updated":"2025-09-09T07:00:34Z","title":"Learning Pareto manifolds in high dimensions: How can regularization help?","date_published":"2025-05-01T00:00:00Z","author":[{"last_name":"Wegel","first_name":"Tobias","full_name":"Wegel, Tobias"},{"last_name":"Kovačević","id":"d0258e7b-50b8-11ef-ad56-8b9f537b6b1b","first_name":"Filip","full_name":"Kovačević, Filip"},{"first_name":"Alexandru","last_name":"Ţifrea","full_name":"Ţifrea, Alexandru"},{"last_name":"Yang","first_name":"Fanny","full_name":"Yang, Fanny"}],"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2503.08849"}],"department":[{"_id":"MaMo"}],"article_processing_charge":"No","external_id":{"arxiv":["2503.08849"]},"abstract":[{"text":"Simultaneously addressing multiple objectives is becoming increasingly important in modern machine learning. At the same time, data is often high-dimensional and costly to label. For a single objective such as prediction risk, conventional regularization techniques are known to improve generalization when the data exhibits low-dimensional structure like sparsity. 
However, it is largely unexplored how to leverage this structure in the context of multi-objective learning (MOL) with multiple competing objectives. In this work, we discuss how the application of vanilla regularization approaches can fail, and propose a two-stage MOL framework that can successfully leverage low-dimensional structure. We demonstrate its effectiveness experimentally for multi-distribution learning and fairness-risk trade-offs.","lang":"eng"}],"publisher":"ML Research Press","oa":1,"month":"05","page":"4591-4599","year":"2025","language":[{"iso":"eng"}],"status":"public","publication":"The 28th International Conference on Artificial Intelligence and Statistics","oa_version":"Preprint","type":"conference","alternative_title":["PMLR"],"conference":{"end_date":"2025-05-05","name":"AISTATS: Conference on Artificial Intelligence and Statistics","start_date":"2025-05-03","location":"Mai Khao, Thailand"},"date_created":"2025-09-07T22:01:35Z","arxiv":1,"scopus_import":"1","OA_type":"green","acknowledgement":"We thank Junhyung Park for valuable feedback on the manuscript. AT was supported by a PhD fellowship from the Swiss Data Science Center. TW was supported by the SNF Grant 204439. This work was done in part while TW and FY were visiting the Simons Institute for the Theory of\r\nComputing.","publication_status":"published","day":"01","OA_place":"repository","intvolume":"       258","_id":"20300","publication_identifier":{"eissn":["2640-3498"]},"volume":258,"citation":{"chicago":"Wegel, Tobias, Filip Kovačević, Alexandru Ţifrea, and Fanny Yang. “Learning Pareto Manifolds in High Dimensions: How Can Regularization Help?” In <i>The 28th International Conference on Artificial Intelligence and Statistics</i>, 258:4591–99. ML Research Press, 2025.","ieee":"T. Wegel, F. Kovačević, A. Ţifrea, and F. 
Yang, “Learning Pareto manifolds in high dimensions: How can regularization help?,” in <i>The 28th International Conference on Artificial Intelligence and Statistics</i>, Mai Khao, Thailand, 2025, vol. 258, pp. 4591–4599.","ama":"Wegel T, Kovačević F, Ţifrea A, Yang F. Learning Pareto manifolds in high dimensions: How can regularization help? In: <i>The 28th International Conference on Artificial Intelligence and Statistics</i>. Vol 258. ML Research Press; 2025:4591-4599.","short":"T. Wegel, F. Kovačević, A. Ţifrea, F. Yang, in:, The 28th International Conference on Artificial Intelligence and Statistics, ML Research Press, 2025, pp. 4591–4599.","ista":"Wegel T, Kovačević F, Ţifrea A, Yang F. 2025. Learning Pareto manifolds in high dimensions: How can regularization help? The 28th International Conference on Artificial Intelligence and Statistics. AISTATS: Conference on Artificial Intelligence and Statistics, PMLR, vol. 258, 4591–4599.","apa":"Wegel, T., Kovačević, F., Ţifrea, A., &#38; Yang, F. (2025). Learning Pareto manifolds in high dimensions: How can regularization help? In <i>The 28th International Conference on Artificial Intelligence and Statistics</i> (Vol. 258, pp. 4591–4599). Mai Khao, Thailand: ML Research Press.","mla":"Wegel, Tobias, et al. “Learning Pareto Manifolds in High Dimensions: How Can Regularization Help?” <i>The 28th International Conference on Artificial Intelligence and Statistics</i>, vol. 258, ML Research Press, 2025, pp. 
4591–99."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","quality_controlled":"1"},{"date_created":"2025-11-23T23:01:39Z","type":"conference","conference":{"end_date":"2025-06-27","location":"Ann Arbor, MI, United States","start_date":"2025-06-22","name":"ISIT: International Symposium on Information Theory"},"oa_version":"None","publication":"2025 IEEE International Symposium on Information Theory Proceedings","status":"public","article_processing_charge":"No","_id":"20667","publication_identifier":{"issn":["2157-8095"],"isbn":["9798331543990"]},"publication_status":"published","department":[{"_id":"MaMo"}],"day":"20","date_published":"2025-10-20T00:00:00Z","acknowledgement":"The research of A.K. and N.W. was supported by the Israel Science Foundation (ISF), grant no. 1782/22.","OA_type":"closed access","author":[{"first_name":"Abd","last_name":"El Latif Kadry","full_name":"El Latif Kadry, Abd"},{"last_name":"Zhang","orcid":"0000-0002-6465-6258","id":"2ce5da42-b2ea-11eb-bba5-9f264e9d002c","first_name":"Yihan","full_name":"Zhang, Yihan"},{"full_name":"Weinberger, Nir","first_name":"Nir","last_name":"Weinberger"}],"scopus_import":"1","title":"Mean estimation in high-dimensional binary time-inhomogeneous Markov Gaussian mixture models","date_updated":"2025-11-24T08:53:34Z","doi":"10.1109/ISIT63088.2025.11195426","publisher":"IEEE","citation":{"ieee":"A. El Latif Kadry, Y. Zhang, and N. Weinberger, “Mean estimation in high-dimensional binary time-inhomogeneous Markov Gaussian mixture models,” in <i>2025 IEEE International Symposium on Information Theory Proceedings</i>, Ann Arbor, MI, United States, 2025.","chicago":"El Latif Kadry, Abd, Yihan Zhang, and Nir Weinberger. “Mean Estimation in High-Dimensional Binary Time-Inhomogeneous Markov Gaussian Mixture Models.” In <i>2025 IEEE International Symposium on Information Theory Proceedings</i>. IEEE, 2025. 
<a href=\"https://doi.org/10.1109/ISIT63088.2025.11195426\">https://doi.org/10.1109/ISIT63088.2025.11195426</a>.","mla":"El Latif Kadry, Abd, et al. “Mean Estimation in High-Dimensional Binary Time-Inhomogeneous Markov Gaussian Mixture Models.” <i>2025 IEEE International Symposium on Information Theory Proceedings</i>, IEEE, 2025, doi:<a href=\"https://doi.org/10.1109/ISIT63088.2025.11195426\">10.1109/ISIT63088.2025.11195426</a>.","short":"A. El Latif Kadry, Y. Zhang, N. Weinberger, in:, 2025 IEEE International Symposium on Information Theory Proceedings, IEEE, 2025.","apa":"El Latif Kadry, A., Zhang, Y., &#38; Weinberger, N. (2025). Mean estimation in high-dimensional binary time-inhomogeneous Markov Gaussian mixture models. In <i>2025 IEEE International Symposium on Information Theory Proceedings</i>. Ann Arbor, MI, United States: IEEE. <a href=\"https://doi.org/10.1109/ISIT63088.2025.11195426\">https://doi.org/10.1109/ISIT63088.2025.11195426</a>","ista":"El Latif Kadry A, Zhang Y, Weinberger N. 2025. Mean estimation in high-dimensional binary time-inhomogeneous Markov Gaussian mixture models. 2025 IEEE International Symposium on Information Theory Proceedings. ISIT: International Symposium on Information Theory.","ama":"El Latif Kadry A, Zhang Y, Weinberger N. Mean estimation in high-dimensional binary time-inhomogeneous Markov Gaussian mixture models. In: <i>2025 IEEE International Symposium on Information Theory Proceedings</i>. IEEE; 2025. doi:<a href=\"https://doi.org/10.1109/ISIT63088.2025.11195426\">10.1109/ISIT63088.2025.11195426</a>"},"abstract":[{"text":"We explore the problem of mean estimation for a high-dimensional binary symmetric Gaussian mixture model, where the label (sign) follows a time-inhomogeneous Markov chain. We propose a spectral estimator based on a partition of a subset of the samples to blocks. 
We develop a computationally efficient algorithm to find the optimal blocks, and derive minimax lower bounds on the estimation loss of any estimator, which establish the effectiveness of our proposed estimator. The resulting minimax rate illuminates the interplay between the sample size, dimension, signal strength, and the memory on the loss.","lang":"eng"}],"language":[{"iso":"eng"}],"quality_controlled":"1","month":"10","year":"2025","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"tmp":{"image":"/images/cc_by.png","short":"CC BY (4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"date_updated":"2025-12-09T13:53:31Z","issue":"3-4","title":"Spectral estimators for structured generalized linear models via approximate message passing","date_published":"2025-09-02T00:00:00Z","author":[{"full_name":"Zhang, Yihan","orcid":"0000-0002-6465-6258","last_name":"Zhang","id":"2ce5da42-b2ea-11eb-bba5-9f264e9d002c","first_name":"Yihan"},{"first_name":"Hong Chang","last_name":"Ji","full_name":"Ji, Hong Chang"},{"full_name":"Venkataramanan, Ramji","last_name":"Venkataramanan","first_name":"Ramji"},{"full_name":"Mondelli, Marco","first_name":"Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","last_name":"Mondelli","orcid":"0000-0002-3242-7020"}],"department":[{"_id":"MaMo"}],"article_processing_charge":"No","abstract":[{"lang":"eng","text":"We consider the problem of parameter estimation in a high-dimensional generalized linear model. Spectral methods obtained via the principal eigenvector of a suitable data-dependent matrix provide a simple yet surprisingly effective solution. However, despite their wide use, a rigorous performance characterization, as well as a principled way to preprocess the data, are available only for unstructured (i.i.d. Gaussian and Haar orthogonal) designs. 
In contrast, real-world data matrices are highly structured and exhibit non-trivial correlations. To address the problem, we consider correlated Gaussian designs capturing the anisotropic nature of the features via a covariance matrix Σ. Our main result is a precise asymptotic characterization of the performance of spectral estimators. This allows us to identify the optimal preprocessing that minimizes the number of samples needed for parameter estimation. Surprisingly, such preprocessing is universal across a broad set of designs, which partly addresses a conjecture on optimal spectral estimators for rotationally invariant models. Our principled approach vastly improves upon previous heuristic methods, including for designs common in computational imaging and genetics. The proposed methodology, based on approximate message passing, is broadly applicable and opens the way to the precise characterization of spiked matrices and of the corresponding spectral methods in a variety of settings."}],"publisher":"EMS Press","project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"}],"PlanS_conform":"1","oa":1,"year":"2025","month":"09","page":"193-304","language":[{"iso":"eng"}],"publication":"Mathematical Statistics and Learning","oa_version":"Published Version","status":"public","type":"journal_article","date_created":"2025-12-07T23:02:02Z","corr_author":"1","scopus_import":"1","acknowledgement":"This work was done when Y. Z. and H. C. J. were at the Institute of Science and Technology Austria. Y. Z. thanks Hugo Latourelle-Vigeant for bringing [53] to the authors’ attention.\r\nY. Z. and M. M. are partially supported by the 2019 Lopez-Loreta Prize and by the Interdisciplinary Projects Committee (IPC) at ISTA. H. C. J. is supported by the ERC Advanced Grant “RMTBeyond” No. 
101020331.","OA_type":"diamond","has_accepted_license":"1","ddc":["000"],"publication_status":"published","OA_place":"publisher","intvolume":"         8","day":"02","_id":"20734","publication_identifier":{"eissn":["2520-2324"],"issn":["2520-2316"]},"volume":8,"citation":{"mla":"Zhang, Yihan, et al. “Spectral Estimators for Structured Generalized Linear Models via Approximate Message Passing.” <i>Mathematical Statistics and Learning</i>, vol. 8, no. 3–4, EMS Press, 2025, pp. 193–304, doi:<a href=\"https://doi.org/10.4171/MSL/52\">10.4171/MSL/52</a>.","apa":"Zhang, Y., Ji, H. C., Venkataramanan, R., &#38; Mondelli, M. (2025). Spectral estimators for structured generalized linear models via approximate message passing. <i>Mathematical Statistics and Learning</i>. EMS Press. <a href=\"https://doi.org/10.4171/MSL/52\">https://doi.org/10.4171/MSL/52</a>","short":"Y. Zhang, H.C. Ji, R. Venkataramanan, M. Mondelli, Mathematical Statistics and Learning 8 (2025) 193–304.","ista":"Zhang Y, Ji HC, Venkataramanan R, Mondelli M. 2025. Spectral estimators for structured generalized linear models via approximate message passing. Mathematical Statistics and Learning. 8(3–4), 193–304.","ama":"Zhang Y, Ji HC, Venkataramanan R, Mondelli M. Spectral estimators for structured generalized linear models via approximate message passing. <i>Mathematical Statistics and Learning</i>. 2025;8(3-4):193-304. doi:<a href=\"https://doi.org/10.4171/MSL/52\">10.4171/MSL/52</a>","ieee":"Y. Zhang, H. C. Ji, R. Venkataramanan, and M. Mondelli, “Spectral estimators for structured generalized linear models via approximate message passing,” <i>Mathematical Statistics and Learning</i>, vol. 8, no. 3–4. EMS Press, pp. 193–304, 2025.","chicago":"Zhang, Yihan, Hong Chang Ji, Ramji Venkataramanan, and Marco Mondelli. “Spectral Estimators for Structured Generalized Linear Models via Approximate Message Passing.” <i>Mathematical Statistics and Learning</i>. EMS Press, 2025. 
<a href=\"https://doi.org/10.4171/MSL/52\">https://doi.org/10.4171/MSL/52</a>."},"article_type":"original","doi":"10.4171/MSL/52","file":[{"access_level":"open_access","date_created":"2025-12-09T13:50:03Z","success":1,"file_name":"2025_MathStatLearning_Zhang.pdf","creator":"dernst","date_updated":"2025-12-09T13:50:03Z","file_size":1379626,"checksum":"55a1bd9c1b6b0198c42504fb94f4ad4c","relation":"main_file","content_type":"application/pdf","file_id":"20752"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","file_date_updated":"2025-12-09T13:50:03Z","quality_controlled":"1"},{"author":[{"full_name":"Zhang, Yihan","id":"2ce5da42-b2ea-11eb-bba5-9f264e9d002c","first_name":"Yihan","orcid":"0000-0002-6465-6258","last_name":"Zhang"},{"full_name":"Vatedka, Shashank","last_name":"Vatedka","first_name":"Shashank"}],"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2211.04408"}],"date_published":"2024-02-01T00:00:00Z","issue":"2","date_updated":"2025-09-04T11:32:49Z","title":"Multiple packing: Lower bounds via error exponents","external_id":{"arxiv":["2211.04408"],"isi":["001166812100008"]},"article_processing_charge":"No","department":[{"_id":"MaMo"}],"publisher":"IEEE","abstract":[{"lang":"eng","text":"We derive lower bounds on the maximal rates for multiple packings in high-dimensional Euclidean spaces. For any $N > 0$ and $L \\in \\mathbb{Z}_{\\ge 2}$, a multiple packing is a set $C$ of points in $\\mathbb{R}^n$ such that any point in $\\mathbb{R}^n$ lies in the intersection of at most $L - 1$ balls of radius $\\sqrt{nN}$ around points in $C$. This is a natural generalization of the sphere packing problem. We study the multiple packing problem for both bounded point sets whose points have norm at most $\\sqrt{nP}$ for some constant $P > 0$, and unbounded point sets whose points are allowed to be anywhere in $\\mathbb{R}^n$. 
Given a well-known connection with coding theory, multiple packings can be viewed as the Euclidean analog of list-decodable codes, which are well-studied over finite fields. 
We derive the best known lower bounds on the optimal multiple packing density. This is accomplished by establishing an inequality which relates the list-decoding error exponent for additive white Gaussian noise channels, a quantity of average-case nature, to the list-decoding radius, a quantity of worst-case nature. We also derive novel bounds on the list-decoding error exponent for infinite constellations and closed-form expressions for the list-decoding error exponents for the power-constrained AWGN channel, which may be of independent interest beyond multiple packing."}],"oa":1,"language":[{"iso":"eng"}],"isi":1,"month":"02","page":"1008-1039","year":"2024","type":"journal_article","oa_version":"Preprint","publication":"IEEE Transactions on Information Theory","status":"public","corr_author":"1","date_created":"2023-12-10T23:01:00Z","arxiv":1,"acknowledgement":"The work of Yihan Zhang was supported by the European Union’s Horizon 2020 Research and Innovation Programme under Grant 682203-ERC-[Inf-Speed-Tradeoff]. The work of Shashank Vatedka was supported in part by the Core Research Grant from the Science and Engineering Research Board, India, under Grant CRG/2022/004464; and in part by the Department of Science and Technology (DST), India, under Grant DST/INT/RUS/RSF/P-41/2020 (TPN No. 65025).","scopus_import":"1","publication_identifier":{"eissn":["1557-9654"],"issn":["0018-9448"]},"_id":"14665","intvolume":"        70","day":"01","publication_status":"published","article_type":"original","citation":{"chicago":"Zhang, Yihan, and Shashank Vatedka. “Multiple Packing: Lower Bounds via Error Exponents.” <i>IEEE Transactions on Information Theory</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/TIT.2023.3334032\">https://doi.org/10.1109/TIT.2023.3334032</a>.","ieee":"Y. Zhang and S. Vatedka, “Multiple packing: Lower bounds via error exponents,” <i>IEEE Transactions on Information Theory</i>, vol. 70, no. 2. IEEE, pp. 
1008–1039, 2024.","mla":"Zhang, Yihan, and Shashank Vatedka. “Multiple Packing: Lower Bounds via Error Exponents.” <i>IEEE Transactions on Information Theory</i>, vol. 70, no. 2, IEEE, 2024, pp. 1008–39, doi:<a href=\"https://doi.org/10.1109/TIT.2023.3334032\">10.1109/TIT.2023.3334032</a>.","ama":"Zhang Y, Vatedka S. Multiple packing: Lower bounds via error exponents. <i>IEEE Transactions on Information Theory</i>. 2024;70(2):1008-1039. doi:<a href=\"https://doi.org/10.1109/TIT.2023.3334032\">10.1109/TIT.2023.3334032</a>","apa":"Zhang, Y., &#38; Vatedka, S. (2024). Multiple packing: Lower bounds via error exponents. <i>IEEE Transactions on Information Theory</i>. IEEE. <a href=\"https://doi.org/10.1109/TIT.2023.3334032\">https://doi.org/10.1109/TIT.2023.3334032</a>","ista":"Zhang Y, Vatedka S. 2024. Multiple packing: Lower bounds via error exponents. IEEE Transactions on Information Theory. 70(2), 1008–1039.","short":"Y. Zhang, S. Vatedka, IEEE Transactions on Information Theory 70 (2024) 1008–1039."},"volume":70,"doi":"10.1109/TIT.2023.3334032","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","quality_controlled":"1"},{"project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"abstract":[{"lang":"eng","text":"We propose a novel approach to concentration for non-independent random variables. The main idea is to “pretend” that the random variables are independent and pay a multiplicative price measuring how far they are from actually being independent. This price is encapsulated in the Hellinger integral between the joint and the product of the marginals, which is then upper bounded leveraging tensorisation properties. Our bounds represent a natural generalisation of concentration inequalities in the presence of dependence: we recover exactly the classical bounds (McDiarmid’s inequality) when the random variables are independent. 
Furthermore, in a “large deviations” regime, we obtain the same decay in the probability as for the independent case, even when the random variables display non-trivial dependencies. To show this, we consider a number of applications of interest. First, we provide a bound for Markov chains with finite state space. Then, we consider the Simple Symmetric Random Walk, which is a non-contracting Markov chain, and a non-Markovian setting in which the stochastic process depends on its entire past. To conclude, we propose an application to Markov Chain Monte Carlo methods, where our approach leads to an improved lower bound on the minimum burn-in period required to reach a certain accuracy. In all of these settings, we provide a regime of parameters in which our bound fares better than what the state of the art can provide."}],"publisher":"IEEE","page":"3823-3839","month":"06","year":"2024","language":[{"iso":"eng"}],"isi":1,"oa":1,"department":[{"_id":"MaMo"}],"external_id":{"arxiv":["2303.07245"],"isi":["001230181100001"]},"article_processing_charge":"No","issue":"6","date_updated":"2025-09-04T13:06:53Z","title":"Concentration without independence via information measures","author":[{"first_name":"Amedeo Roberto","id":"9583e921-e1ad-11ec-9862-cef099626dc9","last_name":"Esposito","full_name":"Esposito, Amedeo Roberto"},{"full_name":"Mondelli, Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco","last_name":"Mondelli","orcid":"0000-0002-3242-7020"}],"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2303.07245"}],"date_published":"2024-06-01T00:00:00Z","doi":"10.1109/TIT.2024.3367767","citation":{"ama":"Esposito AR, Mondelli M. Concentration without independence via information measures. <i>IEEE Transactions on Information Theory</i>. 2024;70(6):3823-3839. doi:<a href=\"https://doi.org/10.1109/TIT.2024.3367767\">10.1109/TIT.2024.3367767</a>","ista":"Esposito AR, Mondelli M. 2024. 
Concentration without independence via information measures. IEEE Transactions on Information Theory. 70(6), 3823–3839.","apa":"Esposito, A. R., &#38; Mondelli, M. (2024). Concentration without independence via information measures. <i>IEEE Transactions on Information Theory</i>. IEEE. <a href=\"https://doi.org/10.1109/TIT.2024.3367767\">https://doi.org/10.1109/TIT.2024.3367767</a>","short":"A.R. Esposito, M. Mondelli, IEEE Transactions on Information Theory 70 (2024) 3823–3839.","mla":"Esposito, Amedeo Roberto, and Marco Mondelli. “Concentration without Independence via Information Measures.” <i>IEEE Transactions on Information Theory</i>, vol. 70, no. 6, IEEE, 2024, pp. 3823–39, doi:<a href=\"https://doi.org/10.1109/TIT.2024.3367767\">10.1109/TIT.2024.3367767</a>.","chicago":"Esposito, Amedeo Roberto, and Marco Mondelli. “Concentration without Independence via Information Measures.” <i>IEEE Transactions on Information Theory</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/TIT.2024.3367767\">https://doi.org/10.1109/TIT.2024.3367767</a>.","ieee":"A. R. Esposito and M. Mondelli, “Concentration without independence via information measures,” <i>IEEE Transactions on Information Theory</i>, vol. 70, no. 6. IEEE, pp. 3823–3839, 2024."},"volume":70,"article_type":"original","quality_controlled":"1","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","date_created":"2024-03-24T23:01:00Z","arxiv":1,"corr_author":"1","related_material":{"record":[{"relation":"earlier_version","status":"public","id":"14922"}]},"status":"public","publication":"IEEE Transactions on Information Theory","oa_version":"Preprint","type":"journal_article","day":"01","intvolume":"        70","publication_status":"published","publication_identifier":{"issn":["0018-9448"],"eissn":["1557-9654"]},"_id":"15172","scopus_import":"1"},{"article_type":"original","citation":{"mla":"Dey, Bikash Kumar, et al. 
“Codes for Adversaries: Between Worst-Case and Average-Case Jamming.” <i>Foundations and Trends in Communications and Information Theory</i>, vol. 21, no. 3–4, Now Publishers, 2024, pp. 300–588, doi:<a href=\"https://doi.org/10.1561/0100000112\">10.1561/0100000112</a>.","short":"B.K. Dey, S. Jaggi, M. Langberg, A.D. Sarwate, Y. Zhang, Foundations and Trends in Communications and Information Theory 21 (2024) 300–588.","ista":"Dey BK, Jaggi S, Langberg M, Sarwate AD, Zhang Y. 2024. Codes for adversaries: Between worst-case and average-case jamming. Foundations and Trends in Communications and Information Theory. 21(3–4), 300–588.","apa":"Dey, B. K., Jaggi, S., Langberg, M., Sarwate, A. D., &#38; Zhang, Y. (2024). Codes for adversaries: Between worst-case and average-case jamming. <i>Foundations and Trends in Communications and Information Theory</i>. Now Publishers. <a href=\"https://doi.org/10.1561/0100000112\">https://doi.org/10.1561/0100000112</a>","ama":"Dey BK, Jaggi S, Langberg M, Sarwate AD, Zhang Y. Codes for adversaries: Between worst-case and average-case jamming. <i>Foundations and Trends in Communications and Information Theory</i>. 2024;21(3-4):300-588. doi:<a href=\"https://doi.org/10.1561/0100000112\">10.1561/0100000112</a>","ieee":"B. K. Dey, S. Jaggi, M. Langberg, A. D. Sarwate, and Y. Zhang, “Codes for adversaries: Between worst-case and average-case jamming,” <i>Foundations and Trends in Communications and Information Theory</i>, vol. 21, no. 3–4. Now Publishers, pp. 300–588, 2024.","chicago":"Dey, Bikash Kumar, Sidharth Jaggi, Michael Langberg, Anand D. Sarwate, and Yihan Zhang. “Codes for Adversaries: Between Worst-Case and Average-Case Jamming.” <i>Foundations and Trends in Communications and Information Theory</i>. Now Publishers, 2024. 
<a href=\"https://doi.org/10.1561/0100000112\">https://doi.org/10.1561/0100000112</a>."},"volume":21,"doi":"10.1561/0100000112","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","quality_controlled":"1","type":"journal_article","publication":"Foundations and Trends in Communications and Information Theory","status":"public","oa_version":"None","corr_author":"1","date_created":"2024-12-15T23:01:50Z","OA_type":"closed access","scopus_import":"1","_id":"18652","publication_identifier":{"eissn":["1567-2328"],"issn":["1567-2190"]},"publication_status":"published","day":"03","intvolume":"        21","publisher":"Now Publishers","abstract":[{"lang":"eng","text":"Over the last 70 years, information theory and coding has enabled communication technologies that have had an astounding impact on our lives. This is possible due to the match between encoding/decoding strategies and corresponding channel models. Traditional studies of channels have taken one of two extremes: Shannon-theoretic models are inherently average-case in which channel noise is governed by a memoryless stochastic process, whereas coding-theoretic (referred to as “Hamming”) models take a worst-case, adversarial, view of the noise. However, for several existing and emerging communication systems the Shannon/average-case view may be too optimistic, whereas the Hamming/worst-case view may be too pessimistic. 
This monograph takes up the challenge of studying adversarial channel models that lie between the Shannon and Hamming extremes."}],"language":[{"iso":"eng"}],"year":"2024","page":"300-588","month":"12","date_published":"2024-12-03T00:00:00Z","author":[{"full_name":"Dey, Bikash Kumar","first_name":"Bikash Kumar","last_name":"Dey"},{"full_name":"Jaggi, Sidharth","last_name":"Jaggi","first_name":"Sidharth"},{"full_name":"Langberg, Michael","first_name":"Michael","last_name":"Langberg"},{"full_name":"Sarwate, Anand D.","first_name":"Anand D.","last_name":"Sarwate"},{"full_name":"Zhang, Yihan","last_name":"Zhang","orcid":"0000-0002-6465-6258","id":"2ce5da42-b2ea-11eb-bba5-9f264e9d002c","first_name":"Yihan"}],"title":"Codes for adversaries: Between worst-case and average-case jamming","date_updated":"2024-12-16T10:38:44Z","issue":"3-4","article_processing_charge":"No","department":[{"_id":"MaMo"}]}]