[{"date_created":"2025-01-30T08:26:59Z","abstract":[{"text":"Recent advances in large language model (LLM) pretraining have led to high-quality LLMs with impressive abilities. By compressing such LLMs via quantization to 3-4 bits per parameter, they can fit into memory-limited devices such as laptops and mobile phones, enabling personalized use. Quantizing models to 3-4 bits per parameter can lead to moderate to high accuracy losses, especially for smaller models (1-10B parameters), which are suitable for edge deployment. To address this accuracy issue, we introduce the Sparse-Quantized Representation (SpQR), a new compressed format and quantization technique that enables for the first time \\emph{near-lossless} compression of LLMs across model scales while reaching similar compression levels to previous methods. SpQR works by identifying and isolating \\emph{outlier weights}, which cause particularly large quantization errors, and storing them in higher precision while compressing all other weights to 3-4 bits, and achieves relative accuracy losses of less than 1% in perplexity for highly-accurate LLaMA and Falcon LLMs. This makes it possible to run a 33B parameter LLM on a single 24 GB consumer GPU without performance degradation at 15% speedup, thus making powerful LLMs available to consumers without any downsides. SpQR comes with efficient algorithms for both encoding weights into its format, as well as decoding them efficiently at runtime. 
Specifically, we provide an efficient GPU inference algorithm for SpQR, which yields faster inference than 16-bit baselines at similar accuracy while enabling memory compression gains of more than 4x.","lang":"eng"}],"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2306.03078"}],"quality_controlled":"1","date_published":"2024-05-15T00:00:00Z","year":"2024","oa":1,"_id":"18977","publisher":"OpenReview","arxiv":1,"day":"15","publication":"12th International Conference on Learning Representations","OA_place":"repository","title":"SpQR: A sparse-quantized representation for near-lossless LLM weight compression","publication_status":"published","type":"conference","oa_version":"Preprint","status":"public","date_updated":"2025-01-30T08:27:47Z","department":[{"_id":"DaAl"}],"article_processing_charge":"No","author":[{"first_name":"Tim","last_name":"Dettmers","full_name":"Dettmers, Tim"},{"full_name":"Svirschevski, Ruslan A.","last_name":"Svirschevski","first_name":"Ruslan A."},{"last_name":"Egiazarian","full_name":"Egiazarian, Vage","first_name":"Vage"},{"first_name":"Denis","full_name":"Kuznedelev, Denis","last_name":"Kuznedelev"},{"last_name":"Frantar","full_name":"Frantar, Elias","first_name":"Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f"},{"first_name":"Saleh","last_name":"Ashkboos","full_name":"Ashkboos, Saleh"},{"last_name":"Borzunov","full_name":"Borzunov, Alexander","first_name":"Alexander"},{"first_name":"Torsten","last_name":"Hoefler","full_name":"Hoefler, Torsten"},{"orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian"}],"scopus_import":"1","citation":{"ista":"Dettmers T, Svirschevski RA, Egiazarian V, Kuznedelev D, Frantar E, Ashkboos S, Borzunov A, Hoefler T, Alistarh D-A. 2024. SpQR: A sparse-quantized representation for near-lossless LLM weight compression. 12th International Conference on Learning Representations. 
ICLR: International Conference on Learning Representations.","short":"T. Dettmers, R.A. Svirschevski, V. Egiazarian, D. Kuznedelev, E. Frantar, S. Ashkboos, A. Borzunov, T. Hoefler, D.-A. Alistarh, in:, 12th International Conference on Learning Representations, OpenReview, 2024.","ieee":"T. Dettmers <i>et al.</i>, “SpQR: A sparse-quantized representation for near-lossless LLM weight compression,” in <i>12th International Conference on Learning Representations</i>, Vienna, Austria, 2024.","mla":"Dettmers, Tim, et al. “SpQR: A Sparse-Quantized Representation for near-Lossless LLM Weight Compression.” <i>12th International Conference on Learning Representations</i>, OpenReview, 2024.","ama":"Dettmers T, Svirschevski RA, Egiazarian V, et al. SpQR: A sparse-quantized representation for near-lossless LLM weight compression. In: <i>12th International Conference on Learning Representations</i>. OpenReview; 2024.","apa":"Dettmers, T., Svirschevski, R. A., Egiazarian, V., Kuznedelev, D., Frantar, E., Ashkboos, S., … Alistarh, D.-A. (2024). SpQR: A sparse-quantized representation for near-lossless LLM weight compression. In <i>12th International Conference on Learning Representations</i>. Vienna, Austria: OpenReview.","chicago":"Dettmers, Tim, Ruslan A. Svirschevski, Vage Egiazarian, Denis Kuznedelev, Elias Frantar, Saleh Ashkboos, Alexander Borzunov, Torsten Hoefler, and Dan-Adrian Alistarh. “SpQR: A Sparse-Quantized Representation for near-Lossless LLM Weight Compression.” In <i>12th International Conference on Learning Representations</i>. OpenReview, 2024."},"language":[{"iso":"eng"}],"acknowledgement":"Denis Kuznedelev acknowledges the support from the Russian Ministry of Science and Higher\r\nEducation, grant No. 075-10-2021-068. 
Ruslan Svirschevski and Vage Egiazarian and Denis\r\nKuznedelev were supported by the grant for research centers in the field of AI provided by the\r\nAnalytical Center for the Government of the Russian Federation (ACRF) in accordance with the\r\nagreement on the provision of subsidies (identifier of the agreement 000000D730321P5Q0002) and the agreement with HSE University No. 70-2021-00139.","OA_type":"green","external_id":{"arxiv":["2306.03078"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"05","conference":{"start_date":"2024-05-07","location":"Vienna, Austria","name":"ICLR: International Conference on Learning Representations","end_date":"2024-05-11"}},{"ddc":["000"],"day":"25","arxiv":1,"publisher":"Neural Information Processing Systems Foundation","_id":"18996","title":"Identifying general mechanism shifts in linear causal representations","volume":37,"OA_place":"repository","publication":"38th Conference on Neural Information Processing Systems","quality_controlled":"1","abstract":[{"text":"We consider the linear causal representation learning setting where we observe a linear mixing of d unknown latent factors, which follow a linear structural causal model. Recent work has shown that it is possible to recover the latent factors as well as the underlying structural causal model over them, up to permutation and scaling, provided that we have at least d environments, each of which corresponds to perfect interventions on a single latent node (factor). After this powerful result, a key open problem faced by the community has been to relax these conditions: allow for coarser than perfect single-node interventions, and allow for fewer than d of them, since the number of latent factors d could be very large. In this work, we consider precisely such a setting, where we allow a smaller than d number of environments, and also allow for very coarse interventions that can very coarsely \\textit{change the entire causal graph over the latent factors}. 
On the flip side, we relax what we wish to extract to simply the \\textit{list of nodes that have shifted between one or more environments}. We provide a surprising identifiability result that it is indeed possible, under some very mild standard assumptions, to identify the set of shifted nodes. Our identifiability proof moreover is a constructive one: we explicitly provide necessary and sufficient conditions for a node to be a shifted node, and show that we can check these conditions given observed data. Our algorithm lends itself very naturally to the sample setting where instead of just interventional distributions, we are provided datasets of samples from each of these distributions. We corroborate our results on both synthetic experiments as well as an interesting psychometric dataset. The code can be found at https://github.com/TianyuCodings/iLCS.","lang":"eng"}],"date_created":"2025-02-04T13:09:34Z","intvolume":"        37","oa":1,"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"alternative_title":["Advances in Neural Information Processing Systems"],"year":"2024","date_published":"2024-09-25T00:00:00Z","language":[{"iso":"eng"}],"has_accepted_license":"1","citation":{"mla":"Chen, Tianyu, et al. “Identifying General Mechanism Shifts in Linear Causal Representations.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024.","ista":"Chen T, Bello K, Locatello F, Aragam B, Ravikumar PK. 2024. Identifying general mechanism shifts in linear causal representations. 38th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37.","short":"T. Chen, K. Bello, F. Locatello, B. Aragam, P.K. 
Ravikumar, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ieee":"T. Chen, K. Bello, F. Locatello, B. Aragam, and P. K. Ravikumar, “Identifying general mechanism shifts in linear causal representations,” in <i>38th Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 37.","ama":"Chen T, Bello K, Locatello F, Aragam B, Ravikumar PK. Identifying general mechanism shifts in linear causal representations. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","chicago":"Chen, Tianyu, Kevin Bello, Francesco Locatello, Bryon Aragam, and Pradeep Kumar Ravikumar. “Identifying General Mechanism Shifts in Linear Causal Representations.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","apa":"Chen, T., Bello, K., Locatello, F., Aragam, B., &#38; Ravikumar, P. K. (2024). Identifying general mechanism shifts in linear causal representations. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). 
Vancouver, Canada: Neural Information Processing Systems Foundation."},"file_date_updated":"2025-02-04T13:09:08Z","scopus_import":"1","conference":{"start_date":"2024-12-16","location":"Vancouver, Canada","name":"NeurIPS: Neural Information Processing Systems","end_date":"2024-12-16"},"month":"09","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publication_identifier":{"eissn":["1049-5258"]},"external_id":{"arxiv":["2410.24059"]},"OA_type":"green","file":[{"file_size":5659119,"relation":"main_file","checksum":"75c3091e70bd2916cd94afbf40a0c425","file_name":"2024_NeurIPS_Chen.pdf","date_updated":"2025-02-04T13:09:08Z","creator":"dernst","content_type":"application/pdf","access_level":"open_access","success":1,"date_created":"2025-02-04T13:09:08Z","file_id":"18997"}],"status":"public","oa_version":"Published Version","type":"conference","publication_status":"published","author":[{"full_name":"Chen, Tianyu","last_name":"Chen","first_name":"Tianyu"},{"last_name":"Bello","full_name":"Bello, Kevin","first_name":"Kevin"},{"full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"},{"first_name":"Bryon","last_name":"Aragam","full_name":"Aragam, Bryon"},{"first_name":"Pradeep Kumar","last_name":"Ravikumar","full_name":"Ravikumar, Pradeep Kumar"}],"article_processing_charge":"No","department":[{"_id":"FrLo"}],"date_updated":"2025-07-07T13:23:49Z"},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"11","conference":{"name":"EMNLP: Conference on Empirical Methods in Natural Language Processing","start_date":"2024-11-12","location":"Miami, FL, United 
States","end_date":"2024-11-16"},"page":"12080-12099","file":[{"date_updated":"2025-02-10T08:20:34Z","file_name":"2024_EMNLP_Draganov.pdf","checksum":"f4416a5962194f0181ab0dc7f9ef93c0","relation":"main_file","file_size":1312638,"file_id":"19016","date_created":"2025-02-10T08:20:34Z","success":1,"access_level":"open_access","content_type":"application/pdf","creator":"dernst"}],"OA_type":"gold","external_id":{"arxiv":["2404.00500"]},"language":[{"iso":"eng"}],"citation":{"ieee":"O. Draganov and S. Skiena, “The shape of word embeddings: Quantifying non-isometry with topological data analysis,” in <i>Findings of the Association for Computational Linguistics: EMNLP 2024</i>, Miami, FL, United States, 2024, pp. 12080–12099.","ista":"Draganov O, Skiena S. 2024. The shape of word embeddings: Quantifying non-isometry with topological data analysis. Findings of the Association for Computational Linguistics: EMNLP 2024. EMNLP: Conference on Empirical Methods in Natural Language Processing, 12080–12099.","short":"O. Draganov, S. Skiena, in:, Findings of the Association for Computational Linguistics: EMNLP 2024, Association for Computational Linguistics, 2024, pp. 12080–12099.","mla":"Draganov, Ondrej, and Steven Skiena. “The Shape of Word Embeddings: Quantifying Non-Isometry with Topological Data Analysis.” <i>Findings of the Association for Computational Linguistics: EMNLP 2024</i>, Association for Computational Linguistics, 2024, pp. 12080–99, doi:<a href=\"https://doi.org/10.18653/v1/2024.findings-emnlp.705\">10.18653/v1/2024.findings-emnlp.705</a>.","apa":"Draganov, O., &#38; Skiena, S. (2024). The shape of word embeddings: Quantifying non-isometry with topological data analysis. In <i>Findings of the Association for Computational Linguistics: EMNLP 2024</i> (pp. 12080–12099). Miami, FL, United States: Association for Computational Linguistics. 
<a href=\"https://doi.org/10.18653/v1/2024.findings-emnlp.705\">https://doi.org/10.18653/v1/2024.findings-emnlp.705</a>","chicago":"Draganov, Ondrej, and Steven Skiena. “The Shape of Word Embeddings: Quantifying Non-Isometry with Topological Data Analysis.” In <i>Findings of the Association for Computational Linguistics: EMNLP 2024</i>, 12080–99. Association for Computational Linguistics, 2024. <a href=\"https://doi.org/10.18653/v1/2024.findings-emnlp.705\">https://doi.org/10.18653/v1/2024.findings-emnlp.705</a>.","ama":"Draganov O, Skiena S. The shape of word embeddings: Quantifying non-isometry with topological data analysis. In: <i>Findings of the Association for Computational Linguistics: EMNLP 2024</i>. Association for Computational Linguistics; 2024:12080-12099. doi:<a href=\"https://doi.org/10.18653/v1/2024.findings-emnlp.705\">10.18653/v1/2024.findings-emnlp.705</a>"},"has_accepted_license":"1","corr_author":"1","file_date_updated":"2025-02-10T08:20:34Z","scopus_import":"1","author":[{"full_name":"Draganov, Ondrej","last_name":"Draganov","id":"2B23F01E-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-0464-3823","first_name":"Ondrej"},{"first_name":"Steven","last_name":"Skiena","full_name":"Skiena, Steven"}],"article_processing_charge":"No","department":[{"_id":"GradSch"},{"_id":"HeEd"}],"date_updated":"2025-02-10T08:21:37Z","status":"public","type":"conference","oa_version":"Published Version","publication_status":"published","doi":"10.18653/v1/2024.findings-emnlp.705","title":"The shape of word embeddings: Quantifying non-isometry with topological data analysis","OA_place":"publisher","publication":"Findings of the Association for Computational Linguistics: EMNLP 2024","ddc":["500"],"arxiv":1,"publisher":"Association for Computational Linguistics","day":"01","_id":"18998","oa":1,"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 
4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"year":"2024","date_published":"2024-11-01T00:00:00Z","quality_controlled":"1","date_created":"2025-02-04T16:19:28Z","abstract":[{"lang":"eng","text":"Word embeddings represent language vocabularies as clouds of d-dimensional points. We investigate how information is conveyed by the general shape of these clouds, instead of representing the semantic meaning of each token. Specifically, we use the notion of persistent homology from topological data analysis (TDA) to measure the distances between language pairs from the shape of their unlabeled embeddings. These distances quantify the degree of non-isometry of the embeddings. To distinguish whether these differences are random training errors or capture real information about the languages, we use the computed distance matrices to construct language phylogenetic trees over 81 Indo-European languages. Careful evaluation shows that our reconstructed trees exhibit strong and statistically-significant similarities to the reference."}]},{"abstract":[{"text":"Exploring the shape of point configurations has been a key driver in the evolution of TDA (short for topological data analysis) since its infancy. This survey illustrates the recent efforts to broaden these ideas to model spatial interactions among multiple configurations, each distinguished by a color. 
It describes advances in this area and prepares the ground for further exploration by mentioning unresolved questions and promising research avenues while focusing on the overlap with discrete geometry.","lang":"eng"}],"date_created":"2025-02-04T16:21:21Z","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2406.04102","open_access":"1"}],"year":"2024","date_published":"2024-06-06T00:00:00Z","oa":1,"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"arxiv":1,"day":"06","_id":"18999","ddc":["510"],"publication":"arXiv","title":"Chromatic topological data analysis","OA_place":"repository","type":"preprint","oa_version":"Preprint","publication_status":"submitted","doi":"10.48550/ARXIV.2406.04102","status":"public","department":[{"_id":"GradSch"},{"_id":"HeEd"}],"date_updated":"2025-02-10T08:14:27Z","article_processing_charge":"No","author":[{"last_name":"Cultrera di Montesano","full_name":"Cultrera di Montesano, Sebastiano","first_name":"Sebastiano","orcid":"0000-0001-6249-0832","id":"34D2A09C-F248-11E8-B48F-1D18A9856A87"},{"id":"2B23F01E-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-0464-3823","first_name":"Ondrej","full_name":"Draganov, Ondrej","last_name":"Draganov"},{"last_name":"Edelsbrunner","full_name":"Edelsbrunner, Herbert","id":"3FB178DA-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-9823-6833","first_name":"Herbert"},{"last_name":"Saghafian","full_name":"Saghafian, Morteza","first_name":"Morteza","id":"f86f7148-b140-11ec-9577-95435b8df824"}],"corr_author":"1","language":[{"iso":"eng"}],"citation":{"ieee":"S. Cultrera di Montesano, O. Draganov, H. Edelsbrunner, and M. Saghafian, “Chromatic topological data analysis,” <i>arXiv</i>. .","ista":"Cultrera di Montesano S, Draganov O, Edelsbrunner H, Saghafian M. Chromatic topological data analysis. arXiv, 2406.04102.","short":"S. 
Cultrera di Montesano, O. Draganov, H. Edelsbrunner, M. Saghafian, ArXiv (n.d.).","mla":"Cultrera di Montesano, Sebastiano, et al. “Chromatic Topological Data Analysis.” <i>ArXiv</i>, 2406.04102, doi:<a href=\"https://doi.org/10.48550/ARXIV.2406.04102\">10.48550/ARXIV.2406.04102</a>.","ama":"Cultrera di Montesano S, Draganov O, Edelsbrunner H, Saghafian M. Chromatic topological data analysis. <i>arXiv</i>. doi:<a href=\"https://doi.org/10.48550/ARXIV.2406.04102\">10.48550/ARXIV.2406.04102</a>","apa":"Cultrera di Montesano, S., Draganov, O., Edelsbrunner, H., &#38; Saghafian, M. (n.d.). Chromatic topological data analysis. <i>arXiv</i>. <a href=\"https://doi.org/10.48550/ARXIV.2406.04102\">https://doi.org/10.48550/ARXIV.2406.04102</a>","chicago":"Cultrera di Montesano, Sebastiano, Ondrej Draganov, Herbert Edelsbrunner, and Morteza Saghafian. “Chromatic Topological Data Analysis.” <i>ArXiv</i>, n.d. <a href=\"https://doi.org/10.48550/ARXIV.2406.04102\">https://doi.org/10.48550/ARXIV.2406.04102</a>."},"has_accepted_license":"1","article_number":"2406.04102","external_id":{"arxiv":["2406.04102"]},"OA_type":"green","month":"06","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"date_updated":"2025-07-10T11:51:32Z","department":[{"_id":"CaMu"},{"_id":"FrLo"}],"article_processing_charge":"No","author":[{"id":"d3e02e50-48a8-11ee-8f62-c108061797fa","first_name":"Dingling","full_name":"Yao, Dingling","last_name":"Yao"},{"id":"f978ccb0-3f7f-11eb-b193-b0e2bd13182b","orcid":"0000-0001-5836-5350","first_name":"Caroline J","last_name":"Muller","full_name":"Muller, Caroline J"},{"first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","last_name":"Locatello"}],"publication_status":"published","type":"conference","oa_version":"Published 
Version","status":"public","file":[{"relation":"main_file","checksum":"fe8832367e7143876f178244385d859e","file_size":2595855,"file_name":"2024_NeurIPS_Yao.pdf","date_updated":"2025-02-05T07:44:58Z","content_type":"application/pdf","creator":"dernst","file_id":"19006","access_level":"open_access","success":1,"date_created":"2025-02-05T07:44:58Z"}],"related_material":{"link":[{"url":"https://github.com/CausalLearningAI/crl-dynamical-systems","relation":"software"}]},"acknowledgement":"We thank Niklas Boers for recommending the SpeedyWeather simulator and Valentino Maiorca\r\nfor guidance on Fourier transformation for SST data. We are also grateful to Shimeng Huang and Riccardo Cadei for their feedback on the treatment effect estimation experiment and to Jiale Chen and Adeel Pervez for their assistance with the solver implementation. Finally, we appreciate the anonymous reviewers for their insightful suggestions, which helped improve the manuscript. ","OA_type":"gold","external_id":{"arxiv":["2405.13888"]},"month":"12","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","conference":{"end_date":"2024-12-16","location":"Vancouver, Canada","start_date":"2024-12-16","name":"NeurIPS: Neural Information Processing Systems"},"scopus_import":"1","corr_author":"1","file_date_updated":"2025-02-05T07:44:58Z","citation":{"ama":"Yao D, Muller CJ, Locatello F. Marrying causal representation learning with dynamical systems for science. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","apa":"Yao, D., Muller, C. J., &#38; Locatello, F. (2024). Marrying causal representation learning with dynamical systems for science. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). Vancouver, Canada: Neural Information Processing Systems Foundation.","chicago":"Yao, Dingling, Caroline J Muller, and Francesco Locatello. 
“Marrying Causal Representation Learning with Dynamical Systems for Science.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","ista":"Yao D, Muller CJ, Locatello F. 2024. Marrying causal representation learning with dynamical systems for science. 38th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37.","ieee":"D. Yao, C. J. Muller, and F. Locatello, “Marrying causal representation learning with dynamical systems for science,” in <i>38th Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 37.","short":"D. Yao, C.J. Muller, F. Locatello, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","mla":"Yao, Dingling, et al. “Marrying Causal Representation Learning with Dynamical Systems for Science.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024."},"has_accepted_license":"1","language":[{"iso":"eng"}],"date_published":"2024-12-01T00:00:00Z","year":"2024","alternative_title":["Advances in Neural Information Processing Systems"],"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"oa":1,"abstract":[{"text":"Causal representation learning promises to extend causal models to hidden causal\r\nvariables from raw entangled measurements. However, most progress has focused\r\non proving identifiability results in different settings, and we are not aware of any\r\nsuccessful real-world application. 
At the same time, the field of dynamical systems\r\nbenefited from deep learning and scaled to countless applications but does not allow\r\nparameter identification. In this paper, we draw a clear connection between the two\r\nand their key assumptions, allowing us to apply identifiable methods developed\r\nin causal representation learning to dynamical systems. At the same time, we can\r\nleverage scalable differentiable solvers developed for differential equations to build\r\nmodels that are both identifiable and practical. Overall, we learn explicitly controllable models that isolate the trajectory-specific parameters for further downstream\r\ntasks such as out-of-distribution classification or treatment effect estimation. We\r\nexperiment with a wind simulator with partially known factors of variation. We\r\nalso apply the resulting model to real-world climate data and successfully answer\r\ndownstream causal questions in line with existing literature on climate change.\r\nCode is available at https://github.com/CausalLearningAI/crl-dynamical-systems.","lang":"eng"}],"intvolume":"        37","date_created":"2025-02-05T07:49:00Z","quality_controlled":"1","publication":"38th Conference on Neural Information Processing Systems","OA_place":"publisher","volume":37,"title":"Marrying causal representation learning with dynamical systems for science","_id":"19005","publisher":"Neural Information Processing Systems Foundation","arxiv":1,"day":"01","ddc":["000","550"]},{"date_updated":"2025-05-14T11:29:10Z","department":[{"_id":"FrLo"}],"article_processing_charge":"No","author":[{"first_name":"Avinash","last_name":"Kori","full_name":"Kori, Avinash"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","full_name":"Locatello, Francesco","last_name":"Locatello"},{"first_name":"Ainkaran","full_name":"Santhirasekaram, Ainkaran","last_name":"Santhirasekaram"},{"last_name":"Toni","full_name":"Toni, 
Francesca","first_name":"Francesca"},{"first_name":"Ben","full_name":"Glocker, Ben","last_name":"Glocker"},{"first_name":"Fabio","last_name":"De Sousa Ribeiro","full_name":"De Sousa Ribeiro, Fabio"}],"publication_status":"published","type":"conference","oa_version":"Published Version","status":"public","acknowledgement":"A. Kori is supported by UKRI (grant number EP/S023356/1), as part of the UKRI Centre for Doctoral Training in Safe and Trusted AI. B. Glocker and F.D.S. Ribeiro acknowledge the support of the UKRI AI programme, and the Engineering and Physical Sciences Research Council, for CHAI - EPSRC Causality in Healthcare AI Hub (grant number EP/Y028856/1).","file":[{"date_updated":"2025-02-05T08:34:25Z","file_name":"2024_NeurIPS_Kori.pdf","checksum":"d27b3c7102adc28e798fe41001f0b919","relation":"main_file","file_size":6943800,"file_id":"19008","date_created":"2025-02-05T08:34:25Z","access_level":"open_access","success":1,"content_type":"application/pdf","creator":"dernst"}],"OA_type":"hybrid","external_id":{"arxiv":["2406.07141"]},"month":"12","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","conference":{"end_date":"2024-12-16","name":"NeurIPS: Neural Information Processing Systems","start_date":"2024-12-16","location":"Vancouver, Canada"},"scopus_import":"1","file_date_updated":"2025-02-05T08:34:25Z","citation":{"ista":"Kori A, Locatello F, Santhirasekaram A, Toni F, Glocker B, De Sousa Ribeiro F. 2024. Identifiable object-centric representation learning via probabilistic slot attention. 38th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37.","ieee":"A. Kori, F. Locatello, A. Santhirasekaram, F. Toni, B. Glocker, and F. De Sousa Ribeiro, “Identifiable object-centric representation learning via probabilistic slot attention,” in <i>38th Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 37.","short":"A. Kori, F. 
Locatello, A. Santhirasekaram, F. Toni, B. Glocker, F. De Sousa Ribeiro, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","mla":"Kori, Avinash, et al. “Identifiable Object-Centric Representation Learning via Probabilistic Slot Attention.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024.","apa":"Kori, A., Locatello, F., Santhirasekaram, A., Toni, F., Glocker, B., &#38; De Sousa Ribeiro, F. (2024). Identifiable object-centric representation learning via probabilistic slot attention. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). Vancouver, Canada: Neural Information Processing Systems Foundation.","chicago":"Kori, Avinash, Francesco Locatello, Ainkaran Santhirasekaram, Francesca Toni, Ben Glocker, and Fabio De Sousa Ribeiro. “Identifiable Object-Centric Representation Learning via Probabilistic Slot Attention.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","ama":"Kori A, Locatello F, Santhirasekaram A, Toni F, Glocker B, De Sousa Ribeiro F. Identifiable object-centric representation learning via probabilistic slot attention. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024."},"has_accepted_license":"1","language":[{"iso":"eng"}],"date_published":"2024-12-01T00:00:00Z","year":"2024","alternative_title":["Advances in Neural Information Processing Systems"],"oa":1,"abstract":[{"lang":"eng","text":"Learning modular object-centric representations is crucial for systematic generalization. Existing methods show promising object-binding capabilities empirically,\r\nbut theoretical identifiability guarantees remain relatively underdeveloped. 
Understanding when object-centric representations can theoretically be identified is\r\ncrucial for scaling slot-based methods to high-dimensional images with correctness\r\nguarantees. To that end, we propose a probabilistic slot-attention algorithm that\r\nimposes an aggregate mixture prior over object-centric slot representations, thereby\r\nproviding slot identifiability guarantees without supervision, up to an equivalence\r\nrelation. We provide empirical verification of our theoretical identifiability result\r\nusing both simple 2-dimensional data and high-resolution imaging datasets.\r\n"}],"intvolume":"        37","date_created":"2025-02-05T08:36:22Z","quality_controlled":"1","publication":"38th Conference on Neural Information Processing Systems","volume":37,"OA_place":"publisher","title":"Identifiable object-centric representation learning via probabilistic slot attention","_id":"19007","arxiv":1,"publisher":"Neural Information Processing Systems Foundation","day":"01","ddc":["000"]},{"external_id":{"arxiv":["2412.14923"]},"publication":"arXiv","related_material":{"record":[{"status":"public","relation":"earlier_version","id":"18295"}]},"OA_place":"repository","user_id":"8b945eb4-e2f2-11eb-945a-df72226e66a9","month":"12","title":"Terminal singularities of the moduli space of curves on low degree hypersurfaces and the circle method","_id":"19013","day":"19","corr_author":"1","arxiv":1,"citation":{"mla":"Glas, Jakob, and Matthew Hase-Liu. “Terminal Singularities of the Moduli Space of Curves on Low Degree Hypersurfaces and the Circle Method.” <i>ArXiv</i>, doi:<a href=\"https://doi.org/10.48550/arXiv.2412.14923\">10.48550/arXiv.2412.14923</a>.","short":"J. Glas, M. Hase-Liu, ArXiv (n.d.).","ieee":"J. Glas and M. Hase-Liu, “Terminal singularities of the moduli space of curves on low degree hypersurfaces and the circle method,” <i>arXiv</i>. .","ista":"Glas J, Hase-Liu M. 
Terminal singularities of the moduli space of curves on low degree hypersurfaces and the circle method. arXiv, <a href=\"https://doi.org/10.48550/arXiv.2412.14923\">10.48550/arXiv.2412.14923</a>.","chicago":"Glas, Jakob, and Matthew Hase-Liu. “Terminal Singularities of the Moduli Space of Curves on Low Degree Hypersurfaces and the Circle Method.” <i>ArXiv</i>, n.d. <a href=\"https://doi.org/10.48550/arXiv.2412.14923\">https://doi.org/10.48550/arXiv.2412.14923</a>.","apa":"Glas, J., &#38; Hase-Liu, M. (n.d.). Terminal singularities of the moduli space of curves on low degree hypersurfaces and the circle method. <i>arXiv</i>. <a href=\"https://doi.org/10.48550/arXiv.2412.14923\">https://doi.org/10.48550/arXiv.2412.14923</a>","ama":"Glas J, Hase-Liu M. Terminal singularities of the moduli space of curves on low degree hypersurfaces and the circle method. <i>arXiv</i>. doi:<a href=\"https://doi.org/10.48550/arXiv.2412.14923\">10.48550/arXiv.2412.14923</a>"},"language":[{"iso":"eng"}],"date_updated":"2025-04-15T08:05:40Z","date_published":"2024-12-19T00:00:00Z","department":[{"_id":"TiBr"}],"year":"2024","article_processing_charge":"No","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"author":[{"id":"d6423cba-dc74-11ea-a0a7-ee61689ff5fb","first_name":"Jakob","full_name":"Glas, Jakob","last_name":"Glas"},{"last_name":"Hase-Liu","full_name":"Hase-Liu, Matthew","first_name":"Matthew"}],"oa":1,"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2412.14923"}],"doi":"10.48550/arXiv.2412.14923","publication_status":"draft","abstract":[{"lang":"eng","text":"We study the singularities of the moduli space of degree e maps from smooth genus g curves to an arbitrary smooth hypersurface of low degree. 
For e large compared to g, we show that these moduli spaces have at worst terminal singularities. Our main approach is to study the jet schemes of these moduli spaces by developing a suitable form of the circle method."}],"date_created":"2025-02-07T12:04:11Z","oa_version":"Preprint","type":"preprint","status":"public"},{"language":[{"iso":"eng"}],"citation":{"chicago":"Sakai, Hiroyuki, Christian Freude, Thomas Auzinger, David Hahn, and Michael Wimmer. “A Statistical Approach to Monte Carlo Denoising.” In <i>Proceedings - SIGGRAPH Asia 2024 Conference Papers</i>. Association for Computing Machinery, 2024. <a href=\"https://doi.org/10.1145/3680528.3687591\">https://doi.org/10.1145/3680528.3687591</a>.","apa":"Sakai, H., Freude, C., Auzinger, T., Hahn, D., &#38; Wimmer, M. (2024). A statistical approach to Monte Carlo denoising. In <i>Proceedings - SIGGRAPH Asia 2024 Conference Papers</i>. Tokyo, Japan: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3680528.3687591\">https://doi.org/10.1145/3680528.3687591</a>","ama":"Sakai H, Freude C, Auzinger T, Hahn D, Wimmer M. A statistical approach to Monte Carlo denoising. In: <i>Proceedings - SIGGRAPH Asia 2024 Conference Papers</i>. Association for Computing Machinery; 2024. doi:<a href=\"https://doi.org/10.1145/3680528.3687591\">10.1145/3680528.3687591</a>","mla":"Sakai, Hiroyuki, et al. “A Statistical Approach to Monte Carlo Denoising.” <i>Proceedings - SIGGRAPH Asia 2024 Conference Papers</i>, 68, Association for Computing Machinery, 2024, doi:<a href=\"https://doi.org/10.1145/3680528.3687591\">10.1145/3680528.3687591</a>.","short":"H. Sakai, C. Freude, T. Auzinger, D. Hahn, M. Wimmer, in:, Proceedings - SIGGRAPH Asia 2024 Conference Papers, Association for Computing Machinery, 2024.","ieee":"H. Sakai, C. Freude, T. Auzinger, D. Hahn, and M. 
Wimmer, “A statistical approach to Monte Carlo denoising,” in <i>Proceedings - SIGGRAPH Asia 2024 Conference Papers</i>, Tokyo, Japan, 2024.","ista":"Sakai H, Freude C, Auzinger T, Hahn D, Wimmer M. 2024. A statistical approach to Monte Carlo denoising. Proceedings - SIGGRAPH Asia 2024 Conference Papers. SA: SIGGRAPH Asia, 68."},"article_number":"68","has_accepted_license":"1","file_date_updated":"2025-04-15T12:53:24Z","scopus_import":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"12","conference":{"end_date":"2024-12-06","location":"Tokyo, Japan","start_date":"2024-12-03","name":"SA: SIGGRAPH Asia"},"publication_identifier":{"isbn":["9798400711312"]},"file":[{"content_type":"application/pdf","creator":"dernst","file_id":"19563","success":1,"access_level":"open_access","date_created":"2025-04-15T12:53:24Z","relation":"main_file","checksum":"89f63b9237224362ec33430af9152700","file_size":14791980,"date_updated":"2025-04-15T12:53:24Z","file_name":"2024_SIGGRAPH_Sakai.pdf"}],"acknowledgement":"We would like to thank Lukas Lipp for fruitful discussions, Károly Zsolnai-Fehér and Jaroslav Křivánek for valuable contributions to early versions of this work, and Bernhard Kerbl for help with our CUDA implementation. Moreover, we would like to thank the creators of the scenes we have used: Wig42 for “Wooden Staircase” (Fig. 1), “Grey and White Room” (Fig. S6), and “Modern Living Room” (Fig. S8); nacimus for “Bathroom” (Fig. 3, S5); NovaZeeke for “Japanese Classroom” (Fig. 4, 6); Beeple for “Zero-Day” (Fig. 8); Jay-Artist for “White Room” (Fig. S7); Mareck for “Contemporary Bathroom” (Fig. 2); Christian Freude for “Glass Caustics” (Fig. S10); and Benedikt Bitterli for “Veach Ajar” (Fig. 7, S2), “Veach MIS” (Fig. S4), and “Fur Ball” (Fig. S11). 
This work has received funding from the Vienna Science and Technology Fund (WWTF) project ICT22-028 (“Toward Optimal Path Guiding for Photorealistic Rendering”) and the Austrian Science Fund (FWF) project F 77 (SFB “Advanced Computational Design”).","OA_type":"hybrid","external_id":{"isi":["001441591200068"]},"status":"public","isi":1,"type":"conference","oa_version":"Published Version","publication_status":"published","doi":"10.1145/3680528.3687591","author":[{"full_name":"Sakai, Hiroyuki","last_name":"Sakai","first_name":"Hiroyuki"},{"full_name":"Freude, Christian","last_name":"Freude","first_name":"Christian"},{"orcid":"0000-0002-1546-3265","id":"4718F954-F248-11E8-B48F-1D18A9856A87","first_name":"Thomas","last_name":"Auzinger","full_name":"Auzinger, Thomas"},{"full_name":"Hahn, David","last_name":"Hahn","id":"357A6A66-F248-11E8-B48F-1D18A9856A87","first_name":"David"},{"last_name":"Wimmer","full_name":"Wimmer, Michael","first_name":"Michael"}],"article_processing_charge":"Yes (in subscription journal)","date_updated":"2025-12-02T13:58:56Z","ddc":["000"],"publisher":"Association for Computing Machinery","day":"03","_id":"19028","title":"A statistical approach to Monte Carlo denoising","OA_place":"publisher","publication":"Proceedings - SIGGRAPH Asia 2024 Conference Papers","quality_controlled":"1","date_created":"2025-02-16T23:02:34Z","abstract":[{"text":"The stochastic nature of modern Monte Carlo (MC) rendering methods inevitably produces noise in rendered images for a practical number of samples per pixel. The problem of denoising these images has been widely studied, with most recent methods relying on data-driven, pretrained neural networks. In contrast, in this paper we propose a statistical approach to the denoising problem, treating each pixel as a random variable and reasoning about its distribution. 
Considering a pixel of the noisy rendered image, we formulate fast pair-wise statistical tests—based on online estimators—to decide which of the nearby pixels to exclude from the denoising filter. We show that for symmetric pixel weights and normally distributed samples, the classical Welch t-test is optimal in terms of mean squared error. We then show how to extend this result to handle non-normal distributions, using more recent confidence-interval formulations in combination with the Box-Cox transformation. Our results show that our statistical denoising approach matches the performance of state-of-the-art neural image denoising without having to resort to any computation-intensive pretraining. Furthermore, our approach easily generalizes to other quantities besides pixel intensity, which we demonstrate by showing additional applications to Russian roulette path termination and multiple importance sampling.","lang":"eng"}],"oa":1,"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"year":"2024","date_published":"2024-12-03T00:00:00Z"},{"oa":1,"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"year":"2024","issue":"13","date_published":"2024-07-01T00:00:00Z","quality_controlled":"1","abstract":[{"lang":"eng","text":"This paper corrects an error in an earlier work of the author."}],"date_created":"2025-02-18T07:15:50Z","intvolume":"      2024","article_type":"original","title":"The polynomial sieve and equal sums of like polynomials","volume":2024,"OA_place":"publisher","publication":"International Mathematics Research Notices","ddc":["510"],"publisher":"Oxford University Press","day":"01","_id":"19051","author":[{"full_name":"Browning, Timothy 
D","last_name":"Browning","orcid":"0000-0002-8314-0177","id":"35827D50-F248-11E8-B48F-1D18A9856A87","first_name":"Timothy D"}],"article_processing_charge":"Yes (via OA deal)","department":[{"_id":"TiBr"}],"date_updated":"2025-09-09T12:16:45Z","status":"public","isi":1,"type":"journal_article","oa_version":"Published Version","publication_status":"published","doi":"10.1093/imrn/rnae066","month":"07","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","publication_identifier":{"issn":["1073-7928"],"eissn":["1687-0247"]},"page":"10165-10168","file":[{"content_type":"application/pdf","creator":"dernst","access_level":"open_access","success":1,"date_created":"2025-02-18T07:56:36Z","file_id":"19052","file_size":205750,"relation":"main_file","checksum":"b625b8adf018d2a97591813c1fc17b96","date_updated":"2025-02-18T07:56:36Z","file_name":"2024_IMRN_Browning.pdf"}],"related_material":{"record":[{"id":"254","relation":"earlier_version","status":"public"}]},"external_id":{"isi":["001196957300001"]},"OA_type":"hybrid","language":[{"iso":"eng"}],"citation":{"ama":"Browning TD. The polynomial sieve and equal sums of like polynomials. <i>International Mathematics Research Notices</i>. 2024;2024(13):10165-10168. doi:<a href=\"https://doi.org/10.1093/imrn/rnae066\">10.1093/imrn/rnae066</a>","chicago":"Browning, Timothy D. “The Polynomial Sieve and Equal Sums of like Polynomials.” <i>International Mathematics Research Notices</i>. Oxford University Press, 2024. <a href=\"https://doi.org/10.1093/imrn/rnae066\">https://doi.org/10.1093/imrn/rnae066</a>.","apa":"Browning, T. D. (2024). The polynomial sieve and equal sums of like polynomials. <i>International Mathematics Research Notices</i>. Oxford University Press. <a href=\"https://doi.org/10.1093/imrn/rnae066\">https://doi.org/10.1093/imrn/rnae066</a>","mla":"Browning, Timothy D. “The Polynomial Sieve and Equal Sums of like Polynomials.” <i>International Mathematics Research Notices</i>, vol. 2024, no. 
13, Oxford University Press, 2024, pp. 10165–68, doi:<a href=\"https://doi.org/10.1093/imrn/rnae066\">10.1093/imrn/rnae066</a>.","ista":"Browning TD. 2024. The polynomial sieve and equal sums of like polynomials. International Mathematics Research Notices. 2024(13), 10165–10168.","short":"T.D. Browning, International Mathematics Research Notices 2024 (2024) 10165–10168.","ieee":"T. D. Browning, “The polynomial sieve and equal sums of like polynomials,” <i>International Mathematics Research Notices</i>, vol. 2024, no. 13. Oxford University Press, pp. 10165–10168, 2024."},"has_accepted_license":"1","corr_author":"1","file_date_updated":"2025-02-18T07:56:36Z","scopus_import":"1"},{"related_material":{"link":[{"relation":"software","url":"https://github.com/egozverev/Shold-It-Be-Executed-Or-Processed"}]},"file":[{"file_size":530972,"relation":"main_file","checksum":"35eb43968684b87be59144603ef10af0","date_updated":"2025-02-20T10:11:45Z","file_name":"2403.06833v3.pdf","content_type":"application/pdf","creator":"ezverev","access_level":"open_access","success":1,"date_created":"2025-02-20T10:11:45Z","file_id":"19064"}],"acknowledgement":"The authors would like to sincerely thank Juan Rocamonde for valuable feedback to our manuscript. We acknowledge the support from the Scientific Service Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp). We thank Dan Alistarh for providing us with computational resources. This work was partially funded by the German Federal Ministry of Education and Research (BMBF) under the grant AIgenCY (16KIS2012) and ELSA – European Lighthouse on Secure and Safe AI funded by the European Union under grant agreement No. 101070617. Views and opinions expressed are however those of the authors only and do not necessarily reflect those of the European Union or European Commission. 
Neither the European Union nor the European Commission can be held responsible for them.","acknowledged_ssus":[{"_id":"ScienComp"}],"external_id":{"arxiv":["2403.06833"]},"OA_type":"green","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"03","corr_author":"1","file_date_updated":"2025-02-20T10:11:45Z","citation":{"mla":"Zverev, Egor, et al. “Can LLMs Separate Instructions from Data? And What Do We Even Mean by That?” <i>ArXiv</i>, 2403.06833, 2024, doi:<a href=\"https://doi.org/10.48550/arXiv.2403.06833\">10.48550/arXiv.2403.06833</a>.","short":"E. Zverev, S. Abdelnabi, S. Tabesh, M. Fritz, C. Lampert, ArXiv (2024).","ieee":"E. Zverev, S. Abdelnabi, S. Tabesh, M. Fritz, and C. Lampert, “Can LLMs separate instructions from data? And what do we even mean by that?,” <i>arXiv</i>. 2024.","ista":"Zverev E, Abdelnabi S, Tabesh S, Fritz M, Lampert C. 2024. Can LLMs separate instructions from data? And what do we even mean by that? arXiv, 2403.06833.","chicago":"Zverev, Egor, Sahar Abdelnabi, Soroush Tabesh, Mario Fritz, and Christoph Lampert. “Can LLMs Separate Instructions from Data? And What Do We Even Mean by That?” <i>ArXiv</i>, 2024. <a href=\"https://doi.org/10.48550/arXiv.2403.06833\">https://doi.org/10.48550/arXiv.2403.06833</a>.","apa":"Zverev, E., Abdelnabi, S., Tabesh, S., Fritz, M., &#38; Lampert, C. (2024). Can LLMs separate instructions from data? And what do we even mean by that? <i>arXiv</i>. <a href=\"https://doi.org/10.48550/arXiv.2403.06833\">https://doi.org/10.48550/arXiv.2403.06833</a>","ama":"Zverev E, Abdelnabi S, Tabesh S, Fritz M, Lampert C. Can LLMs separate instructions from data? And what do we even mean by that? <i>arXiv</i>. 2024. 
doi:<a href=\"https://doi.org/10.48550/arXiv.2403.06833\">10.48550/arXiv.2403.06833</a>"},"has_accepted_license":"1","article_number":"2403.06833","language":[{"iso":"eng"}],"date_updated":"2025-02-24T12:52:23Z","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"author":[{"full_name":"Zverev, Egor","last_name":"Zverev","id":"05162b19-1340-11ed-8f02-fa94e0e8c3bc","first_name":"Egor"},{"first_name":"Sahar","last_name":"Abdelnabi","full_name":"Abdelnabi, Sahar"},{"full_name":"Tabesh, Soroush","last_name":"Tabesh","id":"06000900-6068-11ef-8d61-c2472ef2e752","orcid":"0009-0003-4119-6281","first_name":"Soroush"},{"full_name":"Fritz, Mario","last_name":"Fritz","first_name":"Mario"},{"first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","full_name":"Lampert, Christoph"}],"article_processing_charge":"No","publication_status":"published","doi":"10.48550/arXiv.2403.06833","type":"preprint","oa_version":"Preprint","status":"public","publication":"arXiv","OA_place":"repository","title":"Can LLMs separate instructions from data? And what do we even mean by that?","_id":"19063","arxiv":1,"day":"01","ddc":["000"],"date_published":"2024-03-01T00:00:00Z","year":"2024","tmp":{"name":"Creative Commons Attribution-ShareAlike 4.0 International Public License (CC BY-SA 4.0)","legal_code_url":"https://creativecommons.org/licenses/by-sa/4.0/legalcode","image":"/images/cc_by_sa.png","short":"CC BY-SA (4.0)"},"oa":1,"date_created":"2025-02-20T10:13:42Z","abstract":[{"lang":"eng","text":"Instruction-tuned Large Language Models (LLMs) show impressive results in numerous practical applications, but they lack essential safety features that are common in other areas of computer science, particularly an explicit separation of instructions and data. This makes them vulnerable to manipulations such as indirect prompt injections and generally unsuitable for safety-critical tasks. 
Surprisingly, there is currently no established definition or benchmark to quantify this phenomenon. In this work, we close this gap by introducing a formal measure for instruction-data separation and an empirical variant that is calculable from a model's outputs. We also present a new dataset, SEP, that allows estimating the measure for real-world models. Our results on various LLMs show that the problem of instruction-data separation is real: all models fail to achieve high separation, and canonical mitigation techniques, such as prompt engineering and fine-tuning, either fail to substantially improve separation or reduce model utility. The source code and SEP dataset are openly accessible at https://github.com/egozverev/Shold-It-Be-Executed-Or-Processed.\r\n"}],"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2403.06833"}]},{"_id":"19307","publisher":"Zenodo","corr_author":"1","day":"21","citation":{"ama":"Hwong Y-L, Muller CJ. Data - The unreasonable efficiency of total rain evaporation removal in triggering convective self-aggregation. 2024. doi:<a href=\"https://doi.org/10.5281/ZENODO.10687169\">10.5281/ZENODO.10687169</a>","apa":"Hwong, Y.-L., &#38; Muller, C. J. (2024). Data - The unreasonable efficiency of total rain evaporation removal in triggering convective self-aggregation. Zenodo. <a href=\"https://doi.org/10.5281/ZENODO.10687169\">https://doi.org/10.5281/ZENODO.10687169</a>","chicago":"Hwong, Yi-Ling, and Caroline J Muller. “Data - The Unreasonable Efficiency of Total Rain Evaporation Removal in Triggering Convective Self-Aggregation.” Zenodo, 2024. <a href=\"https://doi.org/10.5281/ZENODO.10687169\">https://doi.org/10.5281/ZENODO.10687169</a>.","ista":"Hwong Y-L, Muller CJ. 2024. Data - The unreasonable efficiency of total rain evaporation removal in triggering convective self-aggregation, Zenodo, <a href=\"https://doi.org/10.5281/ZENODO.10687169\">10.5281/ZENODO.10687169</a>.","ieee":"Y.-L. Hwong and C. J. 
Muller, “Data - The unreasonable efficiency of total rain evaporation removal in triggering convective self-aggregation.” Zenodo, 2024.","short":"Y.-L. Hwong, C.J. Muller, (2024).","mla":"Hwong, Yi-Ling, and Caroline J. Muller. <i>Data - The Unreasonable Efficiency of Total Rain Evaporation Removal in Triggering Convective Self-Aggregation</i>. Zenodo, 2024, doi:<a href=\"https://doi.org/10.5281/ZENODO.10687169\">10.5281/ZENODO.10687169</a>."},"ddc":["550"],"has_accepted_license":"1","related_material":{"record":[{"id":"15186","relation":"used_in_publication","status":"public"}]},"OA_type":"green","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"02","OA_place":"repository","title":"Data - The unreasonable efficiency of total rain evaporation removal in triggering convective self-aggregation","abstract":[{"text":"This repository contains the data, scripts, SAM codes and files required to reproduce the results of the manuscript \"The Unreasonable Efficiency of Total Rain Evaporation Removal in Triggering Convective Self-Aggregation\" submitted to the Geophysical Research Letters (GRL).\r\n\r\nBrief description of project: This project aims to examine the impact of rain evaporation removal or reduction in the planetary boundary layer (PBL) on convective self aggregation (CSA). Non-rotating radiative-convective equilibrium (RCE) simulations were conducted with the System for Atmospheric Modeling (SAM) cloud resolving model. Rain evaporation in the lowest 1 km was progressively reduced and the effect on CSA was investigated. The physical processes underlying this type of aggregation (referred to in the manuscript as no-evaporation CSA, or NE-CSA) were analyzed and described. 
\r\nThe default SAM code base (version 6.10.8) can be downloaded from here: http://rossby.msrc.sunysb.edu/~marat/SAM.html","lang":"eng"}],"date_created":"2025-03-07T08:39:40Z","doi":"10.5281/ZENODO.10687169","main_file_link":[{"open_access":"1","url":"https://doi.org/10.5281/zenodo.8369509"}],"type":"research_data_reference","oa_version":"Published Version","status":"public","date_published":"2024-02-21T00:00:00Z","date_updated":"2025-09-04T13:16:39Z","year":"2024","department":[{"_id":"CaMu"}],"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"author":[{"first_name":"Yi-Ling","orcid":"0000-0001-9281-3479","id":"1217aa61-4dd1-11ec-9ac3-f2ba3f17ee22","last_name":"Hwong","full_name":"Hwong, Yi-Ling"},{"first_name":"Caroline J","id":"f978ccb0-3f7f-11eb-b193-b0e2bd13182b","orcid":"0000-0001-5836-5350","full_name":"Muller, Caroline J","last_name":"Muller"}],"article_processing_charge":"No","oa":1},{"oa":1,"tmp":{"legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","short":"CC BY (4.0)"},"alternative_title":["TMLR"],"year":"2024","date_published":"2024-04-12T00:00:00Z","quality_controlled":"1","abstract":[{"lang":"eng","text":"Continual learning is a subfield of machine learning, which aims to allow machine learning models to continuously learn on new data, by accumulating knowledge without forgetting what was learned in the past. In this work, we take a step back, and ask: \"Why should one care about continual learning in the first place?\". We set the stage by examining recent continual learning papers published at four major machine learning conferences, and show that memory-constrained settings dominate the field. 
Then, we discuss five open problems in machine learning, and even though they might seem unrelated to continual learning at first sight, we show that continual learning will inevitably be part of their solution. These problems are model editing, personalization and specialization, on-device learning, faster (re-)training and reinforcement learning. Finally, by comparing the desiderata from these unsolved problems and the current assumptions in continual learning, we highlight and discuss four future directions for continual learning research. We hope that this work offers an interesting perspective on the future of continual learning, while displaying its potential value and the paths we have to pursue in order to make it successful. This work is the result of the many discussions the authors had at the Dagstuhl seminar on Deep Continual Learning, in March 2023."}],"date_created":"2025-03-16T23:01:25Z","intvolume":"      2024","article_type":"original","title":"Continual learning: Applications and the road forward","volume":2024,"OA_place":"publisher","publication":"Transactions on Machine Learning Research","ddc":["000"],"day":"12","publisher":"Transactions on Machine Learning Research","arxiv":1,"_id":"19408","author":[{"last_name":"Verwimp","full_name":"Verwimp, Eli","first_name":"Eli"},{"full_name":"Aljundi, Rahaf","last_name":"Aljundi","first_name":"Rahaf"},{"last_name":"Ben-David","full_name":"Ben-David, Shai","first_name":"Shai"},{"first_name":"Matthias","full_name":"Bethge, Matthias","last_name":"Bethge"},{"first_name":"Andrea","last_name":"Cossu","full_name":"Cossu, Andrea"},{"last_name":"Gepperth","full_name":"Gepperth, Alexander","first_name":"Alexander"},{"first_name":"Tyler L.","last_name":"Hayes","full_name":"Hayes, Tyler L."},{"first_name":"Eyke","last_name":"Hüllermeier","full_name":"Hüllermeier, Eyke"},{"first_name":"Christopher","last_name":"Kanan","full_name":"Kanan, Christopher"},{"full_name":"Kudithipudi, 
Dhireesha","last_name":"Kudithipudi","first_name":"Dhireesha"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","first_name":"Christoph","last_name":"Lampert","full_name":"Lampert, Christoph"},{"last_name":"Mundt","full_name":"Mundt, Martin","first_name":"Martin"},{"last_name":"Pascanu","full_name":"Pascanu, Razvan","first_name":"Razvan"},{"first_name":"Adrian","full_name":"Popescu, Adrian","last_name":"Popescu"},{"first_name":"Andreas S.","last_name":"Tolias","full_name":"Tolias, Andreas S."},{"first_name":"Joost","full_name":"Van De Weijer, Joost","last_name":"Van De Weijer"},{"last_name":"Liu","full_name":"Liu, Bing","first_name":"Bing"},{"last_name":"Lomonaco","full_name":"Lomonaco, Vincenzo","first_name":"Vincenzo"},{"last_name":"Tuytelaars","full_name":"Tuytelaars, Tinne","first_name":"Tinne"},{"full_name":"Van De Ven, Gido M.","last_name":"Van De Ven","first_name":"Gido M."}],"article_processing_charge":"No","department":[{"_id":"ChLa"}],"date_updated":"2025-03-20T09:21:02Z","status":"public","oa_version":"Published Version","type":"journal_article","publication_status":"published","month":"04","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publication_identifier":{"eissn":["2835-8856"]},"OA_type":"diamond","external_id":{"arxiv":["2311.11908"]},"file":[{"content_type":"application/pdf","creator":"dernst","success":1,"access_level":"open_access","date_created":"2025-03-20T09:02:18Z","file_id":"19426","file_size":1367966,"relation":"main_file","checksum":"0714e12f7423cd098976ed9974561155","file_name":"2024_TMLR_Verwimp.pdf","date_updated":"2025-03-20T09:02:18Z"}],"language":[{"iso":"eng"}],"has_accepted_license":"1","citation":{"ama":"Verwimp E, Aljundi R, Ben-David S, et al. Continual learning: Applications and the road forward. <i>Transactions on Machine Learning Research</i>. 2024;2024.","chicago":"Verwimp, Eli, Rahaf Aljundi, Shai Ben-David, Matthias Bethge, Andrea Cossu, Alexander Gepperth, Tyler L. Hayes, et al. 
“Continual Learning: Applications and the Road Forward.” <i>Transactions on Machine Learning Research</i>. Transactions on Machine Learning Research, 2024.","apa":"Verwimp, E., Aljundi, R., Ben-David, S., Bethge, M., Cossu, A., Gepperth, A., … Van De Ven, G. M. (2024). Continual learning: Applications and the road forward. <i>Transactions on Machine Learning Research</i>. Transactions on Machine Learning Research.","mla":"Verwimp, Eli, et al. “Continual Learning: Applications and the Road Forward.” <i>Transactions on Machine Learning Research</i>, vol. 2024, Transactions on Machine Learning Research, 2024.","ista":"Verwimp E, Aljundi R, Ben-David S, Bethge M, Cossu A, Gepperth A, Hayes TL, Hüllermeier E, Kanan C, Kudithipudi D, Lampert C, Mundt M, Pascanu R, Popescu A, Tolias AS, Van De Weijer J, Liu B, Lomonaco V, Tuytelaars T, Van De Ven GM. 2024. Continual learning: Applications and the road forward. Transactions on Machine Learning Research. 2024.","short":"E. Verwimp, R. Aljundi, S. Ben-David, M. Bethge, A. Cossu, A. Gepperth, T.L. Hayes, E. Hüllermeier, C. Kanan, D. Kudithipudi, C. Lampert, M. Mundt, R. Pascanu, A. Popescu, A.S. Tolias, J. Van De Weijer, B. Liu, V. Lomonaco, T. Tuytelaars, G.M. Van De Ven, Transactions on Machine Learning Research 2024 (2024).","ieee":"E. Verwimp <i>et al.</i>, “Continual learning: Applications and the road forward,” <i>Transactions on Machine Learning Research</i>, vol. 2024. 
Transactions on Machine Learning Research, 2024."},"file_date_updated":"2025-03-20T09:02:18Z","scopus_import":"1"},{"intvolume":"         2","date_created":"2025-03-23T23:01:28Z","abstract":[{"text":"This Comment explores new approaches to enrich large-scale population data, including incorporating macro-environmental and digital health measures.","lang":"eng"}],"quality_controlled":"1","date_published":"2024-10-01T00:00:00Z","issue":"10","year":"2024","_id":"19446","day":"01","publisher":"Springer Nature","publication":"Nature Mental Health","volume":2,"article_type":"letter_note","title":"Large-scale population data enrichment in mental health research","doi":"10.1038/s44220-024-00316-z","publication_status":"published","oa_version":"None","type":"journal_article","status":"public","date_updated":"2025-03-25T08:28:39Z","department":[{"_id":"GaNo"}],"article_processing_charge":"No","author":[{"first_name":"Frauke","full_name":"Nees, Frauke","last_name":"Nees"},{"last_name":"Renner","full_name":"Renner, Paul","first_name":"Paul"},{"full_name":"Holz, Nathalie E.","last_name":"Holz","first_name":"Nathalie E."},{"first_name":"Elli","full_name":"Polemiti, Elli","last_name":"Polemiti"},{"full_name":"Siehl, Sebastian","last_name":"Siehl","first_name":"Sebastian"},{"first_name":"Sören","last_name":"Hese","full_name":"Hese, Sören"},{"full_name":"Schepanski, Kerstin","last_name":"Schepanski","first_name":"Kerstin"},{"full_name":"Schumann, Gunter","last_name":"Schumann","first_name":"Gunter"},{"full_name":"Walter, Henrik","last_name":"Walter","first_name":"Henrik"},{"full_name":"Heinz, Andreas","last_name":"Heinz","first_name":"Andreas"},{"first_name":"Markus","last_name":"Ralser","full_name":"Ralser, Markus"},{"first_name":"Sven","last_name":"Twardziok","full_name":"Twardziok, Sven"},{"full_name":"Vaidya, Nilakshi","last_name":"Vaidya","first_name":"Nilakshi"},{"first_name":"Antoine","last_name":"Bernas","full_name":"Bernas, Antoine"},{"full_name":"Serin, 
Emin","last_name":"Serin","first_name":"Emin"},{"last_name":"Jentsch","full_name":"Jentsch, Marcel","first_name":"Marcel"},{"first_name":"Esther","full_name":"Hitchen, Esther","last_name":"Hitchen"},{"last_name":"Kebir","full_name":"Kebir, Hedi","first_name":"Hedi"},{"first_name":"Tristram A.","full_name":"Lett, Tristram A.","last_name":"Lett"},{"last_name":"Roy","full_name":"Roy, Jean Charles","first_name":"Jean Charles"},{"last_name":"Eils","full_name":"Eils, Roland","first_name":"Roland"},{"first_name":"Ulrike Helene","last_name":"Taron","full_name":"Taron, Ulrike Helene"},{"first_name":"Tatjana","last_name":"Schütz","full_name":"Schütz, Tatjana"},{"first_name":"Jamie","full_name":"Banks, Jamie","last_name":"Banks"},{"full_name":"Banaschewski, Tobias","last_name":"Banaschewski","first_name":"Tobias"},{"first_name":"Karina","last_name":"Jansone","full_name":"Jansone, Karina"},{"full_name":"Christmann, Nina","last_name":"Christmann","first_name":"Nina"},{"last_name":"Meyer-Lindenberg","full_name":"Meyer-Lindenberg, Andreas","first_name":"Andreas"},{"first_name":"Heike","last_name":"Tost","full_name":"Tost, Heike"},{"full_name":"Holz, Nathalie","last_name":"Holz","first_name":"Nathalie"},{"full_name":"Schwarz, Emanuel","last_name":"Schwarz","first_name":"Emanuel"},{"last_name":"Stringaris","full_name":"Stringaris, Argyris","first_name":"Argyris"},{"last_name":"Neidhart","full_name":"Neidhart, Maja","first_name":"Maja"},{"full_name":"Seefried, Beke","last_name":"Seefried","first_name":"Beke"},{"last_name":"Aden","full_name":"Aden, Rieke","first_name":"Rieke"},{"first_name":"Ole A.","last_name":"Andreassen","full_name":"Andreassen, Ole A."},{"first_name":"Lars T.","last_name":"Westlye","full_name":"Westlye, Lars T."},{"first_name":"Dennis","full_name":"Van Der Meer, Dennis","last_name":"Van Der Meer"},{"last_name":"Fernandez","full_name":"Fernandez, Sara","first_name":"Sara"},{"first_name":"Rikka","full_name":"Kjelkenes, 
Rikka","last_name":"Kjelkenes"},{"first_name":"Helga","full_name":"Ask, Helga","last_name":"Ask"},{"first_name":"Michael","last_name":"Rapp","full_name":"Rapp, Michael"},{"first_name":"Mira","last_name":"Tschorn","full_name":"Tschorn, Mira"},{"last_name":"Böttger","full_name":"Böttger, Sarah Jane","first_name":"Sarah Jane"},{"last_name":"Marquand","full_name":"Marquand, Andre","first_name":"Andre"},{"id":"3E57A680-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-7673-7178","first_name":"Gaia","full_name":"Novarino, Gaia","last_name":"Novarino"},{"first_name":"Lena","id":"4406F586-F248-11E8-B48F-1D18A9856A87","last_name":"Marr","full_name":"Marr, Lena"},{"last_name":"Slater","full_name":"Slater, Mel","first_name":"Mel"},{"full_name":"Viapiana, Guillem Feixas","last_name":"Viapiana","first_name":"Guillem Feixas"},{"first_name":"Francisco Eiroa","full_name":"Orosa, Francisco Eiroa","last_name":"Orosa"},{"first_name":"Jaime","full_name":"Gallego, Jaime","last_name":"Gallego"},{"last_name":"Pastor","full_name":"Pastor, Alvaro","first_name":"Alvaro"},{"first_name":"Andreas J.","full_name":"Forstner, Andreas J.","last_name":"Forstner"},{"first_name":"Per","full_name":"Hoffmann, Per","last_name":"Hoffmann"},{"last_name":"Nöthen","full_name":"Nöthen, Markus M.","first_name":"Markus M."},{"last_name":"Claus","full_name":"Claus, Isabelle","first_name":"Isabelle"},{"last_name":"Miller","full_name":"Miller, Abigail","first_name":"Abigail"},{"first_name":"Carina M.","last_name":"Mathey","full_name":"Mathey, Carina M."},{"last_name":"Heilmann-Heimbach","full_name":"Heilmann-Heimbach, Stefanie","first_name":"Stefanie"},{"first_name":"Peter","full_name":"Sommer, Peter","last_name":"Sommer"},{"full_name":"Patraskaki, Myrto","last_name":"Patraskaki","first_name":"Myrto"},{"first_name":"Johannes","full_name":"Wilbertz, Johannes","last_name":"Wilbertz"},{"full_name":"Schmitt, Karen","last_name":"Schmitt","first_name":"Karen"},{"last_name":"Jirsa","full_name":"Jirsa, 
Viktor","first_name":"Viktor"},{"last_name":"Petkoski","full_name":"Petkoski, Spase","first_name":"Spase"},{"full_name":"Pitel, Séverine","last_name":"Pitel","first_name":"Séverine"},{"last_name":"Otten","full_name":"Otten, Lisa","first_name":"Lisa"},{"full_name":"Athanasiadis, Anastasios Polykarpos","last_name":"Athanasiadis","first_name":"Anastasios Polykarpos"},{"first_name":"Charlie","last_name":"Pearmund","full_name":"Pearmund, Charlie"},{"first_name":"Bernhard","full_name":"Spanlang, Bernhard","last_name":"Spanlang"},{"first_name":"Elena","full_name":"Alvarez, Elena","last_name":"Alvarez"},{"first_name":"Mavi","last_name":"Sanchez","full_name":"Sanchez, Mavi"},{"full_name":"Giner, Arantxa","last_name":"Giner","first_name":"Arantxa"},{"last_name":"Jia","full_name":"Jia, Tianye","first_name":"Tianye"},{"last_name":"Gong","full_name":"Gong, Yanting","first_name":"Yanting"},{"full_name":"Xia, Yunman","last_name":"Xia","first_name":"Yunman"},{"first_name":"Xiao","full_name":"Chang, Xiao","last_name":"Chang"},{"first_name":"Vince","full_name":"Calhoun, Vince","last_name":"Calhoun"},{"first_name":"Jingyu","last_name":"Liu","full_name":"Liu, Jingyu"},{"full_name":"Schwalber, Ameli","last_name":"Schwalber","first_name":"Ameli"},{"first_name":"Paul","last_name":"Thompson","full_name":"Thompson, Paul"},{"first_name":"Nicholas","last_name":"Clinton","full_name":"Clinton, Nicholas"},{"first_name":"Sylvane","full_name":"Desrivières, Sylvane","last_name":"Desrivières"},{"first_name":"Allan H.","full_name":"Young, Allan H.","last_name":"Young"},{"first_name":"Bernd","full_name":"Stahl, Bernd","last_name":"Stahl"},{"last_name":"Ogoh","full_name":"Ogoh, George","first_name":"George"}],"scopus_import":"1","citation":{"ista":"Nees F, Renner P, Holz NE, Polemiti E, Siehl S, Hese S, Schepanski K, Schumann G, Walter H, Heinz A, Ralser M, Twardziok S, Vaidya N, Bernas A, Serin E, Jentsch M, Hitchen E, Kebir H, Lett TA, Roy JC, Eils R, Taron UH, Schütz T, Banks J, Banaschewski T, 
Jansone K, Christmann N, Meyer-Lindenberg A, Tost H, Holz N, Schwarz E, Stringaris A, Neidhart M, Seefried B, Aden R, Andreassen OA, Westlye LT, Van Der Meer D, Fernandez S, Kjelkenes R, Ask H, Rapp M, Tschorn M, Böttger SJ, Marquand A, Novarino G, Marr L, Slater M, Viapiana GF, Orosa FE, Gallego J, Pastor A, Forstner AJ, Hoffmann P, Nöthen MM, Claus I, Miller A, Mathey CM, Heilmann-Heimbach S, Sommer P, Patraskaki M, Wilbertz J, Schmitt K, Jirsa V, Petkoski S, Pitel S, Otten L, Athanasiadis AP, Pearmund C, Spanlang B, Alvarez E, Sanchez M, Giner A, Jia T, Gong Y, Xia Y, Chang X, Calhoun V, Liu J, Schwalber A, Thompson P, Clinton N, Desrivières S, Young AH, Stahl B, Ogoh G. 2024. Large-scale population data enrichment in mental health research. Nature Mental Health. 2(10), 1124–1127.","ieee":"F. Nees <i>et al.</i>, “Large-scale population data enrichment in mental health research,” <i>Nature Mental Health</i>, vol. 2, no. 10. Springer Nature, pp. 1124–1127, 2024.","short":"F. Nees, P. Renner, N.E. Holz, E. Polemiti, S. Siehl, S. Hese, K. Schepanski, G. Schumann, H. Walter, A. Heinz, M. Ralser, S. Twardziok, N. Vaidya, A. Bernas, E. Serin, M. Jentsch, E. Hitchen, H. Kebir, T.A. Lett, J.C. Roy, R. Eils, U.H. Taron, T. Schütz, J. Banks, T. Banaschewski, K. Jansone, N. Christmann, A. Meyer-Lindenberg, H. Tost, N. Holz, E. Schwarz, A. Stringaris, M. Neidhart, B. Seefried, R. Aden, O.A. Andreassen, L.T. Westlye, D. Van Der Meer, S. Fernandez, R. Kjelkenes, H. Ask, M. Rapp, M. Tschorn, S.J. Böttger, A. Marquand, G. Novarino, L. Marr, M. Slater, G.F. Viapiana, F.E. Orosa, J. Gallego, A. Pastor, A.J. Forstner, P. Hoffmann, M.M. Nöthen, I. Claus, A. Miller, C.M. Mathey, S. Heilmann-Heimbach, P. Sommer, M. Patraskaki, J. Wilbertz, K. Schmitt, V. Jirsa, S. Petkoski, S. Pitel, L. Otten, A.P. Athanasiadis, C. Pearmund, B. Spanlang, E. Alvarez, M. Sanchez, A. Giner, T. Jia, Y. Gong, Y. Xia, X. Chang, V. Calhoun, J. Liu, A. Schwalber, P. Thompson, N. Clinton, S. Desrivières, A.H. 
Young, B. Stahl, G. Ogoh, Nature Mental Health 2 (2024) 1124–1127.","mla":"Nees, Frauke, et al. “Large-Scale Population Data Enrichment in Mental Health Research.” <i>Nature Mental Health</i>, vol. 2, no. 10, Springer Nature, 2024, pp. 1124–27, doi:<a href=\"https://doi.org/10.1038/s44220-024-00316-z\">10.1038/s44220-024-00316-z</a>.","ama":"Nees F, Renner P, Holz NE, et al. Large-scale population data enrichment in mental health research. <i>Nature Mental Health</i>. 2024;2(10):1124-1127. doi:<a href=\"https://doi.org/10.1038/s44220-024-00316-z\">10.1038/s44220-024-00316-z</a>","apa":"Nees, F., Renner, P., Holz, N. E., Polemiti, E., Siehl, S., Hese, S., … Ogoh, G. (2024). Large-scale population data enrichment in mental health research. <i>Nature Mental Health</i>. Springer Nature. <a href=\"https://doi.org/10.1038/s44220-024-00316-z\">https://doi.org/10.1038/s44220-024-00316-z</a>","chicago":"Nees, Frauke, Paul Renner, Nathalie E. Holz, Elli Polemiti, Sebastian Siehl, Sören Hese, Kerstin Schepanski, et al. “Large-Scale Population Data Enrichment in Mental Health Research.” <i>Nature Mental Health</i>. Springer Nature, 2024. <a href=\"https://doi.org/10.1038/s44220-024-00316-z\">https://doi.org/10.1038/s44220-024-00316-z</a>."},"language":[{"iso":"eng"}],"OA_type":"closed access","page":"1124-1127","acknowledgement":"Funded by the European Union. Complementary funding was received by the UK Research and Innovation (UKRI) under the UK government’s Horizon Europe funding guarantee (10041392 and 10038599). Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union, the European Health and Digital Executive Agency (HADEA) or UKRI. The European Union, HADEA and UKRI cannot be held responsible for them. 
This work received also support from Chinese Ministry for Science and Technology (MOST), the Horizon 2020-funded European Research Council Advanced Grant ‘STRATIFY’ (695313), the German Research Foundation (COPE; 675346; NE 1383/15-1 (CoviDrug)) and the National Natural Science Foundation of China grant 82150710554.","publication_identifier":{"eissn":["2731-6076"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"10"},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"03","page":"708-722.e5","external_id":{"pmid":["39719709"]},"OA_type":"closed access","publication_identifier":{"issn":["1550-4131"]},"citation":{"mla":"Douglass, Amelia M., et al. “Acute and Circadian Feedforward Regulation of Agouti-Related Peptide Hunger Neurons.” <i>Cell Metabolism</i>, vol. 37, no. 3, Elsevier, 2024, p. 708–722.e5, doi:<a href=\"https://doi.org/10.1016/j.cmet.2024.11.009\">10.1016/j.cmet.2024.11.009</a>.","short":"A.M. Douglass, H. Kucukdereli, J.C. Madara, D. Wang, C. Wu, E.D. Lowenstein, J. Tao, B.B. Lowell, Cell Metabolism 37 (2024) 708–722.e5.","ieee":"A. M. Douglass <i>et al.</i>, “Acute and circadian feedforward regulation of agouti-related peptide hunger neurons,” <i>Cell Metabolism</i>, vol. 37, no. 3. Elsevier, p. 708–722.e5, 2024.","ista":"Douglass AM, Kucukdereli H, Madara JC, Wang D, Wu C, Lowenstein ED, Tao J, Lowell BB. 2024. Acute and circadian feedforward regulation of agouti-related peptide hunger neurons. Cell Metabolism. 37(3), 708–722.e5.","ama":"Douglass AM, Kucukdereli H, Madara JC, et al. Acute and circadian feedforward regulation of agouti-related peptide hunger neurons. <i>Cell Metabolism</i>. 2024;37(3):708-722.e5. doi:<a href=\"https://doi.org/10.1016/j.cmet.2024.11.009\">10.1016/j.cmet.2024.11.009</a>","chicago":"Douglass, Amelia M., Hakan Kucukdereli, Joseph C. Madara, Daqing Wang, Chen Wu, Elijah D. Lowenstein, Jenkang Tao, and Bradford B. Lowell. 
“Acute and Circadian Feedforward Regulation of Agouti-Related Peptide Hunger Neurons.” <i>Cell Metabolism</i>. Elsevier, 2024. <a href=\"https://doi.org/10.1016/j.cmet.2024.11.009\">https://doi.org/10.1016/j.cmet.2024.11.009</a>.","apa":"Douglass, A. M., Kucukdereli, H., Madara, J. C., Wang, D., Wu, C., Lowenstein, E. D., … Lowell, B. B. (2024). Acute and circadian feedforward regulation of agouti-related peptide hunger neurons. <i>Cell Metabolism</i>. Elsevier. <a href=\"https://doi.org/10.1016/j.cmet.2024.11.009\">https://doi.org/10.1016/j.cmet.2024.11.009</a>"},"language":[{"iso":"eng"}],"extern":"1","scopus_import":"1","author":[{"full_name":"Douglass, Amelia May Barnett","last_name":"Douglass","id":"de5f6fda-80fb-11ef-996f-a8c4ecd8e289","orcid":"0000-0001-5398-6473","first_name":"Amelia May Barnett"},{"first_name":"Hakan","full_name":"Kucukdereli, Hakan","last_name":"Kucukdereli"},{"full_name":"Madara, Joseph C.","last_name":"Madara","first_name":"Joseph C."},{"full_name":"Wang, Daqing","last_name":"Wang","first_name":"Daqing"},{"last_name":"Wu","full_name":"Wu, Chen","first_name":"Chen"},{"first_name":"Elijah D.","full_name":"Lowenstein, Elijah D.","last_name":"Lowenstein"},{"full_name":"Tao, Jenkang","last_name":"Tao","first_name":"Jenkang"},{"last_name":"Lowell","full_name":"Lowell, Bradford B.","first_name":"Bradford B."}],"article_processing_charge":"No","date_updated":"2025-07-10T11:51:40Z","status":"public","publication_status":"published","doi":"10.1016/j.cmet.2024.11.009","type":"journal_article","oa_version":"None","volume":37,"article_type":"original","title":"Acute and circadian feedforward regulation of agouti-related peptide hunger neurons","publication":"Cell Metabolism","pmid":1,"_id":"19470","publisher":"Elsevier","day":"04","date_published":"2024-03-04T00:00:00Z","year":"2024","issue":"3","intvolume":"        37","date_created":"2025-04-03T12:27:39Z","abstract":[{"lang":"eng","text":"When food is freely available, eating occurs without energy 
deficit. While agouti-related peptide (AgRP) neurons are likely involved, their activation is thought to require negative energy balance. To investigate this, we implemented long-term, continuous in vivo fiber-photometry recordings in mice. We discovered new forms of AgRP neuron regulation, including fast pre-ingestive decreases in activity and unexpectedly rapid activation by fasting. Furthermore, AgRP neuron activity has a circadian rhythm that peaks concurrent with the daily feeding onset. Importantly, this rhythm persists when nutrition is provided via constant-rate gastric infusions. Hence, it is not secondary to a circadian feeding rhythm. The AgRP neuron rhythm is driven by the circadian clock, the suprachiasmatic nucleus (SCN), as SCN ablation abolishes the circadian rhythm in AgRP neuron activity and feeding. The SCN activates AgRP neurons via excitatory afferents from thyrotrophin-releasing hormone-expressing neurons in the dorsomedial hypothalamus (DMHTrh neurons) to drive daily feeding rhythms."}],"quality_controlled":"1"},{"publication_identifier":{"eissn":["1687-0247"],"issn":["1073-7928"]},"acknowledgement":"The author would like to thank Peter Koymans and Carlo Pagano for helpful discussions.","page":"7571-7593","external_id":{"arxiv":["2211.06062"]},"OA_type":"green","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"05","extern":"1","scopus_import":"1","language":[{"iso":"eng"}],"citation":{"mla":"Chan, Stephanie. “The 3-Isogeny Selmer Groups of the Elliptic Curves Y2=x3+n2.” <i>International Mathematics Research Notices</i>, vol. 2024, no. 9, Oxford University Press, 2024, pp. 7571–93, doi:<a href=\"https://doi.org/10.1093/imrn/rnad266\">10.1093/imrn/rnad266</a>.","short":"S. Chan, International Mathematics Research Notices 2024 (2024) 7571–7593.","ieee":"S. Chan, “The 3-isogeny selmer groups of the elliptic curves y2=x3+n2,” <i>International Mathematics Research Notices</i>, vol. 2024, no. 9. Oxford University Press, pp. 
7571–7593, 2024.","ista":"Chan S. 2024. The 3-isogeny selmer groups of the elliptic curves y2=x3+n2. International Mathematics Research Notices. 2024(9), 7571–7593.","chicago":"Chan, Stephanie. “The 3-Isogeny Selmer Groups of the Elliptic Curves Y2=x3+n2.” <i>International Mathematics Research Notices</i>. Oxford University Press, 2024. <a href=\"https://doi.org/10.1093/imrn/rnad266\">https://doi.org/10.1093/imrn/rnad266</a>.","apa":"Chan, S. (2024). The 3-isogeny selmer groups of the elliptic curves y2=x3+n2. <i>International Mathematics Research Notices</i>. Oxford University Press. <a href=\"https://doi.org/10.1093/imrn/rnad266\">https://doi.org/10.1093/imrn/rnad266</a>","ama":"Chan S. The 3-isogeny selmer groups of the elliptic curves y2=x3+n2. <i>International Mathematics Research Notices</i>. 2024;2024(9):7571-7593. doi:<a href=\"https://doi.org/10.1093/imrn/rnad266\">10.1093/imrn/rnad266</a>"},"date_updated":"2025-07-10T11:51:44Z","author":[{"first_name":"Yik Tung","id":"c4c0afc8-9262-11ed-9231-d8b0bc743af1","orcid":"0000-0001-8467-4106","full_name":"Chan, Yik Tung","last_name":"Chan"}],"article_processing_charge":"No","type":"journal_article","oa_version":"Preprint","publication_status":"published","doi":"10.1093/imrn/rnad266","status":"public","publication":"International Mathematics Research Notices","article_type":"original","title":"The 3-isogeny selmer groups of the elliptic curves y2=x3+n2","volume":2024,"OA_place":"repository","publisher":"Oxford University Press","arxiv":1,"day":"01","_id":"19486","year":"2024","issue":"9","date_published":"2024-05-01T00:00:00Z","oa":1,"quality_controlled":"1","date_created":"2025-04-05T10:50:33Z","abstract":[{"text":"Consider the family of elliptic curves En:y2=x3+n2, where n varies over positive cubefree integers. There is a rational 3-isogeny ϕ from En to E^n:y2=x3−27n2 and a dual isogeny ϕ^:E^n→En. 
We show that for almost all n, the rank of Selϕ(En) is 0, and the rank of Selϕ^(E^n) is determined by the number of prime factors of n that are congruent to 2 mod 3 and the congruence class of n mod 9.","lang":"eng"}],"intvolume":"      2024","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2211.06062"}]},{"citation":{"mla":"Modoranu, Ionut-Vlad, et al. “MICROADAM: Accurate Adaptive Optimization with Low Space Overhead and Provable Convergence.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024.","ieee":"I.-V. Modoranu <i>et al.</i>, “MICROADAM: Accurate adaptive optimization with low space overhead and provable convergence,” in <i>38th Conference on Neural Information Processing Systems</i>, 2024, vol. 37.","short":"I.-V. Modoranu, M. Safaryan, G. Malinovsky, E. Kurtic, T. Robert, P. Richtárik, D.-A. Alistarh, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ista":"Modoranu I-V, Safaryan M, Malinovsky G, Kurtic E, Robert T, Richtárik P, Alistarh D-A. 2024. MICROADAM: Accurate adaptive optimization with low space overhead and provable convergence. 38th Conference on Neural Information Processing Systems. , Advances in Neural Information Processing Systems, vol. 37.","ama":"Modoranu I-V, Safaryan M, Malinovsky G, et al. MICROADAM: Accurate adaptive optimization with low space overhead and provable convergence. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","chicago":"Modoranu, Ionut-Vlad, Mher Safaryan, Grigory Malinovsky, Eldar Kurtic, Thomas Robert, Peter Richtárik, and Dan-Adrian Alistarh. “MICROADAM: Accurate Adaptive Optimization with Low Space Overhead and Provable Convergence.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. 
Neural Information Processing Systems Foundation, 2024.","apa":"Modoranu, I.-V., Safaryan, M., Malinovsky, G., Kurtic, E., Robert, T., Richtárik, P., &#38; Alistarh, D.-A. (2024). MICROADAM: Accurate adaptive optimization with low space overhead and provable convergence. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). Neural Information Processing Systems Foundation."},"language":[{"iso":"eng"}],"scopus_import":"1","project":[{"_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c","call_identifier":"H2020","name":"IST-BRIDGE: International postdoctoral program","grant_number":"101034413"}],"corr_author":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"12","external_id":{"arxiv":["2405.15593"]},"OA_type":"green","acknowledged_ssus":[{"_id":"CampIT"}],"related_material":{"link":[{"relation":"software","url":"https://github.com/IST-DASLab/MicroAdam"}]},"acknowledgement":"The authors thank Razvan Pascanu, Mahdi Nikdan and Soroush Tabesh for their valuable feedback, the IT department from Institute of Science and Technology Austria for the hardware support and Weights and Biases for the infrastructure to track all our experiments. 
Mher Safaryan has received funding from the European Union’s Horizon 2020 research and innovation program under the Marie Sklodowska-Curie grant agreement No 101034413.","publication_identifier":{"issn":["1049-5258"]},"status":"public","publication_status":"published","oa_version":"Preprint","type":"conference","author":[{"first_name":"Ionut-Vlad","id":"449f7a18-f128-11eb-9611-9b430c0c6333","full_name":"Modoranu, Ionut-Vlad","last_name":"Modoranu"},{"id":"dd546b39-0804-11ed-9c55-ef075c39778d","first_name":"Mher","last_name":"Safaryan","full_name":"Safaryan, Mher"},{"first_name":"Grigory","last_name":"Malinovsky","full_name":"Malinovsky, Grigory"},{"full_name":"Kurtic, Eldar","last_name":"Kurtic","id":"47beb3a5-07b5-11eb-9b87-b108ec578218","first_name":"Eldar"},{"id":"de632733-1457-11f0-ae22-b5914b8c1c41","first_name":"Thomas","last_name":"Robert","full_name":"Robert, Thomas"},{"last_name":"Richtárik","full_name":"Richtárik, Peter","first_name":"Peter"},{"full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"article_processing_charge":"No","date_updated":"2025-05-14T11:32:52Z","department":[{"_id":"DaAl"}],"_id":"19510","day":"20","publisher":"Neural Information Processing Systems Foundation","arxiv":1,"OA_place":"repository","volume":37,"title":"MICROADAM: Accurate adaptive optimization with low space overhead and provable convergence","ec_funded":1,"publication":"38th Conference on Neural Information Processing Systems","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2405.15593","open_access":"1"}],"abstract":[{"lang":"eng","text":"We propose a new variant of the Adam optimizer [Kingma and Ba, 2014] called\r\nMICROADAM that specifically minimizes memory overheads, while maintaining\r\ntheoretical convergence guarantees. 
We achieve this by compressing the gradient\r\ninformation before it is fed into the optimizer state, thereby reducing its memory\r\nfootprint significantly. We control the resulting compression error via a novel\r\ninstance of the classical error feedback mechanism from distributed optimization [Seide et al., 2014, Alistarh et al., 2018, Karimireddy et al., 2019] in which\r\nthe error correction information is itself compressed to allow for practical memory\r\ngains. We prove that the resulting approach maintains theoretical convergence\r\nguarantees competitive to those of AMSGrad, while providing good practical performance. Specifically, we show that MICROADAM can be implemented efficiently\r\non GPUs: on both million-scale (BERT) and billion-scale (LLaMA) models, MICROADAM provides practical convergence competitive to that of the uncompressed\r\nAdam baseline, with lower memory usage and similar running time. Our code is\r\navailable at https://github.com/IST-DASLab/MicroAdam."}],"date_created":"2025-04-06T22:01:32Z","intvolume":"        37","quality_controlled":"1","oa":1,"date_published":"2024-12-20T00:00:00Z","alternative_title":["Advances in Neural Information Processing Systems"],"year":"2024"},{"publication":"38th Conference on Neural Information Processing Systems","OA_place":"repository","volume":37,"title":"QuaRot: Outlier-free 4-bit inference in rotated LLMs","_id":"19511","publisher":"Neural Information Processing Systems Foundation","arxiv":1,"day":"20","date_published":"2024-12-20T00:00:00Z","year":"2024","alternative_title":["Advances in Neural Information Processing Systems"],"oa":1,"abstract":[{"lang":"eng","text":"We introduce QuaRot, a new Quantization scheme based on Rotations, which is able to quantize LLMs end-to-end, including all weights, activations, and KV cache in 4 bits. QuaRot rotates LLMs in a way that removes outliers from the hidden state without changing the output, making quantization easier. 
This computational invariance is applied to the hidden state (residual) of the LLM, as well as to the activations of the feed-forward components, aspects of the attention mechanism, and to the KV cache. The result is a quantized model where all matrix multiplications are performed in 4 bits, without any channels identified for retention in higher precision. Our 4-bit quantized LLAMA2-70B model has losses of at most 0.47 WikiText-2 perplexity and retains 99% of the zero-shot performance. We also show that QuaRot can provide lossless 6 and 8 bit LLAMA-2 models without any calibration data using round-to-nearest quantization. Code is available at github.com/spcl/QuaRot."}],"date_created":"2025-04-06T22:01:32Z","intvolume":"        37","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2404.00456","open_access":"1"}],"quality_controlled":"1","related_material":{"link":[{"relation":"software","url":"https://github.com/spcl/QuaRot"}]},"external_id":{"arxiv":["2404.00456"]},"OA_type":"green","publication_identifier":{"issn":["1049-5258"]},"month":"12","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","conference":{"name":"NeurIPS: Neural Information Processing Systems","start_date":"2024-12-09","location":"Vancouver, Canada","end_date":"2024-12-15"},"scopus_import":"1","citation":{"mla":"Ashkboos, Saleh, et al. “QuaRot: Outlier-Free 4-Bit Inference in Rotated LLMs.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024.","short":"S. Ashkboos, A. Mohtashami, M.L. Croci, B. Li, P. Cameron, M. Jaggi, D.-A. Alistarh, T. Hoefler, J. Hensman, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ieee":"S. Ashkboos <i>et al.</i>, “QuaRot: Outlier-free 4-bit inference in rotated LLMs,” in <i>38th Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 
37.","ista":"Ashkboos S, Mohtashami A, Croci ML, Li B, Cameron P, Jaggi M, Alistarh D-A, Hoefler T, Hensman J. 2024. QuaRot: Outlier-free 4-bit inference in rotated LLMs. 38th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37.","ama":"Ashkboos S, Mohtashami A, Croci ML, et al. QuaRot: Outlier-free 4-bit inference in rotated LLMs. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","chicago":"Ashkboos, Saleh, Amirkeivan Mohtashami, Maximilian L. Croci, Bo Li, Pashmina Cameron, Martin Jaggi, Dan-Adrian Alistarh, Torsten Hoefler, and James Hensman. “QuaRot: Outlier-Free 4-Bit Inference in Rotated LLMs.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","apa":"Ashkboos, S., Mohtashami, A., Croci, M. L., Li, B., Cameron, P., Jaggi, M., … Hensman, J. (2024). QuaRot: Outlier-free 4-bit inference in rotated LLMs. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). 
Vancouver, Canada: Neural Information Processing Systems Foundation."},"language":[{"iso":"eng"}],"date_updated":"2025-05-14T11:33:12Z","department":[{"_id":"DaAl"}],"article_processing_charge":"No","author":[{"first_name":"Saleh","full_name":"Ashkboos, Saleh","last_name":"Ashkboos"},{"last_name":"Mohtashami","full_name":"Mohtashami, Amirkeivan","first_name":"Amirkeivan"},{"full_name":"Croci, Maximilian L.","last_name":"Croci","first_name":"Maximilian L."},{"full_name":"Li, Bo","last_name":"Li","first_name":"Bo"},{"first_name":"Pashmina","last_name":"Cameron","full_name":"Cameron, Pashmina"},{"last_name":"Jaggi","full_name":"Jaggi, Martin","first_name":"Martin"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh"},{"first_name":"Torsten","full_name":"Hoefler, Torsten","last_name":"Hoefler"},{"last_name":"Hensman","full_name":"Hensman, James","first_name":"James"}],"publication_status":"published","type":"conference","oa_version":"Preprint","status":"public"},{"date_created":"2025-04-06T22:01:32Z","abstract":[{"lang":"eng","text":"Differential privacy with gradual expiration models the setting where data items\r\narrive in a stream and at a given time t the privacy loss guaranteed for a data item\r\nseen at time (t − d) is εg(d), where g is a monotonically non-decreasing function.\r\nWe study the fundamental continual (binary) counting problem where each data\r\nitem consists of a bit, and the algorithm needs to output at each time step the sum of\r\nall the bits streamed so far. 
For a stream of length T and privacy without expiration\r\ncontinual counting is possible with maximum (over all time steps) additive error\r\nO(log^2(T)/ε) and the best known lower bound is Ω(log(T)/ε); closing this gap\r\nis a challenging open problem.\r\nWe show that the situation is very different for privacy with gradual expiration by\r\ngiving upper and lower bounds for a large set of expiration functions g. Specifically,\r\nour algorithm achieves an additive error of O(log(T)/ε) for a large set of privacy\r\nexpiration functions. We also give a lower bound that shows that if C is the additive\r\nerror of any ε-DP algorithm for this problem, then the product of C and the privacy\r\nexpiration function after 2C steps must be Ω(log(T)/ε). Our algorithm matches\r\nthis lower bound as its additive error is O(log(T)/ε), even when g(2C) = O(1).\r\nOur empirical evaluation shows that we achieve a slowly growing privacy loss\r\nwith significantly smaller empirical privacy loss for large values of d than a natural\r\nbaseline algorithm."}],"intvolume":"        37","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2406.03802","open_access":"1"}],"quality_controlled":"1","date_published":"2024-12-20T00:00:00Z","year":"2024","alternative_title":["Advances in Neural Information Processing Systems"],"oa":1,"_id":"19512","publisher":"Neural Information Processing Systems Foundation","arxiv":1,"day":"20","publication":"38th Conference on Neural Information Processing Systems","volume":37,"OA_place":"repository","ec_funded":1,"title":"Continual counting with gradual privacy expiration","publication_status":"published","type":"conference","oa_version":"Preprint","status":"public","date_updated":"2025-05-14T11:33:22Z","department":[{"_id":"MoHe"}],"author":[{"first_name":"Joel Daniel","last_name":"Andersson","full_name":"Andersson, Joel Daniel"},{"first_name":"Monika H","id":"540c9bbd-f2de-11ec-812d-d04a5be85630","orcid":"0000-0002-5008-6530","full_name":"Henzinger, 
Monika H","last_name":"Henzinger"},{"first_name":"Rasmus","full_name":"Pagh, Rasmus","last_name":"Pagh"},{"first_name":"Teresa Anna","last_name":"Steiner","full_name":"Steiner, Teresa Anna"},{"full_name":"Upadhyay, Jalaj","last_name":"Upadhyay","first_name":"Jalaj"}],"article_processing_charge":"No","scopus_import":"1","corr_author":"1","project":[{"call_identifier":"H2020","name":"The design and evaluation of modern fully dynamic data structures","_id":"bd9ca328-d553-11ed-ba76-dc4f890cfe62","grant_number":"101019564"},{"name":"Efficient algorithms","_id":"34def286-11ca-11ed-8bc3-da5948e1613c","grant_number":"Z00422"},{"name":"Static and Dynamic Hierarchical Graph Decompositions","_id":"bda196b2-d553-11ed-ba76-8e8ee6c21103","grant_number":"I05982"},{"name":"Fast Algorithms for a Reactive Network Layer","_id":"bd9e3a2e-d553-11ed-ba76-8aa684ce17fe","grant_number":"P33775"}],"citation":{"apa":"Andersson, J. D., Henzinger, M., Pagh, R., Steiner, T. A., &#38; Upadhyay, J. (2024). Continual counting with gradual privacy expiration. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). Vancouver, Canada: Neural Information Processing Systems Foundation.","chicago":"Andersson, Joel Daniel, Monika Henzinger, Rasmus Pagh, Teresa Anna Steiner, and Jalaj Upadhyay. “Continual Counting with Gradual Privacy Expiration.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","ama":"Andersson JD, Henzinger M, Pagh R, Steiner TA, Upadhyay J. Continual counting with gradual privacy expiration. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","ieee":"J. D. Andersson, M. Henzinger, R. Pagh, T. A. Steiner, and J. Upadhyay, “Continual counting with gradual privacy expiration,” in <i>38th Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 37.","short":"J.D. 
Andersson, M. Henzinger, R. Pagh, T.A. Steiner, J. Upadhyay, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ista":"Andersson JD, Henzinger M, Pagh R, Steiner TA, Upadhyay J. 2024. Continual counting with gradual privacy expiration. 38th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37.","mla":"Andersson, Joel Daniel, et al. “Continual Counting with Gradual Privacy Expiration.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024."},"language":[{"iso":"eng"}],"acknowledgement":"Monika Henzinger: This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (Grant agreement No. 101019564) and the Austrian Science Fund (FWF) grant DOI 10.55776/Z422, grant DOI 10.55776/I5982, and grant DOI 10.55776/P33775 with additional funding from the netidee SCIENCE Stiftung, 2020–2024. Joel Daniel Andersson and Rasmus Pagh are affiliated with Basic Algorithms Research Copenhagen (BARC), supported by the VILLUM Foundation grant 16582, and are also supported by Providentia, a Data Science Distinguished Investigator grant from Novo Nordisk Fonden. Teresa Anna Steiner is supported by a research grant (VIL51463) from VILLUM FONDEN. This work was done while Teresa Anna Steiner was a Postdoc at the Technical University of Denmark. Jalaj Upadhyay’s research was funded by the Rutgers Decanal Grant no. 
302918 and an unrestricted gift from Google.","OA_type":"green","external_id":{"arxiv":["2406.03802"]},"publication_identifier":{"issn":["1049-5258"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"12","conference":{"end_date":"2024-12-15","name":"NeurIPS: Neural Information Processing Systems","location":"Vancouver, Canada","start_date":"2024-12-09"}},{"quality_controlled":"1","date_created":"2025-04-06T22:01:32Z","intvolume":"        37","abstract":[{"lang":"eng","text":"Neural models learn data representations that lie on low-dimensional manifolds,\r\nyet modeling the relation between these representational spaces is an ongoing challenge. By integrating spectral geometry principles into neural modeling, we show\r\nthat this problem can be better addressed in the functional domain, mitigating complexity, while enhancing interpretability and performances on downstream tasks.\r\nTo this end, we introduce a multi-purpose framework to the representation learning\r\ncommunity, which allows to: (i) compare different spaces in an interpretable way\r\nand measure their intrinsic similarity; (ii) find correspondences between them, both\r\nin unsupervised and weakly supervised settings, and (iii) to effectively transfer\r\nrepresentations between distinct spaces. 
We validate our framework on various\r\napplications, ranging from stitching to retrieval tasks, and on multiple modalities,\r\ndemonstrating that Latent Functional Maps can serve as a swiss-army knife for\r\nrepresentation alignment."}],"main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2406.14183","open_access":"1"}],"oa":1,"year":"2024","alternative_title":["Advances in Neural Information Processing Systems"],"date_published":"2024-12-20T00:00:00Z","publisher":"Neural Information Processing Systems Foundation","arxiv":1,"day":"20","_id":"19515","ec_funded":1,"title":"Latent functional maps: A spectral framework for representation alignment","OA_place":"repository","volume":37,"publication":"38th Conference on Neural Information Processing Systems","status":"public","type":"conference","oa_version":"Preprint","publication_status":"published","author":[{"full_name":"Fumero, Marco","last_name":"Fumero","id":"1c1593eb-393f-11ef-bb8e-ab4f1e979650","first_name":"Marco"},{"last_name":"Pegoraro","full_name":"Pegoraro, Marco","first_name":"Marco"},{"full_name":"Maiorca, Valentino","last_name":"Maiorca","first_name":"Valentino"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","first_name":"Francesco","last_name":"Locatello","full_name":"Locatello, Francesco"},{"first_name":"Emanuele","full_name":"Rodolà, Emanuele","last_name":"Rodolà"}],"article_processing_charge":"No","department":[{"_id":"FrLo"}],"date_updated":"2025-05-14T11:36:51Z","language":[{"iso":"eng"}],"citation":{"ista":"Fumero M, Pegoraro M, Maiorca V, Locatello F, Rodolà E. 2024. Latent functional maps: A spectral framework for representation alignment. 38th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37.","short":"M. Fumero, M. Pegoraro, V. Maiorca, F. Locatello, E. 
Rodolà, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ieee":"M. Fumero, M. Pegoraro, V. Maiorca, F. Locatello, and E. Rodolà, “Latent functional maps: A spectral framework for representation alignment,” in <i>38th Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 37.","mla":"Fumero, Marco, et al. “Latent Functional Maps: A Spectral Framework for Representation Alignment.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024.","ama":"Fumero M, Pegoraro M, Maiorca V, Locatello F, Rodolà E. Latent functional maps: A spectral framework for representation alignment. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","apa":"Fumero, M., Pegoraro, M., Maiorca, V., Locatello, F., &#38; Rodolà, E. (2024). Latent functional maps: A spectral framework for representation alignment. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). Vancouver, Canada: Neural Information Processing Systems Foundation.","chicago":"Fumero, Marco, Marco Pegoraro, Valentino Maiorca, Francesco Locatello, and Emanuele Rodolà. “Latent Functional Maps: A Spectral Framework for Representation Alignment.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. 
Neural Information Processing Systems Foundation, 2024."},"corr_author":"1","project":[{"name":"IST-BRIDGE: International postdoctoral program","call_identifier":"H2020","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c","grant_number":"101034413"}],"scopus_import":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"12","conference":{"end_date":"2024-12-15","location":"Vancouver, Canada","start_date":"2024-12-09","name":"NeurIPS: Neural Information Processing Systems"},"publication_identifier":{"issn":["1049-5258"]},"acknowledgement":"MF is supported by the MSCA IST-Bridge fellowship which has received funding from the European Union’s Horizon 2020 research and innovation program under the Marie Skłodowska-Curie grant agreement No 101034413. ER and VM are supported by the PNRR MUR project PE0000013-FAIR. MP is supported by the Sapienza grant \"Predicting and Explaining Clinical Trial Outcomes\", prot. RG12218166FA3F13.","external_id":{"arxiv":["2406.14183"]},"OA_type":"green"},{"year":"2024","alternative_title":["Advances in Neural Information Processing Systems"],"date_published":"2024-12-20T00:00:00Z","oa":1,"quality_controlled":"1","date_created":"2025-04-06T22:01:32Z","intvolume":"        37","abstract":[{"text":"In this paper, we present a novel data-free method for merging neural networks in weight space. Differently from most existing works, our method optimizes for the permutations of network neurons globally across all layers. This allows us to enforce cycle consistency of the permutations when merging n ≥ 3 models, allowing circular compositions of permutations to be computed without accumulating error along the path. We qualitatively and quantitatively motivate the need for such a constraint, showing its benefits when merging sets of models in scenarios spanning varying architectures and datasets. 
We finally show that, when coupled\r\nwith activation renormalization, our approach yields the best results in the task.","lang":"eng"}],"main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2405.17897","open_access":"1"}],"publication":"38th Conference on Neural Information Processing Systems","ec_funded":1,"title":"C2M3: Cycle-consistent multi-model merging","OA_place":"repository","volume":37,"publisher":"Neural Information Processing Systems Foundation","arxiv":1,"day":"20","_id":"19517","department":[{"_id":"FrLo"}],"date_updated":"2025-05-14T11:36:59Z","author":[{"last_name":"Crisostomi","full_name":"Crisostomi, Donato","first_name":"Donato"},{"first_name":"Marco","id":"1c1593eb-393f-11ef-bb8e-ab4f1e979650","last_name":"Fumero","full_name":"Fumero, Marco"},{"full_name":"Baieri, Daniele","last_name":"Baieri","first_name":"Daniele"},{"last_name":"Bernard","full_name":"Bernard, Florian","first_name":"Florian"},{"last_name":"Rodolà","full_name":"Rodolà, Emanuele","first_name":"Emanuele"}],"article_processing_charge":"No","type":"conference","oa_version":"Preprint","publication_status":"published","status":"public","publication_identifier":{"issn":["1049-5258"]},"acknowledgement":"This work is supported by the ERC grant no.802554 (SPECGEO), PRIN 2020 project\r\nno.2020TA3K9N (LEGO.AI), and PNRR MUR project PE0000013-FAIR. Marco Fumero is supported by the MSCA IST-Bridge fellowship which has received funding from the European Union’s Horizon 2020 research and innovation program under the Marie Skłodowska-Curie grant agreement No 101034413. 
We thank Simone Scardapane for the helpful feedback on the paper.","external_id":{"arxiv":["2405.17897"]},"OA_type":"green","month":"12","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","conference":{"name":"NeurIPS: Neural Information Processing Systems","location":"Vancouver, Canada","start_date":"2024-12-09","end_date":"2024-12-15"},"corr_author":"1","project":[{"grant_number":"101034413","name":"IST-BRIDGE: International postdoctoral program","call_identifier":"H2020","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c"}],"scopus_import":"1","language":[{"iso":"eng"}],"citation":{"ama":"Crisostomi D, Fumero M, Baieri D, Bernard F, Rodolà E. C2M3: Cycle-consistent multi-model merging. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","chicago":"Crisostomi, Donato, Marco Fumero, Daniele Baieri, Florian Bernard, and Emanuele Rodolà. “C2M3: Cycle-Consistent Multi-Model Merging.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","apa":"Crisostomi, D., Fumero, M., Baieri, D., Bernard, F., &#38; Rodolà, E. (2024). C2M3: Cycle-consistent multi-model merging. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). Vancouver, Canada: Neural Information Processing Systems Foundation.","mla":"Crisostomi, Donato, et al. “C2M3: Cycle-Consistent Multi-Model Merging.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024.","short":"D. Crisostomi, M. Fumero, D. Baieri, F. Bernard, E. Rodolà, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ieee":"D. Crisostomi, M. Fumero, D. Baieri, F. Bernard, and E. Rodolà, “C2M3: Cycle-consistent multi-model merging,” in <i>38th Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 
37.","ista":"Crisostomi D, Fumero M, Baieri D, Bernard F, Rodolà E. 2024. C2M3: Cycle-consistent multi-model merging. 38th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 37."}}]
