[{"author":[{"last_name":"Montagna","first_name":"Francesco","id":"353afc8e-19f4-11f0-9db9-811f1723c83f","full_name":"Montagna, Francesco"}],"ddc":["000"],"date_published":"2026-02-11T00:00:00Z","corr_author":"1","OA_type":"gold","has_accepted_license":"1","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"publisher":"OpenReview","date_updated":"2026-02-16T08:15:11Z","status":"public","publication_status":"accepted","day":"11","year":"2026","title":"On the identifiability of causal graphs with multiple environments","publication":"The 14th International Conference on Learning Representations","abstract":[{"text":"Causal discovery from i.i.d. observational data is known to be generally ill-posed. We demonstrate that if we have access to the distribution induced by a structural causal model, and additional data from (in the best case) only two environments that sufficiently differ in the noise statistics, the unique causal graph is identifiable. Notably, this is the first result in the literature that guarantees the entire causal graph recovery with a constant number of environments and arbitrary nonlinear mechanisms. Our only constraint is the Gaussianity of the noise terms; however, we propose potential ways to relax this requirement. 
Of interest on its own, we expand on the well-known duality between independent component analysis (ICA) and causal discovery; recent advancements have shown that nonlinear ICA can be solved from multiple environments, at least as many as the number of sources: we show that the same can be achieved for causal discovery while having access to much less auxiliary information.","lang":"eng"}],"article_processing_charge":"No","conference":{"end_date":"2026-04-27","start_date":"2026-04-23","name":"ICLR: International Conference on Learning Representations","location":"Rio de Janeiro, Brazil"},"external_id":{"arxiv":["2510.13583"]},"arxiv":1,"citation":{"ista":"Montagna F. On the identifiability of causal graphs with multiple environments. The 14th International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","apa":"Montagna, F. (n.d.). On the identifiability of causal graphs with multiple environments. In <i>The 14th International Conference on Learning Representations</i>. Rio de Janeiro, Brazil: OpenReview.","short":"F. Montagna, in:, The 14th International Conference on Learning Representations, OpenReview, n.d.","ieee":"F. Montagna, “On the identifiability of causal graphs with multiple environments,” in <i>The 14th International Conference on Learning Representations</i>, Rio de Janeiro, Brazil.","mla":"Montagna, Francesco. “On the Identifiability of Causal Graphs with Multiple Environments.” <i>The 14th International Conference on Learning Representations</i>, OpenReview.","ama":"Montagna F. On the identifiability of causal graphs with multiple environments. In: <i>The 14th International Conference on Learning Representations</i>. OpenReview.","chicago":"Montagna, Francesco. “On the Identifiability of Causal Graphs with Multiple Environments.” In <i>The 14th International Conference on Learning Representations</i>. 
OpenReview, n.d."},"month":"02","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference","date_created":"2026-01-30T08:16:25Z","_id":"21113","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2510.13583","open_access":"1"}],"department":[{"_id":"FrLo"}],"oa":1,"oa_version":"Published Version","language":[{"iso":"eng"}],"OA_place":"publisher"},{"publication_status":"published","status":"public","publisher":"ML Research Press","article_type":"original","date_updated":"2026-01-05T09:54:59Z","PlanS_conform":"1","day":"18","year":"2025","file":[{"file_name":"2025_PMLR_Montagna.pdf","file_id":"20939","success":1,"checksum":"968c471bb1f682cf823b2d4cadea8a3f","date_created":"2026-01-05T09:51:28Z","relation":"main_file","access_level":"open_access","creator":"dernst","file_size":1030280,"content_type":"application/pdf","date_updated":"2026-01-05T09:51:28Z"}],"ddc":["000"],"date_published":"2025-12-18T00:00:00Z","corr_author":"1","OA_type":"gold","author":[{"first_name":"Francesco","last_name":"Montagna","full_name":"Montagna, Francesco","id":"353afc8e-19f4-11f0-9db9-811f1723c83f"},{"first_name":"Maximilian T","last_name":"Cairney-Leeming","full_name":"Cairney-Leeming, Maximilian T","id":"2214a80c-31f8-11ee-a48d-cf52cc58759b"},{"full_name":"Sridhar, Dhanya","first_name":"Dhanya","last_name":"Sridhar"},{"last_name":"Locatello","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683"}],"tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"has_accepted_license":"1","related_material":{"link":[{"url":"https://github.com/francescomontagna/learning-to-induce.git","relation":"software"}]},"_id":"20934","publication_identifier":{"eissn":["2835-8856"]},"date_created":"2026-01-04T23:01:35Z","citation":{"chicago":"Montagna, Francesco, 
Maximilian T Cairney-Leeming, Dhanya Sridhar, and Francesco Locatello. “Demystifying Amortized Causal Discovery with Transformers.” <i>Transactions on Machine Learning Research</i>. ML Research Press, 2025.","ama":"Montagna F, Cairney-Leeming MT, Sridhar D, Locatello F. Demystifying amortized causal discovery with transformers. <i>Transactions on Machine Learning Research</i>. 2025.","mla":"Montagna, Francesco, et al. “Demystifying Amortized Causal Discovery with Transformers.” <i>Transactions on Machine Learning Research</i>, ML Research Press, 2025.","ieee":"F. Montagna, M. T. Cairney-Leeming, D. Sridhar, and F. Locatello, “Demystifying amortized causal discovery with transformers,” <i>Transactions on Machine Learning Research</i>. ML Research Press, 2025.","short":"F. Montagna, M.T. Cairney-Leeming, D. Sridhar, F. Locatello, Transactions on Machine Learning Research (2025).","apa":"Montagna, F., Cairney-Leeming, M. T., Sridhar, D., &#38; Locatello, F. (2025). Demystifying amortized causal discovery with transformers. <i>Transactions on Machine Learning Research</i>. ML Research Press.","ista":"Montagna F, Cairney-Leeming MT, Sridhar D, Locatello F. 2025. Demystifying amortized causal discovery with transformers. Transactions on Machine Learning Research."},"scopus_import":"1","type":"journal_article","month":"12","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","language":[{"iso":"eng"}],"quality_controlled":"1","OA_place":"publisher","department":[{"_id":"FrLo"}],"oa":1,"oa_version":"Published Version","file_date_updated":"2026-01-05T09:51:28Z","alternative_title":["TMLR"],"abstract":[{"lang":"eng","text":" Supervised learning for causal discovery from observational data often achieves competitive performance despite seemingly avoiding the explicit assumptions that traditional methods require for identifiability. 
In this work, we analyze CSIvA (Ke et al., 2023) on bivariate causal models, a transformer architecture for amortized inference promising to train on synthetic data and transfer to real ones. First, we bridge the gap with identifiability theory, showing that the training distribution implicitly defines a prior on the causal model of the test observations: consistent with classical approaches, good performance is achieved when we have a good prior on the test data, and the underlying model is identifiable. Second, we find that CSIvA cannot generalize to classes of causal models unseen during training: to overcome this limitation, we theoretically and empirically analyze when training CSIvA on datasets generated by multiple identifiable causal models with different structural assumptions improves its generalization at test time. Overall, we find that amortized causal discovery still adheres to identifiability theory, violating the previous hypothesis from Lopez-Paz et al. (2015) that supervised learning methods could overcome its restrictions."}],"publication":"Transactions on Machine Learning Research","title":"Demystifying amortized causal discovery with transformers","arxiv":1,"article_processing_charge":"No","external_id":{"arxiv":["2405.16924"]}},{"title":"How to probe: Simple yet effective techniques for improving post-hoc explanations","file_date_updated":"2026-02-09T06:06:14Z","abstract":[{"lang":"eng","text":"Post-hoc importance attribution methods are a popular tool for “explaining” Deep Neural Networks (DNNs) and are inherently based on the assumption that the explanations can be applied independently of how the models were trained. Contrarily, in this work we bring forward empirical evidence that challenges this very notion. 
Surprisingly, we discover a strong dependency on and demonstrate that the training details of a pre-trained model’s classification layer (<10% of model parameters) play a crucial role, much more than the pre-training scheme itself. This is of high practical relevance: (1) as techniques for pre-training models are becoming increasingly diverse, understanding the interplay between these techniques and attribution methods is critical; (2) it sheds light on an important yet overlooked assumption of post-hoc attribution methods which can drastically impact model explanations and how they are interpreted eventually. With this finding we also present simple yet effective adjustments to the classification layers, that can significantly enhance the quality of model explanations. We validate our findings across several visual pre-training frameworks (fully-supervised, self-supervised, contrastive vision-language training) and analyse how they impact explanations for a wide range of attribution methods on a diverse set of evaluation metrics."}],"publication":"13th International Conference on Learning Representations","conference":{"location":"Singapore","name":"ICLR: International Conference on Learning Representations","end_date":"2025-04-28","start_date":"2025-04-24"},"external_id":{"arxiv":["2503.00641"]},"article_processing_charge":"No","arxiv":1,"citation":{"apa":"Gairola, S., Böhle, M., Locatello, F., &#38; Schiele, B. (2025). How to probe: Simple yet effective techniques for improving post-hoc explanations. In <i>13th International Conference on Learning Representations</i>. Singapore: ICLR.","ista":"Gairola S, Böhle M, Locatello F, Schiele B. 2025. How to probe: Simple yet effective techniques for improving post-hoc explanations. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","ama":"Gairola S, Böhle M, Locatello F, Schiele B. 
How to probe: Simple yet effective techniques for improving post-hoc explanations. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025.","chicago":"Gairola, Siddhartha, Moritz Böhle, Francesco Locatello, and Bernt Schiele. “How to Probe: Simple yet Effective Techniques for Improving Post-Hoc Explanations.” In <i>13th International Conference on Learning Representations</i>. ICLR, 2025.","short":"S. Gairola, M. Böhle, F. Locatello, B. Schiele, in:, 13th International Conference on Learning Representations, ICLR, 2025.","mla":"Gairola, Siddhartha, et al. “How to Probe: Simple yet Effective Techniques for Improving Post-Hoc Explanations.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025.","ieee":"S. Gairola, M. Böhle, F. Locatello, and B. Schiele, “How to probe: Simple yet effective techniques for improving post-hoc explanations,” in <i>13th International Conference on Learning Representations</i>, Singapore, 2025."},"type":"conference","month":"01","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"21049","related_material":{"link":[{"url":"https://github.com/sidgairo18/how-to-probe","relation":"software"}]},"date_created":"2026-01-27T12:48:35Z","acknowledgement":"We sincerely thank Sukrut Rao and Yue Fan for their valuable feedback on the paper and insightful discussions throughout the project. Additionally, we appreciate Sukrut’s help\r\nwith some LaTeX sorcery. 
This work was partially supported by ELSA Mobility Program\r\nas part of the ELLIS exchange program to the Institute of Science and Technology Austria (ISTA), where a portion of this research was conducted.","department":[{"_id":"FrLo"}],"oa_version":"Published Version","oa":1,"language":[{"iso":"eng"}],"quality_controlled":"1","OA_place":"publisher","author":[{"last_name":"Gairola","first_name":"Siddhartha","id":"fb21489d-057c-11f1-b1b6-d68cd6ae64f5","full_name":"Gairola, Siddhartha"},{"last_name":"Böhle","first_name":"Moritz","full_name":"Böhle, Moritz"},{"last_name":"Locatello","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683"},{"last_name":"Schiele","first_name":"Bernt","full_name":"Schiele, Bernt"}],"ddc":["000"],"date_published":"2025-01-22T00:00:00Z","OA_type":"gold","corr_author":"1","has_accepted_license":"1","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"publisher":"ICLR","date_updated":"2026-02-09T06:11:17Z","publication_status":"published","status":"public","file":[{"date_created":"2026-02-09T06:06:14Z","checksum":"6c8dfe4291c41d5a2c2fd838105e10b9","relation":"main_file","access_level":"open_access","creator":"dernst","content_type":"application/pdf","date_updated":"2026-02-09T06:06:14Z","file_size":24386863,"file_name":"2025_ICLR_Gairola.pdf","file_id":"21162","success":1}],"day":"22","year":"2025"},{"OA_place":"publisher","language":[{"iso":"eng"}],"quality_controlled":"1","oa":1,"oa_version":"Published Version","department":[{"_id":"FrLo"}],"acknowledgement":"Philipp M. Faller was supported by a doctoral scholarship of the Studienstiftung des deutschen\r\nVolkes (German Academic Scholarship Foundation). This work has been supported by AFOSR,\r\ngrant n. FA8655-20-1-7035. 
FM is supported by Programma Operativo Nazionale ricerca e innovazione 2014-2020. We thank Atalanti A. Mastakouri, Kun Zhang and Haoyue Dai for the insightful discussions.","main_file_link":[{"url":"https://proceedings.mlr.press/v275/montagna25a.html","open_access":"1"}],"_id":"21066","publication_identifier":{"eissn":["2640-3498"]},"date_created":"2026-01-29T14:19:09Z","type":"conference","month":"05","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"short":"F. Montagna, P. Faller, P. Blöbaum, E. Kirschbaum, F. Locatello, in:, Proceedings of the Fourth Conference on Causal Learning and Reasoning, ML Research Press, 2025, pp. 552–605.","mla":"Montagna, Francesco, et al. “Score Matching through the Roof: Linear, Nonlinear, and Latent Variables Causal Discovery.” <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i>, vol. 275, ML Research Press, 2025, pp. 552–605.","ieee":"F. Montagna, P. Faller, P. Blöbaum, E. Kirschbaum, and F. Locatello, “Score matching through the roof: Linear, nonlinear, and latent variables causal discovery,” in <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i>, Lausanne, Switzerland, 2025, vol. 275, pp. 552–605.","ama":"Montagna F, Faller P, Blöbaum P, Kirschbaum E, Locatello F. Score matching through the roof: Linear, nonlinear, and latent variables causal discovery. In: <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i>. Vol 275. ML Research Press; 2025:552-605.","chicago":"Montagna, Francesco, Philipp Faller, Patrik Blöbaum, Elke Kirschbaum, and Francesco Locatello. “Score Matching through the Roof: Linear, Nonlinear, and Latent Variables Causal Discovery.” In <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i>, 275:552–605. ML Research Press, 2025.","ista":"Montagna F, Faller P, Blöbaum P, Kirschbaum E, Locatello F. 2025. Score matching through the roof: Linear, nonlinear, and latent variables causal discovery. 
Proceedings of the Fourth Conference on Causal Learning and Reasoning. CLeaR: Conference on Causal Learning and Reasoning, PMLR, vol. 275, 552–605.","apa":"Montagna, F., Faller, P., Blöbaum, P., Kirschbaum, E., &#38; Locatello, F. (2025). Score matching through the roof: Linear, nonlinear, and latent variables causal discovery. In <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i> (Vol. 275, pp. 552–605). Lausanne, Switzerland: ML Research Press."},"arxiv":1,"conference":{"name":"CLeaR: Conference on Causal Learning and Reasoning","location":"Lausanne, Switzerland","start_date":"2025-05-07","end_date":"2025-05-09"},"external_id":{"arxiv":["2407.18755"]},"page":"552-605","article_processing_charge":"No","alternative_title":["PMLR"],"volume":275,"abstract":[{"text":"Causal discovery from observational data holds great promise, but existing methods rely on strong assumptions about the underlying causal structure, often requiring full observability of all relevant variables. We tackle these challenges by leveraging the score function ∇logp(X)\r\n of observed variables for causal discovery and propose the following contributions. First, we generalize the existing results of identifiability with the score to additive noise models with minimal requirements on the causal mechanisms. Second, we establish conditions for inferring causal relations from the score even in the presence of hidden variables; this result is two-faced: we demonstrate the score’s potential as an alternative to conditional independence tests to infer the equivalence class of causal graphs with hidden variables, and we provide the necessary conditions for identifying direct causes in latent variable models. 
Building on these insights, we propose a flexible algorithm for causal discovery across linear, nonlinear, and latent variable models, which we empirically validate.","lang":"eng"}],"publication":"Proceedings of the Fourth Conference on Causal Learning and Reasoning","file_date_updated":"2026-01-29T14:17:48Z","title":"Score matching through the roof: Linear, nonlinear, and latent variables causal discovery","year":"2025","day":"01","file":[{"access_level":"open_access","date_created":"2026-01-29T14:17:48Z","checksum":"f2bc44b2320667d4049b3518b1f2fe5d","relation":"main_file","file_size":1739334,"content_type":"application/pdf","date_updated":"2026-01-29T14:17:48Z","creator":"flocatel","file_id":"21067","file_name":"montagna25a.pdf","success":1}],"publication_status":"published","status":"public","date_updated":"2026-02-10T11:54:02Z","publisher":"ML Research Press","intvolume":"       275","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"has_accepted_license":"1","corr_author":"1","OA_type":"gold","ddc":["000"],"date_published":"2025-05-01T00:00:00Z","author":[{"first_name":"Francesco","last_name":"Montagna","full_name":"Montagna, Francesco"},{"first_name":"Philipp","last_name":"Faller","full_name":"Faller, Philipp"},{"full_name":"Blöbaum, Patrik","first_name":"Patrik","last_name":"Blöbaum"},{"last_name":"Kirschbaum","first_name":"Elke","full_name":"Kirschbaum, Elke"},{"last_name":"Locatello","first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco"}]},{"alternative_title":["Advances in Neural Information Processing Systems"],"volume":38,"abstract":[{"lang":"eng","text":"Causal reasoning and discovery, two fundamental tasks of causal analysis,\r\noften face challenges in applications due to the complexity, noisiness, and 
high-dimensionality of real-world data. Despite recent progress in identifying latent\r\ncausal structures using causal representation learning (CRL), what makes learned\r\nrepresentations useful for causal downstream tasks and how to evaluate them are\r\nstill not well understood. In this paper, we reinterpret CRL using a measurement\r\nmodel framework, where the learned representations are viewed as proxy measurements of the latent causal variables. Our approach clarifies the conditions under\r\nwhich learned representations support downstream causal reasoning and provides\r\na principled basis for quantitatively assessing the quality of representations using\r\na new Test-based Measurement EXclusivity (T-MEX) score. We validate T-MEX\r\nacross diverse causal inference scenarios, including numerical simulations and\r\nreal-world ecological video analysis, demonstrating that the proposed framework\r\nand corresponding score effectively assess the identification of learned representations and their usefulness for causal downstream tasks. Reproducible code can\r\nbe found at https://github.com/shimenghuang/a-measurement-perspective-of-crl."}],"publication":"39th Annual Conference on Neural Information Processing Systems","title":"The third pillar of causal analysis? 
A measurement perspective on causal representations","arxiv":1,"article_processing_charge":"No","external_id":{"arxiv":["2505.17708"]},"conference":{"start_date":"2025-12-02","end_date":"2025-12-07","name":"NeurIPS: Neural Information Processing Systems","location":"San Diego, CA, United States"},"_id":"21068","publication_identifier":{"issn":["1049-5258"]},"related_material":{"link":[{"url":"https://github.com/shimenghuang/a-measurement-perspective-of-crl","relation":"software"}]},"date_created":"2026-01-29T14:24:56Z","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2505.17708"}],"acknowledgement":"This research was funded in whole or in part by the Austrian Science Fund (FWF) 10.55776/COE12. For open access purposes, the author has applied a CC BY public copyright license to any accepted manuscript version arising from this submission.\r\n","citation":{"ama":"Yao D, Huang S, Cadei R, Zhang K, Locatello F. The third pillar of causal analysis? A measurement perspective on causal representations. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","chicago":"Yao, Dingling, Shimeng Huang, Riccardo Cadei, Kun Zhang, and Francesco Locatello. “The Third Pillar of Causal Analysis? A Measurement Perspective on Causal Representations.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. Neural Information Processing Systems Foundation, 2025.","short":"D. Yao, S. Huang, R. Cadei, K. Zhang, F. Locatello, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","ieee":"D. Yao, S. Huang, R. Cadei, K. Zhang, and F. Locatello, “The third pillar of causal analysis? A measurement perspective on causal representations,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38.","mla":"Yao, Dingling, et al. 
“The Third Pillar of Causal Analysis? A Measurement Perspective on Causal Representations.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","apa":"Yao, D., Huang, S., Cadei, R., Zhang, K., &#38; Locatello, F. (2025). The third pillar of causal analysis? A measurement perspective on causal representations. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","ista":"Yao D, Huang S, Cadei R, Zhang K, Locatello F. 2025. The third pillar of causal analysis? A measurement perspective on causal representations. 39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 38."},"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"12","quality_controlled":"1","language":[{"iso":"eng"}],"OA_place":"repository","department":[{"_id":"FrLo"}],"oa_version":"Preprint","oa":1,"ddc":["000"],"date_published":"2025-12-15T00:00:00Z","OA_type":"green","corr_author":"1","author":[{"full_name":"Yao, Dingling","id":"d3e02e50-48a8-11ee-8f62-c108061797fa","first_name":"Dingling","last_name":"Yao"},{"first_name":"Shimeng","last_name":"Huang","orcid":"0000-0001-6919-821X","full_name":"Huang, Shimeng","id":"989c2a06-fb4e-11ef-a992-ab766442255b"},{"first_name":"Riccardo","last_name":"Cadei","full_name":"Cadei, Riccardo","id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b"},{"last_name":"Zhang","first_name":"Kun","full_name":"Zhang, Kun"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco"}],"tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY 
(4.0)","image":"/images/cc_by.png"},"intvolume":"        38","has_accepted_license":"1","publication_status":"epub_ahead","status":"public","publisher":"Neural Information Processing Systems Foundation","date_updated":"2026-02-10T12:08:52Z","day":"15","year":"2025"},{"publication":"39th Annual Conference on Neural Information Processing Systems","abstract":[{"lang":"eng","text":"Deep learning systems deployed in real-world applications often encounter data that is different from their in-distribution (ID). A reliable model should ideally abstain from making decisions in this out-of-distribution (OOD) setting. Existing state-of-the-art methods primarily focus on feature distances, such as k-th nearest neighbors and distances to decision boundaries, either overlooking or ineffectively using in-distribution statistics. In this work, we propose a novel angle-based metric for OOD detection that is computed relative to the in-distribution structure. We demonstrate that the angles between feature representations and decision boundaries, viewed from the mean of in-distribution features, serve as an effective discriminative factor between ID and OOD data. We evaluate our method on nine ImageNet-pretrained models. Our approach achieves the lowest FPR in 5 out of 9 ImageNet models, obtains the best average FPR overall, and consistently ranks among the top 3 across all evaluated models. Furthermore, we highlight the benefits of contrastive representations by showing strong performance with ResNet SCL and CLIP architectures. Finally, we demonstrate that the scale-invariant nature of our score enables an ensemble strategy via simple score summation. 
"}],"volume":38,"alternative_title":["Advances in Neural Information Processing Systems"],"title":"Out-of-Distribution detection with relative angles","arxiv":1,"article_processing_charge":"No","external_id":{"arxiv":["2410.04525"]},"conference":{"location":"San Diego, CA, United States","name":"NeurIPS: Neural Information Processing Systems","end_date":"2025-12-07","start_date":"2025-12-02"},"date_created":"2026-01-29T14:26:47Z","_id":"21070","publication_identifier":{"issn":["1049-5258"]},"related_material":{"link":[{"relation":"software","url":"https://github.com/berkerdemirel/ORA-OOD-Detection-with-Relative-Angles"}]},"main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2410.04525","open_access":"1"}],"acknowledgement":"This research was funded in whole or in part by the Austrian Science Fund (FWF) 10.55776/COE12. For open access purposes, the author has applied a CC BY public copyright license to any accepted manuscript version arising from this submission.\r\n","citation":{"ista":"Demirel B, Fumero M, Locatello F. 2025. Out-of-Distribution detection with relative angles. 39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 38.","apa":"Demirel, B., Fumero, M., &#38; Locatello, F. (2025). Out-of-Distribution detection with relative angles. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","short":"B. Demirel, M. Fumero, F. Locatello, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","mla":"Demirel, Berker, et al. “Out-of-Distribution Detection with Relative Angles.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","ieee":"B. Demirel, M. Fumero, and F. 
Locatello, “Out-of-Distribution detection with relative angles,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38.","ama":"Demirel B, Fumero M, Locatello F. Out-of-Distribution detection with relative angles. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","chicago":"Demirel, Berker, Marco Fumero, and Francesco Locatello. “Out-of-Distribution Detection with Relative Angles.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. Neural Information Processing Systems Foundation, 2025."},"month":"12","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference","language":[{"iso":"eng"}],"quality_controlled":"1","OA_place":"repository","department":[{"_id":"FrLo"}],"oa_version":"Preprint","oa":1,"ddc":["000"],"date_published":"2025-12-01T00:00:00Z","OA_type":"green","corr_author":"1","author":[{"full_name":"Demirel, Berker","id":"8b4bc47f-3200-11ee-973b-8f0e7be21a9f","first_name":"Berker","last_name":"Demirel"},{"full_name":"Fumero, Marco","first_name":"Marco","last_name":"Fumero"},{"first_name":"Francesco","last_name":"Locatello","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"}],"tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"intvolume":"        38","has_accepted_license":"1","status":"public","publication_status":"epub_ahead","publisher":"Neural Information Processing Systems 
Foundation","date_updated":"2026-02-16T11:38:25Z","day":"01","year":"2025"},{"oa":1,"oa_version":"Preprint","department":[{"_id":"FrLo"}],"OA_place":"publisher","language":[{"iso":"eng"}],"quality_controlled":"1","type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"12","citation":{"ista":"Basile L, Maiorca V, Doimo D, Locatello F, Cazzaniga A. 2025. Head pursuit: Probing attention specialization in multimodal transformers. 39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 38.","apa":"Basile, L., Maiorca, V., Doimo, D., Locatello, F., &#38; Cazzaniga, A. (2025). Head pursuit: Probing attention specialization in multimodal transformers. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","mla":"Basile, Lorenzo, et al. “Head Pursuit: Probing Attention Specialization in Multimodal Transformers.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","ieee":"L. Basile, V. Maiorca, D. Doimo, F. Locatello, and A. Cazzaniga, “Head pursuit: Probing attention specialization in multimodal transformers,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38.","short":"L. Basile, V. Maiorca, D. Doimo, F. Locatello, A. Cazzaniga, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","chicago":"Basile, Lorenzo, Valentino Maiorca, Diego Doimo, Francesco Locatello, and Alberto Cazzaniga. “Head Pursuit: Probing Attention Specialization in Multimodal Transformers.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. Neural Information Processing Systems Foundation, 2025.","ama":"Basile L, Maiorca V, Doimo D, Locatello F, Cazzaniga A. 
Head pursuit: Probing attention specialization in multimodal transformers. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025."},"acknowledgement":"The authors acknowledge the Area Science Park supercomputing platform ORFEO made available for conducting the research reported in this paper, and the technical support of the Laboratory of Data Engineering staff. LB, DD and AC were supported by the project “Supporto alla diagnosi di malattie rare tramite l’intelligenza artificiale\" CUP: F53C22001770002 and “Valutazione automatica delle immagini diagnostiche tramite l’intelligenza artificiale\", CUP: F53C22001780002. LB was supported by the European Union – NextGenerationEU within the project PNRR “Finanziamento di progetti presentati da giovani ricercatori\" - Mission 4 Component 2 Investment 1.2, CUP: J93C25000440001. AC was supported by the European Union – NextGenerationEU within the project PNRR “PRP@CERIC\" IR0000028 - Mission 4 Component 2 Investment 3.1 Action 3.1.1. ","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2510.21518","open_access":"1"}],"_id":"21072","publication_identifier":{"issn":["1049-5258"]},"date_created":"2026-01-29T14:29:23Z","conference":{"name":"NeurIPS: Neural Information Processing Systems","location":"San Diego, CA, United States","start_date":"2025-12-02","end_date":"2025-12-07"},"article_processing_charge":"No","external_id":{"arxiv":["2510.21518"]},"arxiv":1,"title":"Head pursuit: Probing attention specialization in multimodal transformers","volume":38,"publication":"39th Annual Conference on Neural Information Processing Systems","abstract":[{"text":"Language and vision-language models have shown impressive performance across a wide range of tasks, but their internal mechanisms remain only partly understood. 
In this work, we study how individual attention heads in text-generative models specialize in specific semantic or visual attributes. Building on an established interpretability method, we reinterpret the practice of probing intermediate activations with the final decoding layer through the lens of signal processing. This lets us analyze multiple samples in a principled way and rank attention heads based on their relevance to target concepts. Our results show consistent patterns of specialization at the head level across both unimodal and multimodal transformers. Remarkably, we find that editing as few as 1% of the heads, selected using our method, can reliably suppress or enhance targeted concepts in the model output. We validate our approach on language tasks such as question answering and toxicity mitigation, as well as vision-language tasks including image classification and captioning. Our findings highlight an interpretable and controllable structure within attention layers, offering simple tools for understanding and editing large-scale generative models.","lang":"eng"}],"file_date_updated":"2026-01-29T14:29:14Z","file":[{"date_created":"2026-01-29T14:29:14Z","relation":"main_file","checksum":"85be3f98663e2595cf37001852b477cb","access_level":"open_access","creator":"flocatel","file_size":4271547,"date_updated":"2026-01-29T14:29:14Z","content_type":"application/pdf","file_name":"2510.21518v2.pdf","file_id":"21073","success":1}],"year":"2025","day":"15","date_updated":"2026-02-11T08:55:36Z","publisher":"Neural Information Processing Systems Foundation","publication_status":"epub_ahead","status":"public","has_accepted_license":"1","intvolume":"        38","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"author":[{"full_name":"Basile, 
Lorenzo","first_name":"Lorenzo","last_name":"Basile"},{"full_name":"Maiorca, Valentino","last_name":"Maiorca","first_name":"Valentino"},{"full_name":"Doimo, Diego","first_name":"Diego","last_name":"Doimo"},{"last_name":"Locatello","first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco"},{"last_name":"Cazzaniga","first_name":"Alberto","full_name":"Cazzaniga, Alberto"}],"OA_type":"gold","date_published":"2025-12-15T00:00:00Z","ddc":["000"]},{"oa_version":"Published Version","oa":1,"department":[{"_id":"FrLo"}],"OA_place":"publisher","quality_controlled":"1","language":[{"iso":"eng"}],"type":"conference","month":"12","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"apa":"Yu, H., Inal, B., Arvanitidis, G., Hauberg, S., Locatello, F., &#38; Fumero, M. (2025). Connecting neural models latent geometries with relative geodesic representations. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","ista":"Yu H, Inal B, Arvanitidis G, Hauberg S, Locatello F, Fumero M. 2025. Connecting neural models latent geometries with relative geodesic representations. 39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 38.","chicago":"Yu, Hanlin, Befrin Inal, Georgios Arvanitidis, Soren Hauberg, Francesco Locatello, and Marco Fumero. “Connecting Neural Models Latent Geometries with Relative Geodesic Representations.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. Neural Information Processing Systems Foundation, 2025.","ama":"Yu H, Inal B, Arvanitidis G, Hauberg S, Locatello F, Fumero M. Connecting neural models latent geometries with relative geodesic representations. 
In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","ieee":"H. Yu, B. Inal, G. Arvanitidis, S. Hauberg, F. Locatello, and M. Fumero, “Connecting neural models latent geometries with relative geodesic representations,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38.","mla":"Yu, Hanlin, et al. “Connecting Neural Models Latent Geometries with Relative Geodesic Representations.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","short":"H. Yu, B. Inal, G. Arvanitidis, S. Hauberg, F. Locatello, M. Fumero, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025."},"acknowledgement":"We thank Gregor Krzmanc, German Magai, Vital Fernandez for insightful discussions in the early stages of the project. HY was supported by the Research Council of Finland Flagship programme: Finnish Center for Artificial Intelligence FCAI. HY wishes to acknowledge CSC - IT Center for Science, Finland, for computational resources. GA was supported by the DFF Sapere Aude Starting Grant “GADL”. SH was supported by a research grant (42062) from VILLUM FONDEN and partly funded by the Novo Nordisk Foundation through the Center for Basic Research in Life Science (NNF20OC0062606). SH received funding from the European Research Council (ERC) under the European Union’s Horizon Programme (grant agreement 101125003). 
MF is supported by the MSCA IST-Bridge fellowship which has received funding from the European Union’s Horizon 2020 research and innovation program under the Marie Skłodowska-Curie grant agreement No 101034413.","_id":"21074","publication_identifier":{"issn":["1049-5258"]},"date_created":"2026-01-29T14:31:52Z","conference":{"start_date":"2025-12-02","end_date":"2025-12-07","name":"NeurIPS: Neural Information Processing Systems","location":"San Diego, CA, United States"},"external_id":{"arxiv":["2506.01599"]},"article_processing_charge":"No","project":[{"call_identifier":"H2020","name":"IST-BRIDGE: International postdoctoral program","grant_number":"101034413","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c"}],"arxiv":1,"title":"Connecting neural models latent geometries with relative geodesic representations","ec_funded":1,"alternative_title":["Advances in Neural Information Processing Systems"],"volume":38,"publication":"39th Annual Conference on Neural Information Processing Systems","abstract":[{"lang":"eng","text":"Neural models learn representations of high-dimensional data on low-dimensional manifolds. Multiple factors, including stochasticities in the training process, model architectures, and additional inductive biases, may induce different representations, even when learning the same task on the same data. However, it has recently been shown that when a latent structure is shared between distinct latent spaces, relative distances between representations can be preserved, up to distortions. Building on this idea, we demonstrate that exploiting the differential-geometric structure of latent spaces of neural models, it is possible to capture precisely the transformations between representational spaces trained on similar data distributions. 
Specifically, we assume that distinct neural models parametrize approximately the same underlying manifold, and introduce a representation based on the pullback metric that captures the intrinsic structure of the latent space, while scaling efficiently to large models. We validate experimentally our method on model stitching and retrieval tasks, covering autoencoders and vision foundation discriminative models, across diverse architectures, datasets, pretraining schemes and modalities. Code is available at the following link."}],"file_date_updated":"2026-01-29T14:31:42Z","file":[{"file_name":"2506.01599v2.pdf","file_id":"21075","success":1,"relation":"main_file","date_created":"2026-01-29T14:31:42Z","checksum":"b1a645418025f46394764cd16d0cb089","access_level":"open_access","creator":"flocatel","file_size":7749349,"date_updated":"2026-01-29T14:31:42Z","content_type":"application/pdf"}],"year":"2025","day":"15","date_updated":"2026-02-11T09:03:37Z","publisher":"Neural Information Processing Systems Foundation","publication_status":"epub_ahead","status":"public","has_accepted_license":"1","intvolume":"        38","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"author":[{"full_name":"Yu, Hanlin","first_name":"Hanlin","last_name":"Yu"},{"first_name":"Befrin","last_name":"Inal","full_name":"Inal, Befrin"},{"full_name":"Arvanitidis, Georgios","first_name":"Georgios","last_name":"Arvanitidis"},{"full_name":"Hauberg, Soren","first_name":"Soren","last_name":"Hauberg"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco"},{"full_name":"Fumero, 
Marco","id":"1c1593eb-393f-11ef-bb8e-ab4f1e979650","first_name":"Marco","last_name":"Fumero"}],"OA_type":"gold","corr_author":"1","ddc":["000"],"date_published":"2025-12-15T00:00:00Z"},{"has_accepted_license":"1","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"intvolume":"        38","author":[{"first_name":"Riccardo","last_name":"Cadei","full_name":"Cadei, Riccardo","id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b"},{"full_name":"Demirel, Ilker","first_name":"Ilker","last_name":"Demirel"},{"full_name":"De Bartolomeis, Piersilvio","first_name":"Piersilvio","last_name":"De Bartolomeis"},{"last_name":"Lindorfer","first_name":"Lukas","id":"85f0e6d3-06b3-11ec-8982-8c5049fa4455","full_name":"Lindorfer, Lukas"},{"first_name":"Sylvia","last_name":"Cremer","full_name":"Cremer, Sylvia","id":"2F64EC8C-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-2193-3868"},{"full_name":"Schmid, Cordelia","last_name":"Schmid","first_name":"Cordelia"},{"orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","last_name":"Locatello"}],"ddc":["000"],"date_published":"2025-12-15T00:00:00Z","OA_type":"gold","file":[{"access_level":"open_access","date_created":"2026-01-29T14:35:02Z","relation":"main_file","checksum":"92467fa566cd36671a6a3b9e71ae0f71","content_type":"application/pdf","date_updated":"2026-01-29T14:35:02Z","file_size":8489023,"creator":"flocatel","file_id":"21077","file_name":"17546_Prediction_Powered_Causa.pdf","success":1}],"day":"15","year":"2025","publisher":"Neural Information Processing Systems Foundation","date_updated":"2026-02-16T11:39:33Z","publication_status":"epub_ahead","status":"public","conference":{"name":"NeurIPS: Neural Information Processing Systems","location":"San Diego, CA, United 
States","end_date":"2025-12-07","start_date":"2025-12-02"},"article_processing_charge":"No","title":"Prediction-powered causal inferences","file_date_updated":"2026-01-29T14:35:02Z","alternative_title":["Advances in Neural Information Processing Systems"],"volume":38,"abstract":[{"lang":"eng","text":"In many scientific experiments, the data annotating cost constraints the pace for testing novel hypotheses. Yet, modern machine learning pipelines offer a promising solution—provided their predictions yield correct conclusions. We focus on Prediction-Powered Causal Inferences (PPCI), i.e., estimating the treatment effect in an unlabeled target experiment, relying on training data with the same outcome annotated but potentially different treatment or effect modifiers. We first show that conditional calibration guarantees valid PPCI at population level. Then, we introduce a sufficient representation constraint transferring validity across experiments, which we propose to enforce in practice in Deconfounded Empirical Risk Minimization, our new model-agnostic training objective. We validate our method on synthetic and real-world scientific data, solving impossible problem instances for Empirical Risk Minimization even with standard invariance constraints. In particular, for the first time, we achieve valid causal inference on a scientific experiment with complex recording and no human annotations, fine-tuning a foundational model on our similar annotated experiment."}],"publication":"39th Annual Conference on Neural Information Processing Systems","department":[{"_id":"FrLo"},{"_id":"SyCr"}],"oa":1,"oa_version":"Published Version","quality_controlled":"1","language":[{"iso":"eng"}],"OA_place":"publisher","citation":{"ista":"Cadei R, Demirel I, De Bartolomeis P, Lindorfer L, Cremer S, Schmid C, Locatello F. 2025. Prediction-powered causal inferences. 39th Annual Conference on Neural Information Processing Systems. 
NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 38.","apa":"Cadei, R., Demirel, I., De Bartolomeis, P., Lindorfer, L., Cremer, S., Schmid, C., &#38; Locatello, F. (2025). Prediction-powered causal inferences. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","short":"R. Cadei, I. Demirel, P. De Bartolomeis, L. Lindorfer, S. Cremer, C. Schmid, F. Locatello, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","mla":"Cadei, Riccardo, et al. “Prediction-Powered Causal Inferences.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","ieee":"R. Cadei <i>et al.</i>, “Prediction-powered causal inferences,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38.","ama":"Cadei R, Demirel I, De Bartolomeis P, et al. Prediction-powered causal inferences. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","chicago":"Cadei, Riccardo, Ilker Demirel, Piersilvio De Bartolomeis, Lukas Lindorfer, Sylvia Cremer, Cordelia Schmid, and Francesco Locatello. “Prediction-Powered Causal Inferences.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. Neural Information Processing Systems Foundation, 2025."},"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"12","publication_identifier":{"issn":["1049-5258"]},"_id":"21076","date_created":"2026-01-29T14:35:11Z","acknowledgement":"We thank the Causal Learning and Artificial Intelligence group at ISTA for the continuous feedback on the project and valuable discussions. 
We thank the Social Immunity group at ISTA, particularly Jinook Oh, for the annotation program and Michaela Hoenigsberger for supporting our ecological experiment. Riccardo Cadei is supported by a Google Research Scholar Award and a Google Initiated Gift to Francesco Locatello. This research was funded in part by the Austrian Science Fund (FWF) 10.55776/COE12). It was further partially supported by the ISTA Interdisciplinary Project Committee for the collaborative project “ALED” between Francesco Locatello and Sylvia Cremer. For open access purposes, the author has applied a CC BY public copyright license to any author accepted manuscript version arising from this submission."},{"tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"has_accepted_license":"1","date_published":"2025-01-22T00:00:00Z","ddc":["000"],"corr_author":"1","OA_type":"gold","author":[{"id":"d3e02e50-48a8-11ee-8f62-c108061797fa","full_name":"Yao, Dingling","last_name":"Yao","first_name":"Dingling"},{"id":"feb58f2e-72ef-11ef-b75a-8f0894539cd0","full_name":"Rancati, Dario","last_name":"Rancati","first_name":"Dario"},{"id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b","full_name":"Cadei, Riccardo","last_name":"Cadei","first_name":"Riccardo"},{"id":"1c1593eb-393f-11ef-bb8e-ab4f1e979650","full_name":"Fumero, Marco","last_name":"Fumero","first_name":"Marco"},{"last_name":"Locatello","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683"}],"day":"22","year":"2025","file":[{"creator":"flocatel","date_updated":"2026-01-27T12:43:25Z","content_type":"application/pdf","file_size":877014,"relation":"main_file","checksum":"c4b5a4a644228c6d1b0283e1368bce9e","date_created":"2026-01-27T12:43:25Z","access_level":"open_access","success":1,"file_name":"4356_Unifying_Causal_Represent 
(1).pdf","file_id":"21048"}],"publication_status":"published","status":"public","publisher":"ICLR","date_updated":"2026-02-09T05:52:14Z","arxiv":1,"conference":{"location":"Singapore","name":"ICLR: International Conference on Learning Representations","end_date":"2025-04-28","start_date":"2025-04-24"},"external_id":{"arxiv":["2409.02772"]},"article_processing_charge":"No","file_date_updated":"2026-01-27T12:43:25Z","abstract":[{"lang":"eng","text":"Causal representation learning aims at recovering latent causal variables from high-dimensional observations to solve causal downstream tasks, such as predicting the effect of new interventions or more robust classification. A plethora of methods have been developed, each tackling carefully crafted problem settings that lead to different types of identifiability. The folklore is that these different settings are important, as they are often linked to different rungs of Pearl's causal hierarchy, although not all neatly fit. Our main contribution is to show that many existing causal representation learning approaches methodologically align the representation to known data symmetries. Identification of the variables is guided by equivalence classes across different \"data pockets\" that are not necessarily causal. This result suggests important implications, allowing us to unify many existing approaches in a single method that can mix and match different assumptions, including non-causal ones, based on the invariances relevant to our application. It also significantly benefits applicability, which we demonstrate by improving treatment effect estimation on real-world high-dimensional ecological data. 
Overall, this paper clarifies the role of causality assumptions in the discovery of causal variables and shifts the focus to preserving data symmetries."}],"publication":"13th International Conference on Learning Representations","title":"Unifying causal representation learning with the invariance principle","quality_controlled":"1","language":[{"iso":"eng"}],"OA_place":"publisher","department":[{"_id":"FrLo"}],"oa_version":"Published Version","oa":1,"_id":"19010","date_created":"2025-02-05T09:23:25Z","acknowledgement":"We thank Jiaqi Zhang, Francesco Montagna, David Lopez-Paz, Kartik Ahuja, Thomas Kipf, Sara\r\nMagliacane, Julius von Kügelgen, Kun Zhang, and Bernhard Schölkopf for extremely helpful discussion. Riccardo Cadei was supported by a Google Research Scholar Award to Francesco Locatello. We acknowledge the Third Bellairs Workshop on Causal Representation Learning held at the Bellairs Research Institute, February 9/16, 2024, and a debate on the difference between interventions and counterfactuals in disentanglement and CRL that took place during Dhanya Sridhar’s lecture, which motivated us to significantly broaden the scope of the paper. We thank Dhanya and all participants of the workshop.","citation":{"chicago":"Yao, Dingling, Dario Rancati, Riccardo Cadei, Marco Fumero, and Francesco Locatello. “Unifying Causal Representation Learning with the Invariance Principle.” In <i>13th International Conference on Learning Representations</i>. ICLR, 2025.","ama":"Yao D, Rancati D, Cadei R, Fumero M, Locatello F. Unifying causal representation learning with the invariance principle. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025.","mla":"Yao, Dingling, et al. “Unifying Causal Representation Learning with the Invariance Principle.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025.","ieee":"D. Yao, D. Rancati, R. Cadei, M. Fumero, and F. 
Locatello, “Unifying causal representation learning with the invariance principle,” in <i>13th International Conference on Learning Representations</i>, Singapore, 2025.","short":"D. Yao, D. Rancati, R. Cadei, M. Fumero, F. Locatello, in:, 13th International Conference on Learning Representations, ICLR, 2025.","apa":"Yao, D., Rancati, D., Cadei, R., Fumero, M., &#38; Locatello, F. (2025). Unifying causal representation learning with the invariance principle. In <i>13th International Conference on Learning Representations</i>. Singapore: ICLR.","ista":"Yao D, Rancati D, Cadei R, Fumero M, Locatello F. 2025. Unifying causal representation learning with the invariance principle. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations."},"scopus_import":"1","type":"conference","month":"01","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"author":[{"full_name":"Basile, Lorenzo","last_name":"Basile","first_name":"Lorenzo"},{"full_name":"Maiorca, Valentino","last_name":"Maiorca","first_name":"Valentino"},{"last_name":"Bortolussi","first_name":"Luca","full_name":"Bortolussi, Luca"},{"last_name":"Rodolà","first_name":"Emanuele","full_name":"Rodolà, Emanuele"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco"}],"article_number":"2411.00246","title":"ResiDual transformer alignment with spectral decomposition","date_published":"2025-04-14T00:00:00Z","publication":"arXiv","abstract":[{"text":"When examined through the lens of their residual streams, a puzzling property emerges in transformer networks: residual contributions (e.g., attention heads) sometimes specialize in specific tasks or input attributes. In this paper, we analyze this phenomenon in vision transformers, focusing on the spectral geometry of residuals, and explore its implications for modality alignment in vision-language models. 
First, we link it to the intrinsically low-dimensional structure of visual head representations, zooming into their principal components and showing that they encode specialized roles across a wide variety of input data distributions. Then, we analyze the effect of head specialization in multimodal models, focusing on how improved alignment between text and specialized heads impacts zero-shot classification performance. This specialization-performance link consistently holds across diverse pre-training data, network sizes, and objectives, demonstrating a powerful new mechanism for boosting zero-shot classification through targeted alignment. Ultimately, we translate these insights into actionable terms by introducing ResiDual, a technique for spectral alignment of the residual stream. Much like panning for gold, it lets the noise from irrelevant unit principal components (i.e., attributes) wash away to amplify task-relevant ones. Remarkably, this dual perspective on modality alignment yields fine-tuning level performance on different data distributions while modelling an extremely interpretable and parameter-efficient transformation, as we extensively show on 70 pre-trained network-dataset combinations (7 models, 10 datasets).","lang":"eng"}],"OA_type":"green","article_processing_charge":"No","external_id":{"arxiv":["2411.00246"]},"arxiv":1,"citation":{"chicago":"Basile, Lorenzo, Valentino Maiorca, Luca Bortolussi, Emanuele Rodolà, and Francesco Locatello. “ResiDual Transformer Alignment with Spectral Decomposition.” <i>ArXiv</i>, n.d. <a href=\"https://doi.org/10.48550/arXiv.2411.00246\">https://doi.org/10.48550/arXiv.2411.00246</a>.","ama":"Basile L, Maiorca V, Bortolussi L, Rodolà E, Locatello F. ResiDual transformer alignment with spectral decomposition. <i>arXiv</i>. doi:<a href=\"https://doi.org/10.48550/arXiv.2411.00246\">10.48550/arXiv.2411.00246</a>","ieee":"L. Basile, V. Maiorca, L. Bortolussi, E. Rodolà, and F. 
Locatello, “ResiDual transformer alignment with spectral decomposition,” <i>arXiv</i>. .","mla":"Basile, Lorenzo, et al. “ResiDual Transformer Alignment with Spectral Decomposition.” <i>ArXiv</i>, 2411.00246, doi:<a href=\"https://doi.org/10.48550/arXiv.2411.00246\">10.48550/arXiv.2411.00246</a>.","short":"L. Basile, V. Maiorca, L. Bortolussi, E. Rodolà, F. Locatello, ArXiv (n.d.).","apa":"Basile, L., Maiorca, V., Bortolussi, L., Rodolà, E., &#38; Locatello, F. (n.d.). ResiDual transformer alignment with spectral decomposition. <i>arXiv</i>. <a href=\"https://doi.org/10.48550/arXiv.2411.00246\">https://doi.org/10.48550/arXiv.2411.00246</a>","ista":"Basile L, Maiorca V, Bortolussi L, Rodolà E, Locatello F. ResiDual transformer alignment with spectral decomposition. arXiv, 2411.00246."},"month":"04","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_updated":"2025-05-19T07:03:16Z","type":"preprint","doi":"10.48550/arXiv.2411.00246","date_created":"2025-05-11T22:02:41Z","status":"public","publication_status":"submitted","_id":"19674","acknowledgement":"The authors gratefully acknowledge Volkan Cevher for an insightful discussion about sparse recovery algorithms, Alex Smola for valuable feedback on the experiments, and Marco Baroni for an engaging conversation on the phenomenon of head specialization in 
NLP.\r\n","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2411.00246","open_access":"1"}],"department":[{"_id":"FrLo"}],"oa":1,"oa_version":"Preprint","day":"14","language":[{"iso":"eng"}],"year":"2025","OA_place":"repository"},{"file":[{"success":1,"file_id":"20065","file_name":"2025_ICLR_Chen.pdf","file_size":732745,"content_type":"application/pdf","date_updated":"2025-07-22T07:58:22Z","creator":"dernst","access_level":"open_access","checksum":"64cfdb12ae3e4e8ba57b1403e1066776","relation":"main_file","date_created":"2025-07-22T07:58:22Z"}],"year":"2025","day":"01","date_updated":"2025-08-04T08:03:11Z","publisher":"ICLR","publication_status":"published","status":"public","has_accepted_license":"1","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"author":[{"orcid":"0000-0001-5337-5875","full_name":"Chen, Jiale","id":"4d0a9064-1ff6-11ee-9fa6-ec046c604785","first_name":"Jiale","last_name":"Chen"},{"id":"d3e02e50-48a8-11ee-8f62-c108061797fa","full_name":"Yao, Dingling","last_name":"Yao","first_name":"Dingling"},{"full_name":"Pervez, Adeel A","id":"fca6d90c-d47f-11ee-bc87-93ff51604981","first_name":"Adeel A","last_name":"Pervez"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh","first_name":"Dan-Adrian"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","last_name":"Locatello","first_name":"Francesco"}],"corr_author":"1","OA_type":"diamond","ddc":["000"],"date_published":"2025-04-01T00:00:00Z","oa_version":"Published Version","oa":1,"department":[{"_id":"DaAl"},{"_id":"FrLo"}],"OA_place":"publisher","quality_controlled":"1","language":[{"iso":"eng"}],"type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"04","citation":{"ista":"Chen 
J, Yao D, Pervez AA, Alistarh D-A, Locatello F. 2025. Scalable mechanistic neural networks. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations, 63716–63737.","apa":"Chen, J., Yao, D., Pervez, A. A., Alistarh, D.-A., &#38; Locatello, F. (2025). Scalable mechanistic neural networks. In <i>13th International Conference on Learning Representations</i> (pp. 63716–63737). Singapore, Singapore: ICLR.","short":"J. Chen, D. Yao, A.A. Pervez, D.-A. Alistarh, F. Locatello, in:, 13th International Conference on Learning Representations, ICLR, 2025, pp. 63716–63737.","mla":"Chen, Jiale, et al. “Scalable Mechanistic Neural Networks.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025, pp. 63716–37.","ieee":"J. Chen, D. Yao, A. A. Pervez, D.-A. Alistarh, and F. Locatello, “Scalable mechanistic neural networks,” in <i>13th International Conference on Learning Representations</i>, Singapore, Singapore, 2025, pp. 63716–63737.","ama":"Chen J, Yao D, Pervez AA, Alistarh D-A, Locatello F. Scalable mechanistic neural networks. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025:63716-63737.","chicago":"Chen, Jiale, Dingling Yao, Adeel A Pervez, Dan-Adrian Alistarh, and Francesco Locatello. “Scalable Mechanistic Neural Networks.” In <i>13th International Conference on Learning Representations</i>, 63716–37. 
ICLR, 2025."},"scopus_import":"1","_id":"20032","related_material":{"link":[{"relation":"software","url":"https://github.com/IST-DASLab/ScalableMNN"}]},"publication_identifier":{"isbn":["9798331320850"]},"date_created":"2025-07-20T22:02:01Z","external_id":{"arxiv":["2410.06074"]},"article_processing_charge":"No","conference":{"end_date":"2025-04-28","start_date":"2025-04-24","location":"Singapore, Singapore","name":"ICLR: International Conference on Learning Representations"},"page":"63716-63737","arxiv":1,"title":"Scalable mechanistic neural networks","abstract":[{"text":"We propose Scalable Mechanistic Neural Network (S-MNN), an enhanced neural network framework designed for scientific machine learning applications involving long temporal sequences. By reformulating the original Mechanistic Neural Network (MNN) (Pervez et al., 2024), we reduce the computational time and space complexities from cubic and quadratic with respect to the sequence length, respectively, to linear. This significant improvement enables efficient modeling of long-term dynamics without sacrificing accuracy or interpretability. Extensive experiments demonstrate that S-MNN matches the original MNN in precision while substantially reducing computational resources. Consequently, S-MNN can drop-in replace the original MNN in applications, providing a practical and efficient tool for integrating mechanistic bottlenecks into neural network models of complex dynamical systems. Source code is available at https://github.com/IST-DASLab/ScalableMNN.","lang":"eng"}],"publication":"13th International Conference on Learning Representations","file_date_updated":"2025-07-22T07:58:22Z"},{"department":[{"_id":"FrLo"}],"oa_version":"Published Version","oa":1,"quality_controlled":"1","language":[{"iso":"eng"}],"OA_place":"publisher","scopus_import":"1","citation":{"short":"V. Pariza, M. Salehi, G. Burghouts, F. Locatello, Y.M. 
Asano, in:, 13th International Conference on Learning Representations, ICLR, 2025, pp. 72303–72330.","mla":"Pariza, Valentinos, et al. “Near, Far: Patch-Ordering Enhances Vision Foundation Models’ Scene Understanding.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025, pp. 72303–30.","ieee":"V. Pariza, M. Salehi, G. Burghouts, F. Locatello, and Y. M. Asano, “Near, far: Patch-ordering enhances vision foundation models’ scene understanding,” in <i>13th International Conference on Learning Representations</i>, Singapore, Singapore, 2025, pp. 72303–72330.","ama":"Pariza V, Salehi M, Burghouts G, Locatello F, Asano YM. Near, far: Patch-ordering enhances vision foundation models’ scene understanding. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025:72303-72330.","chicago":"Pariza, Valentinos, Mohammadreza Salehi, Gertjan Burghouts, Francesco Locatello, and Yuki M. Asano. “Near, Far: Patch-Ordering Enhances Vision Foundation Models’ Scene Understanding.” In <i>13th International Conference on Learning Representations</i>, 72303–30. ICLR, 2025.","ista":"Pariza V, Salehi M, Burghouts G, Locatello F, Asano YM. 2025. Near, far: Patch-ordering enhances vision foundation models’ scene understanding. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations, 72303–72330.","apa":"Pariza, V., Salehi, M., Burghouts, G., Locatello, F., &#38; Asano, Y. M. (2025). Near, far: Patch-ordering enhances vision foundation models’ scene understanding. In <i>13th International Conference on Learning Representations</i> (pp. 72303–72330). 
Singapore, Singapore: ICLR."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"04","type":"conference","date_created":"2025-07-20T22:02:03Z","_id":"20036","publication_identifier":{"isbn":["9798331320850"]},"article_processing_charge":"No","external_id":{"arxiv":["2408.11054"]},"conference":{"name":"ICLR: International Conference on Learning Representations","location":"Singapore, Singapore","start_date":"2025-04-24","end_date":"2025-04-28"},"page":"72303-72330","arxiv":1,"title":"Near, far: Patch-ordering enhances vision foundation models' scene understanding","file_date_updated":"2025-08-04T08:09:43Z","publication":"13th International Conference on Learning Representations","abstract":[{"text":"We introduce NeCo: Patch Neighbor Consistency, a novel self-supervised training loss that enforces patch-level nearest neighbor consistency across a student and teacher model. Compared to contrastive approaches that only yield binary learning signals, i.e. \"attract\" and \"repel\", this approach benefits from the more fine-grained learning signal of sorting spatially dense features relative to reference patches. Our method leverages differentiable sorting applied on top of pretrained representations, such as DINOv2-registers to bootstrap the learning signal and further improve upon them. This dense post-pretraining leads to superior performance across various models and datasets, despite requiring only 19 hours on a single GPU. 
This method generates high-quality dense feature encoders and establishes several new state-of-the-art results such as +2.3 % and +4.2% for non-parametric in-context semantic segmentation on ADE20k and Pascal VOC, +1.6% and +4.8% for linear segmentation evaluations on COCO-Things and -Stuff and improvements in the 3D understanding of multi-view consistency on SPair-71k, by more than 1.5%.","lang":"eng"}],"file":[{"success":1,"file_name":"2025_ICLR_Pariza.pdf","file_id":"20109","creator":"dernst","file_size":37788223,"content_type":"application/pdf","date_updated":"2025-08-04T08:09:43Z","checksum":"ddbe981f3ad3f6cb6daf12c954822eb8","relation":"main_file","date_created":"2025-08-04T08:09:43Z","access_level":"open_access"}],"day":"01","year":"2025","publisher":"ICLR","date_updated":"2025-08-04T08:10:55Z","status":"public","publication_status":"published","has_accepted_license":"1","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"author":[{"last_name":"Pariza","first_name":"Valentinos","full_name":"Pariza, Valentinos"},{"full_name":"Salehi, Mohammadreza","first_name":"Mohammadreza","last_name":"Salehi"},{"full_name":"Burghouts, Gertjan","first_name":"Gertjan","last_name":"Burghouts"},{"last_name":"Locatello","first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco"},{"full_name":"Asano, Yuki M.","last_name":"Asano","first_name":"Yuki M."}],"date_published":"2025-04-01T00:00:00Z","ddc":["000"],"OA_type":"diamond"},{"year":"2025","day":"01","date_updated":"2025-09-09T07:47:13Z","publisher":"ML Research Press","status":"public","publication_status":"published","intvolume":"       258","author":[{"first_name":"Shimeng","last_name":"Huang","full_name":"Huang, 
Shimeng","id":"989c2a06-fb4e-11ef-a992-ab766442255b","orcid":"0000-0001-6919-821X"},{"first_name":"Niklas","last_name":"Pfister","full_name":"Pfister, Niklas"},{"full_name":"Bowden, Jack","last_name":"Bowden","first_name":"Jack"}],"OA_type":"green","date_published":"2025-05-01T00:00:00Z","oa_version":"Preprint","oa":1,"department":[{"_id":"FrLo"}],"OA_place":"repository","quality_controlled":"1","language":[{"iso":"eng"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"05","type":"conference","scopus_import":"1","citation":{"short":"S. Huang, N. Pfister, J. Bowden, in:, The 28th International Conference on Artificial Intelligence and Statistics, ML Research Press, 2025, pp. 3394–3402.","ieee":"S. Huang, N. Pfister, and J. Bowden, “Sparse causal effect estimation using two-sample summary statistics in the presence of unmeasured confounding,” in <i>The 28th International Conference on Artificial Intelligence and Statistics</i>, Mai Khao, Thailand, 2025, vol. 258, pp. 3394–3402.","mla":"Huang, Shimeng, et al. “Sparse Causal Effect Estimation Using Two-Sample Summary Statistics in the Presence of Unmeasured Confounding.” <i>The 28th International Conference on Artificial Intelligence and Statistics</i>, vol. 258, ML Research Press, 2025, pp. 3394–402.","ama":"Huang S, Pfister N, Bowden J. Sparse causal effect estimation using two-sample summary statistics in the presence of unmeasured confounding. In: <i>The 28th International Conference on Artificial Intelligence and Statistics</i>. Vol 258. ML Research Press; 2025:3394-3402.","chicago":"Huang, Shimeng, Niklas Pfister, and Jack Bowden. “Sparse Causal Effect Estimation Using Two-Sample Summary Statistics in the Presence of Unmeasured Confounding.” In <i>The 28th International Conference on Artificial Intelligence and Statistics</i>, 258:3394–3402. ML Research Press, 2025.","ista":"Huang S, Pfister N, Bowden J. 2025. 
Sparse causal effect estimation using two-sample summary statistics in the presence of unmeasured confounding. The 28th International Conference on Artificial Intelligence and Statistics. AISTATS: Conference on Artificial Intelligence and Statistics, PMLR, vol. 258, 3394–3402.","apa":"Huang, S., Pfister, N., &#38; Bowden, J. (2025). Sparse causal effect estimation using two-sample summary statistics in the presence of unmeasured confounding. In <i>The 28th International Conference on Artificial Intelligence and Statistics</i> (Vol. 258, pp. 3394–3402). Mai Khao, Thailand: ML Research Press."},"main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2410.12300","open_access":"1"}],"acknowledgement":"The authors would like to thank Stephen Burgess and Ashish Patel for helpful discussions at\r\nthe start of this research project, and Anton Rask Lundborg for helpful discussions on the\r\nuniform asymptotic results. This work was partially completed during SH’s research visit at\r\nNovo Nordisk. The authors would like to thank Jesper Ferkinghoff-Borg, Kang Li and Lewis\r\nMarsh for facilitating this visit and for discussing necessary concepts and tools in statistical\r\ngenetics at an early stage. SH and NP are supported by a research grant (0069071) from Novo\r\nNordisk Fonden. 
JB is funded at the University of Exeter by research grant MR/X011372/1.","date_created":"2025-09-07T22:01:35Z","_id":"20303","publication_identifier":{"eissn":["2640-3498"]},"page":"3394-3402","conference":{"end_date":"2025-05-05","start_date":"2025-05-03","name":"AISTATS: Conference on Artificial Intelligence and Statistics","location":"Mai Khao, Thailand"},"external_id":{"arxiv":["2410.12300"]},"article_processing_charge":"No","arxiv":1,"title":"Sparse causal effect estimation using two-sample summary statistics in the presence of unmeasured confounding","publication":"The 28th International Conference on Artificial Intelligence and Statistics","abstract":[{"lang":"eng","text":"Observational genome-wide association studies are now widely used for causal inference in genetic epidemiology. To maintain privacy, such data is often only publicly available as summary statistics, and often studies for the endogenous covariates and the outcome are available separately. This has necessitated methods tailored to two-sample summary statistics. Current state-of-the-art methods modify linear instrumental variable (IV) regression---with genetic variants as instruments---to account for unmeasured confounding. However, since the endogenous covariates can be high dimensional, standard IV assumptions are generally insufficient to identify all causal effects simultaneously. We ensure identifiability by assuming the causal effects are sparse and propose a sparse causal effect two-sample IV estimator, spaceTSIV, adapting the spaceIV estimator by Pfister and Peters (2022) for two-sample summary statistics. We provide two methods, based on L0- and L1-penalization, respectively. We prove identifiability of the sparse causal effects in the two-sample setting and consistency of spaceTSIV. The performance of spaceTSIV is compared with existing two-sample IV methods in simulations. 
Finally, we showcase our methods using real proteomic and gene-expression data for drug-target discovery."}],"alternative_title":["PMLR"],"volume":258},{"language":[{"iso":"eng"}],"quality_controlled":"1","OA_place":"repository","department":[{"_id":"FrLo"}],"oa":1,"oa_version":"Preprint","date_created":"2025-11-02T23:01:35Z","publication_identifier":{"eissn":["2640-3498"]},"_id":"20592","acknowledgement":"NB gratefully acknowledge co-funding by the European Union (ERC, ANUBIS, 101123955. Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council. Neither the European Union nor the granting authority can be held responsible for them). NB thanks the International\r\nMax Planck Research School for Intelligent Systems (IMPRS-IS) for their support.","main_file_link":[{"url":"https://openreview.net/forum?id=sgPCP9jOlS","open_access":"1"}],"scopus_import":"1","citation":{"ieee":"D. Yao, F. Tronarp, and N. Bosch, “Propagating model uncertainty through filtering-based probabilistic numerical ODE solvers,” in <i>Proceedings of the 1st International Conference on Probabilistic Numerics</i>, Sophia Antipolis, France, 2025, vol. 271.","mla":"Yao, Dingling, et al. “Propagating Model Uncertainty through Filtering-Based Probabilistic Numerical ODE Solvers.” <i>Proceedings of the 1st International Conference on Probabilistic Numerics</i>, vol. 271, ML Research Press, 2025.","short":"D. Yao, F. Tronarp, N. Bosch, in:, Proceedings of the 1st International Conference on Probabilistic Numerics, ML Research Press, 2025.","chicago":"Yao, Dingling, Filip Tronarp, and Nathanael Bosch. “Propagating Model Uncertainty through Filtering-Based Probabilistic Numerical ODE Solvers.” In <i>Proceedings of the 1st International Conference on Probabilistic Numerics</i>, Vol. 271. ML Research Press, 2025.","ama":"Yao D, Tronarp F, Bosch N. 
Propagating model uncertainty through filtering-based probabilistic numerical ODE solvers. In: <i>Proceedings of the 1st International Conference on Probabilistic Numerics</i>. Vol 271. ML Research Press; 2025.","ista":"Yao D, Tronarp F, Bosch N. 2025. Propagating model uncertainty through filtering-based probabilistic numerical ODE solvers. Proceedings of the 1st International Conference on Probabilistic Numerics. ProbNum: Conference on Probabilistic Numerics, PMLR, vol. 271.","apa":"Yao, D., Tronarp, F., &#38; Bosch, N. (2025). Propagating model uncertainty through filtering-based probabilistic numerical ODE solvers. In <i>Proceedings of the 1st International Conference on Probabilistic Numerics</i> (Vol. 271). Sophia Antipolis, France: ML Research Press."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"01","type":"conference","arxiv":1,"conference":{"name":"ProbNum: Conference on Probabilistic Numerics","location":"Sophia Antipolis, France","end_date":"2025-09-03","start_date":"2025-09-01"},"article_processing_charge":"No","external_id":{"arxiv":["2503.04684"]},"abstract":[{"text":"Filtering-based probabilistic numerical solvers for ordinary differential equations (ODEs), also known as ODE filters, have been established as efficient methods for quantifying numerical uncertainty in the solution of ODEs. In practical applications, however, the underlying dynamical system often contains uncertain parameters, requiring the propagation of this model uncertainty to the ODE solution. In this paper, we demonstrate that ODE filters, despite their probabilistic nature, do not automatically solve this uncertainty propagation problem. To address this limitation, we present a novel approach that combines ODE filters with numerical quadrature to properly marginalize over uncertain parameters, while accounting for both parameter uncertainty and numerical solver uncertainty. 
Experiments across multiple dynamical systems demonstrate that the resulting uncertainty estimates closely match reference solutions. Notably, we show\r\nhow the numerical uncertainty from the ODE solver can help prevent overconfidence in the propagated uncertainty estimates, especially when using larger step sizes. Our results illustrate that probabilistic numerical methods can effectively quantify both numerical and parametric uncertainty in dynamical systems. ","lang":"eng"}],"publication":"Proceedings of the 1st International Conference on Probabilistic Numerics","volume":271,"alternative_title":["PMLR"],"title":"Propagating model uncertainty through filtering-based probabilistic numerical ODE solvers","day":"01","year":"2025","license":"https://creativecommons.org/licenses/by-sa/4.0/","status":"public","publication_status":"published","publisher":"ML Research Press","date_updated":"2025-11-10T08:33:11Z","tmp":{"legal_code_url":"https://creativecommons.org/licenses/by-sa/4.0/legalcode","short":"CC BY-SA (4.0)","image":"/images/cc_by_sa.png","name":"Creative Commons Attribution-ShareAlike 4.0 International Public License (CC BY-SA 4.0)"},"intvolume":"       271","has_accepted_license":"1","ddc":["000"],"date_published":"2025-01-01T00:00:00Z","OA_type":"green","author":[{"id":"d3e02e50-48a8-11ee-8f62-c108061797fa","full_name":"Yao, Dingling","last_name":"Yao","first_name":"Dingling"},{"last_name":"Tronarp","first_name":"Filip","full_name":"Tronarp, Filip"},{"first_name":"Nathanael","last_name":"Bosch","full_name":"Bosch, Nathanael"}]},{"has_accepted_license":"1","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"intvolume":"       267","author":[{"last_name":"Pervez","first_name":"Adeel A","id":"fca6d90c-d47f-11ee-bc87-93ff51604981","full_name":"Pervez, Adeel A"},{"full_name":"Gavves, 
Efstratios","first_name":"Efstratios","last_name":"Gavves"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco"}],"ddc":["000"],"date_published":"2025-05-01T00:00:00Z","OA_type":"gold","corr_author":"1","file":[{"file_id":"20827","file_name":"2025_ICML_Pervez.pdf","success":1,"access_level":"open_access","date_created":"2025-12-16T12:21:49Z","relation":"main_file","checksum":"933cb673fb41416f537278fb990df6c3","content_type":"application/pdf","date_updated":"2025-12-16T12:21:49Z","file_size":993381,"creator":"dernst"}],"day":"01","year":"2025","publisher":"ML Research Press","date_updated":"2025-12-16T12:24:55Z","status":"public","publication_status":"published","project":[{"name":"IST-BRIDGE: International postdoctoral program","call_identifier":"H2020","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c","grant_number":"101034413"}],"conference":{"name":"ICML: International Conference on Machine Learning","location":"Vancouver, Canada","start_date":"2025-07-13","end_date":"2025-07-19"},"external_id":{"arxiv":["2502.18377"]},"page":"48962-48973","article_processing_charge":"No","arxiv":1,"title":"Mechanistic PDE networks for discovery of governing equations","file_date_updated":"2025-12-16T12:21:49Z","abstract":[{"text":"We present Mechanistic PDE Networks -- a model for discovery of governing partial differential equations from data. Mechanistic PDE Networks represent spatiotemporal data as space-time dependent linear partial differential equations in neural network hidden representations. The represented PDEs are then solved and decoded for specific tasks. The learned PDE representations naturally express the spatiotemporal dynamics in data in neural network hidden space, enabling increased modeling power. Solving the PDE representations in a compute and memory-efficient way, however, is a significant challenge. 
We develop a native, GPU-capable, parallel, sparse and differentiable multigrid solver specialized for linear partial differential equations that acts as a module in Mechanistic PDE Networks. Leveraging the PDE solver we propose a discovery architecture that can discovers nonlinear PDEs in complex settings, while being robust to noise. We validate PDE discovery on a number of PDEs including reaction-diffusion and Navier-Stokes equations.","lang":"eng"}],"publication":"42nd International Conference on Machine Learning","volume":267,"alternative_title":["PMLR"],"ec_funded":1,"department":[{"_id":"FrLo"}],"oa":1,"oa_version":"Published Version","quality_controlled":"1","language":[{"iso":"eng"}],"OA_place":"publisher","scopus_import":"1","citation":{"ista":"Pervez AA, Gavves E, Locatello F. 2025. Mechanistic PDE networks for discovery of governing equations. 42nd International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 267, 48962–48973.","apa":"Pervez, A. A., Gavves, E., &#38; Locatello, F. (2025). Mechanistic PDE networks for discovery of governing equations. In <i>42nd International Conference on Machine Learning</i> (Vol. 267, pp. 48962–48973). Vancouver, Canada: ML Research Press.","short":"A.A. Pervez, E. Gavves, F. Locatello, in:, 42nd International Conference on Machine Learning, ML Research Press, 2025, pp. 48962–48973.","ieee":"A. A. Pervez, E. Gavves, and F. Locatello, “Mechanistic PDE networks for discovery of governing equations,” in <i>42nd International Conference on Machine Learning</i>, Vancouver, Canada, 2025, vol. 267, pp. 48962–48973.","mla":"Pervez, Adeel A., et al. “Mechanistic PDE Networks for Discovery of Governing Equations.” <i>42nd International Conference on Machine Learning</i>, vol. 267, ML Research Press, 2025, pp. 48962–73.","ama":"Pervez AA, Gavves E, Locatello F. Mechanistic PDE networks for discovery of governing equations. 
In: <i>42nd International Conference on Machine Learning</i>. Vol 267. ML Research Press; 2025:48962-48973.","chicago":"Pervez, Adeel A, Efstratios Gavves, and Francesco Locatello. “Mechanistic PDE Networks for Discovery of Governing Equations.” In <i>42nd International Conference on Machine Learning</i>, 267:48962–73. ML Research Press, 2025."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"05","type":"conference","date_created":"2025-12-14T23:02:04Z","_id":"20817","publication_identifier":{"eissn":["2640-3498"]},"related_material":{"link":[{"relation":"software","url":"https://github.com/ alpz/mech-nn-discovery-pde"}]},"acknowledgement":"AP. This project has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie Grant Agreement No. 101034413.\r\nFL. This research was funded in whole or in part by the Austrian Science Fund (FWF) 10.55776/COE12. For open access purposes, the author has applied a CC BY public\r\ncopyright license to any author accepted manuscript version arising from this submission."},{"day":"03","language":[{"iso":"eng"}],"quality_controlled":"1","year":"2024","department":[{"_id":"FrLo"}],"file":[{"content_type":"application/pdf","date_updated":"2024-02-12T08:40:36Z","file_size":8038511,"creator":"dernst","access_level":"open_access","relation":"main_file","date_created":"2024-02-12T08:40:36Z","checksum":"8fad894c34f1b3d5a14fb8ffb12f7277","success":1,"file_id":"14978","file_name":"2024_CPAL_Lao.pdf"}],"oa":1,"oa_version":"Published Version","status":"public","date_created":"2023-08-22T14:19:59Z","_id":"14213","publication_status":"published","citation":{"ista":"Lao D, Hu Z, Locatello F, Yang Y, Soatto S. 2024. Divided attention: Unsupervised multi-object discovery with contextually separated slots. 1st Conference on Parsimony and Learning. CPAL: Conference on Parsimony and Learning.","apa":"Lao, D., Hu, Z., Locatello, F., Yang, Y., &#38; Soatto, S. (2024). 
Divided attention: Unsupervised multi-object discovery with contextually separated slots. In <i>1st Conference on Parsimony and Learning</i>. Hong Kong, China.","mla":"Lao, Dong, et al. “Divided Attention: Unsupervised Multi-Object Discovery with Contextually Separated Slots.” <i>1st Conference on Parsimony and Learning</i>, 2024.","ieee":"D. Lao, Z. Hu, F. Locatello, Y. Yang, and S. Soatto, “Divided attention: Unsupervised multi-object discovery with contextually separated slots,” in <i>1st Conference on Parsimony and Learning</i>, Hong Kong, China, 2024.","short":"D. Lao, Z. Hu, F. Locatello, Y. Yang, S. Soatto, in:, 1st Conference on Parsimony and Learning, 2024.","chicago":"Lao, Dong, Zhengyang Hu, Francesco Locatello, Yanchao Yang, and Stefano Soatto. “Divided Attention: Unsupervised Multi-Object Discovery with Contextually Separated Slots.” In <i>1st Conference on Parsimony and Learning</i>, 2024.","ama":"Lao D, Hu Z, Locatello F, Yang Y, Soatto S. Divided attention: Unsupervised multi-object discovery with contextually separated slots. In: <i>1st Conference on Parsimony and Learning</i>. ; 2024."},"date_updated":"2024-02-12T08:56:23Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"01","type":"conference","arxiv":1,"has_accepted_license":"1","article_processing_charge":"No","external_id":{"arxiv":["2304.01430"]},"conference":{"end_date":"2024-01-03","start_date":"2024-01-03","name":"CPAL: Conference on Parsimony and Learning","location":"Hong Kong, China"},"file_date_updated":"2024-02-12T08:40:36Z","date_published":"2024-01-03T00:00:00Z","ddc":["000"],"publication":"1st Conference on Parsimony and Learning","abstract":[{"lang":"eng","text":"We introduce a method to segment the visual field into independently moving regions, trained with no ground truth or supervision. 
It consists of an adversarial conditional encoder-decoder architecture based on Slot Attention, modified to use the image as context to decode optical flow without attempting to reconstruct the image itself. In the resulting multi-modal representation, one modality (flow) feeds the encoder to produce separate latent codes (slots), whereas the other modality (image) conditions the decoder to generate the first (flow) from the slots. This design frees the representation from having to encode complex nuisance variability in the image due to, for instance, illumination and reflectance properties of the scene. Since customary autoencoding based on minimizing the reconstruction error does not preclude the entire flow from being encoded into a single slot, we modify the loss to an adversarial criterion based on Contextual Information Separation. The resulting min-max optimization fosters the separation of objects and their assignment to different attention slots, leading to Divided Attention, or DivA. DivA outperforms recent unsupervised multi-object motion segmentation methods while tripling run-time speed up to 104FPS and reducing the performance gap from supervised methods to 12% or less. 
DivA can handle different numbers of objects and different image sizes at training and test time, is invariant to permutation of object labels, and does not require explicit regularization."}],"author":[{"last_name":"Lao","first_name":"Dong","full_name":"Lao, Dong"},{"full_name":"Hu, Zhengyang","first_name":"Zhengyang","last_name":"Hu"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","last_name":"Locatello","first_name":"Francesco"},{"first_name":"Yanchao","last_name":"Yang","full_name":"Yang, Yanchao"},{"full_name":"Soatto, Stefano","last_name":"Soatto","first_name":"Stefano"}],"title":"Divided attention: Unsupervised multi-object discovery with contextually separated slots"},{"citation":{"apa":"Yao, D., Xu, D., Lachapelle, S., Magliacane, S., Taslakian, P., Martius, G., … Locatello, F. (2024). Multi-view causal representation learning with partial observability. In <i>12th International Conference on Learning Representations</i>. Vienna, Austria: Curran Associates.","ista":"Yao D, Xu D, Lachapelle S, Magliacane S, Taslakian P, Martius G, Kügelgen J von, Locatello F. 2024. Multi-view causal representation learning with partial observability. 12th International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","chicago":"Yao, Dingling, Danru Xu, Sébastien Lachapelle, Sara Magliacane, Perouz Taslakian, Georg Martius, Julius von Kügelgen, and Francesco Locatello. “Multi-View Causal Representation Learning with Partial Observability.” In <i>12th International Conference on Learning Representations</i>. Curran Associates, 2024.","ama":"Yao D, Xu D, Lachapelle S, et al. Multi-view causal representation learning with partial observability. In: <i>12th International Conference on Learning Representations</i>. Curran Associates; 2024.","ieee":"D. 
Yao <i>et al.</i>, “Multi-view causal representation learning with partial observability,” in <i>12th International Conference on Learning Representations</i>, Vienna, Austria, 2024.","mla":"Yao, Dingling, et al. “Multi-View Causal Representation Learning with Partial Observability.” <i>12th International Conference on Learning Representations</i>, Curran Associates, 2024.","short":"D. Yao, D. Xu, S. Lachapelle, S. Magliacane, P. Taslakian, G. Martius, J. von Kügelgen, F. Locatello, in:, 12th International Conference on Learning Representations, Curran Associates, 2024."},"month":"11","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","type":"conference","date_created":"2024-02-07T14:28:34Z","_id":"14946","acknowledgement":"This work was initiated at the Second Bellairs Workshop on Causality held at the Bellairs Research Institute, January 6–13, 2022; we thank all workshop participants for providing a stimulating research environment. Further, we thank Cian Eastwood, Luigi Gresele, Stefano Soatto, Marco Bagatella and A. René Geist for helpful discussion. GM is a member of the Machine Learning Cluster of Excellence, EXC number 2064/1 – Project number 390727645. JvK and GM acknowledge support from the German Federal Ministry of Education and Research (BMBF) through the Tübingen AI Center (FKZ: 01IS18039B). The research of DX and SM was supported by the Air Force Office of Scientific Research under award number FA8655-22-1-7155. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the United States Air Force. We also thank SURF for the support in using the Dutch National Supercomputer Snellius. SL was supported by an IVADO excellence PhD scholarship and by Samsung Electronics Co., Ldt. DY was supported by an Amazon fellowship, the International Max Planck Research School for Intelligent Systems (IMPRS-IS) and the ISTA graduate school. 
Work done outside of Amazon.","department":[{"_id":"FrLo"}],"oa_version":"Published Version","oa":1,"quality_controlled":"1","language":[{"iso":"eng"}],"OA_place":"repository","title":"Multi-view causal representation learning with partial observability","file_date_updated":"2025-02-04T12:34:23Z","abstract":[{"text":"We present a unified framework for studying the identifiability of representations learned from simultaneously observed views, such as different data modalities. We allow a partially observed setting in which each view constitutes a nonlinear mixture of a subset of underlying latent variables, which can be causally related. We prove that the information shared across all subsets of any number of views can be learned up to a smooth bijection using contrastive learning and a single encoder per view. We also provide graphical criteria indicating which latent variables can be identified through a simple set of rules, which we refer to as identifiability algebra. Our general framework and theoretical results unify and extend several previous work on multi-view nonlinear ICA, disentanglement, and causal representation learning. We experimentally validate our claims on numerical, image, and multi-modal data sets. Further, we demonstrate that the performance of prior methods is recovered in different special cases of our setup. 
Overall, we find that access to multiple partial views offers unique opportunities for identifiable representation learning, enabling the discovery of latent structures from purely observational data.","lang":"eng"}],"publication":"12th International Conference on Learning Representations","conference":{"start_date":"2024-05-07","end_date":"2024-05-07","location":"Vienna, Austria","name":"ICLR: International Conference on Learning Representations"},"external_id":{"arxiv":["2311.04056"]},"article_processing_charge":"No","arxiv":1,"publisher":"Curran Associates","date_updated":"2025-02-11T10:34:32Z","status":"public","publication_status":"published","file":[{"file_name":"2024_ICLR_Yao.pdf","file_id":"18995","success":1,"checksum":"8ed3c34706eeec622c7e8968dc0f747a","date_created":"2025-02-04T12:34:23Z","relation":"main_file","access_level":"open_access","creator":"dernst","file_size":1713606,"date_updated":"2025-02-04T12:34:23Z","content_type":"application/pdf"}],"day":"07","year":"2024","author":[{"last_name":"Yao","first_name":"Dingling","id":"d3e02e50-48a8-11ee-8f62-c108061797fa","full_name":"Yao, Dingling"},{"last_name":"Xu","first_name":"Danru","full_name":"Xu, Danru"},{"last_name":"Lachapelle","first_name":"Sébastien","full_name":"Lachapelle, Sébastien"},{"full_name":"Magliacane, Sara","first_name":"Sara","last_name":"Magliacane"},{"last_name":"Taslakian","first_name":"Perouz","full_name":"Taslakian, Perouz"},{"first_name":"Georg","last_name":"Martius","full_name":"Martius, Georg"},{"full_name":"Kügelgen, Julius von","first_name":"Julius von","last_name":"Kügelgen"},{"first_name":"Francesco","last_name":"Locatello","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"}],"ddc":["000"],"date_published":"2024-11-07T00:00:00Z","OA_type":"green","corr_author":"1","has_accepted_license":"1"},{"conference":{"start_date":"2024-07-26","end_date":"2024-07-26","name":"ICML: International Conference on Machine 
Learning"},"external_id":{"arxiv":["2405.17151"]},"article_processing_charge":"No","arxiv":1,"title":"Smoke and mirrors in causal downstream tasks","file_date_updated":"2025-01-27T11:42:24Z","volume":38,"abstract":[{"lang":"eng","text":"Machine Learning and AI have the potential to transform data-driven\r\nscientific discovery, enabling accurate predictions for several scientific\r\nphenomena. As many scientific questions are inherently causal, this paper looks\r\nat the causal inference task of treatment effect estimation, where the outcome\r\nof interest is recorded in high-dimensional observations in a Randomized\r\nControlled Trial (RCT). Despite being the simplest possible causal setting and\r\na perfect fit for deep learning, we theoretically find that many common choices\r\nin the literature may lead to biased estimates. To test the practical impact of\r\nthese considerations, we recorded ISTAnt, the first real-world benchmark for\r\ncausal inference downstream tasks on high-dimensional observations as an RCT\r\nstudying how garden ants (Lasius neglectus) respond to microparticles applied\r\nonto their colony members by hygienic grooming. Comparing 6 480 models\r\nfine-tuned from state-of-the-art visual backbones, we find that the sampling\r\nand modeling choices significantly affect the accuracy of the causal estimate,\r\nand that classification accuracy is not a proxy thereof. We further validated\r\nthe analysis, repeating it on a synthetically generated visual data set\r\ncontrolling the causal model. Our results suggest that future benchmarks should\r\ncarefully consider real downstream scientific questions, especially causal\r\nones. 
Further, we highlight guidelines for representation learning methods to\r\nhelp answer causal questions in the sciences."}],"publication":"ICML 2024 Workshop AI4Science","department":[{"_id":"SyCr"},{"_id":"FrLo"},{"_id":"GradSch"}],"oa_version":"Published Version","oa":1,"language":[{"iso":"eng"}],"quality_controlled":"1","OA_place":"publisher","citation":{"ista":"Cadei R, Lindorfer L, Cremer S, Schmid C, Locatello F. 2024. Smoke and mirrors in causal downstream tasks. ICML 2024 Workshop AI4Science. ICML: International Conference on Machine Learning vol. 38.","apa":"Cadei, R., Lindorfer, L., Cremer, S., Schmid, C., &#38; Locatello, F. (2024). Smoke and mirrors in causal downstream tasks. In <i>ICML 2024 Workshop AI4Science</i> (Vol. 38). Curran Associates.","short":"R. Cadei, L. Lindorfer, S. Cremer, C. Schmid, F. Locatello, in:, ICML 2024 Workshop AI4Science, Curran Associates, 2024.","ieee":"R. Cadei, L. Lindorfer, S. Cremer, C. Schmid, and F. Locatello, “Smoke and mirrors in causal downstream tasks,” in <i>ICML 2024 Workshop AI4Science</i>, 2024, vol. 38.","mla":"Cadei, Riccardo, et al. “Smoke and Mirrors in Causal Downstream Tasks.” <i>ICML 2024 Workshop AI4Science</i>, vol. 38, Curran Associates, 2024.","ama":"Cadei R, Lindorfer L, Cremer S, Schmid C, Locatello F. Smoke and mirrors in causal downstream tasks. In: <i>ICML 2024 Workshop AI4Science</i>. Vol 38. Curran Associates; 2024.","chicago":"Cadei, Riccardo, Lukas Lindorfer, Sylvia Cremer, Cordelia Schmid, and Francesco Locatello. “Smoke and Mirrors in Causal Downstream Tasks.” In <i>ICML 2024 Workshop AI4Science</i>, Vol. 38. 
Curran Associates, 2024."},"scopus_import":"1","type":"conference","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"09","related_material":{"link":[{"url":"https://github.com/CausalLearningAI/ISTAnt","relation":"software"}],"record":[{"id":"18895","status":"public","relation":"research_data"},{"relation":"is_continued_by","status":"for_moderation","id":"19509"}]},"_id":"18847","date_created":"2025-01-14T07:27:26Z","acknowledgement":"We thank Piersilvio De Bartolomeis, and the full Causal Learning and Artificial Intelligence (CLAI) group at ISTA for the extremely helpful discussions. Riccardo Cadei was supported by a Google Research Scholar Award and a Google Initiated Gift to Francesco Locatello. We thank the Social Immunity team at ISTA particularly Michaela Hönigsberger and Wilfrid Jean Louis, for supporting the ecological experiment and Farnaz Beikzadeh Abbasi, Luisa Fiebig and Martin Estermann for annotating ant behavior in ISTAnt.","has_accepted_license":"1","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"intvolume":"        38","author":[{"id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b","full_name":"Cadei, Riccardo","last_name":"Cadei","first_name":"Riccardo"},{"first_name":"Lukas","last_name":"Lindorfer","full_name":"Lindorfer, Lukas","id":"85f0e6d3-06b3-11ec-8982-8c5049fa4455"},{"last_name":"Cremer","first_name":"Sylvia","orcid":"0000-0002-2193-3868","id":"2F64EC8C-F248-11E8-B48F-1D18A9856A87","full_name":"Cremer, Sylvia"},{"first_name":"Cordelia","last_name":"Schmid","full_name":"Schmid, Cordelia"},{"last_name":"Locatello","first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, 
Francesco"}],"ddc":["000","570"],"date_published":"2024-09-25T00:00:00Z","OA_type":"gold","corr_author":"1","file":[{"checksum":"beedf05388bbdb7ddda81ec3d5ec7026","relation":"main_file","date_created":"2025-01-27T11:42:24Z","access_level":"open_access","creator":"dernst","file_size":4453014,"content_type":"application/pdf","date_updated":"2025-01-27T11:42:24Z","file_name":"2024_ICML_Cadei.pdf","file_id":"18896","success":1}],"day":"25","year":"2024","publisher":"Curran Associates","date_updated":"2025-07-10T11:51:50Z","publication_status":"published","status":"public"},{"article_processing_charge":"No","date_published":"2024-10-23T00:00:00Z","ddc":["570"],"corr_author":"1","OA_type":"gold","abstract":[{"lang":"eng","text":"ISTAnt is a new ecological dataset for social immunity and represents the first real-world benchmark for causal inference downstream tasks on high-dimensional observations. It analyzes grooming behavior in the ant Lasius neglectus in groups of three worker ants. The workers for the experiment were obtained from their laboratory stock colony, which had been collected from the field in 2022 in the Botanical Garden Jena, Germany. Ant collection and all experimental work were performed in compliance with international, national and institutional regulations and ethical guidelines. For the experiment, the body surface of one of the three ants was treated with a suspension of either of two microparticle types (diameter ~5 µm) to induce grooming by the two nestmates, which were individually color-coded by application of a dot of blue or orange paint, respectively. The three ants were housed in small plastic containers (diameter 28mm, height 30mm) with moistened, plastered ground and the interior walls covered with PTFE (polytetrafluoroethylene) to hamper climbing by the ants. Filming occurred in a temperature- and humidity-controlled room at 23°C within a custom-made filming box with controlled lighting and ventilation conditions. 
We set up nine ant groups at a time (always containing both treatments) and placed them randomly on positions 1-9 marked on the floor in a 3x3 grid, about 3mm from each other. The experiment was performed on two consecutive days. Videos were acquired using a USB camera (FLIR blackfly S BFS-U3-120S4C, Teledyne FLIR) with a high-performance lens (HP Series 25mm Focal Length, Edmund optics 86-572) in OBS studio 29.0.0 \\citep{bailey2017obs} at a framerate of 30 FPS and a resolution of 2500x2500 pixels. From each original video (105x105 mm), we generated nine individual videos .mkv (each ~32x32 mm, 770x770 pixels) by determining exact coordinates per container from one frame in GIMP 2.10.36 and cropping of the videos with FFmpeg 6.1.1. Annotation was performed over two consecutive days by three observers who had not been involved in the experimental setup or recording and were unaware of the treatment assignments to ensure bias-free behavioral annotation. They annotated the behavior of the ants during video observations, using custom-made software that saves the start and end frames of behaviors marked in a .csv file (see 'annotations' folder). In one of the videos, one of the nestmates' legs got inadvertently stuck to its body surface during the color-coding, interfering with its behavior, so the video was discarded. This left 44 videos from 5 independent setups (n=24 of treatment 1 and n=20 of treatment 2) of 10 minutes each for a total of 792 000 annotated frames (see 'video' folder). 
For each video, we provide the following information: the number of the set to which it belongs (1-5); the number of the position within the set reflecting the position of the ant group under the camera (1-9), for which we also provide ‘coordinates’ in the 3x3 grid (taking values -1/0/1 for both X and Y axis); treatment (1 or 2); the hour of the day when the recording was started (in 24h CEST); experimental day (A or B); the top left coordinate of the cropping square from the original video (CropX/CropY); the person annotating the video (given as A, B, C); the date of annotation (1: first day, 2: second day) and in which order the videos were annotated by each person, both reflecting a possible training effect of the person (see 'experiments_settings.csv' file)."}],"author":[{"id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b","full_name":"Cadei, Riccardo","last_name":"Cadei","first_name":"Riccardo"},{"first_name":"Francesco","last_name":"Locatello","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"},{"id":"2F64EC8C-F248-11E8-B48F-1D18A9856A87","full_name":"Cremer, Sylvia M","orcid":"0000-0002-2193-3868","last_name":"Cremer","first_name":"Sylvia M"},{"last_name":"Lindorfer","first_name":"Lukas","id":"85f0e6d3-06b3-11ec-8982-8c5049fa4455","full_name":"Lindorfer, Lukas"},{"first_name":"Cordelia","last_name":"Schmid","full_name":"Schmid, Cordelia"}],"title":"ISTAnt","day":"23","year":"2024","OA_place":"repository","department":[{"_id":"SyCr"},{"_id":"FrLo"},{"_id":"GradSch"}],"oa_version":"Published Version","oa":1,"related_material":{"record":[{"id":"18847","status":"public","relation":"used_in_publication"}]},"_id":"18895","doi":"10.6084/M9.FIGSHARE.26484934.V2","date_created":"2025-01-27T11:45:43Z","status":"public","main_file_link":[{"url":"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2","open_access":"1"}],"publisher":"Institute of Science and Technology Austria","citation":{"ista":"Cadei R, Locatello F, Cremer S, 
Lindorfer L, Schmid C. 2024. ISTAnt, Institute of Science and Technology Austria, <a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">10.6084/M9.FIGSHARE.26484934.V2</a>.","apa":"Cadei, R., Locatello, F., Cremer, S., Lindorfer, L., &#38; Schmid, C. (2024). ISTAnt. Institute of Science and Technology Austria. <a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">https://doi.org/10.6084/M9.FIGSHARE.26484934.V2</a>","mla":"Cadei, Riccardo, et al. <i>ISTAnt</i>. Institute of Science and Technology Austria, 2024, doi:<a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">10.6084/M9.FIGSHARE.26484934.V2</a>.","ieee":"R. Cadei, F. Locatello, S. Cremer, L. Lindorfer, and C. Schmid, “ISTAnt.” Institute of Science and Technology Austria, 2024.","short":"R. Cadei, F. Locatello, S. Cremer, L. Lindorfer, C. Schmid, (2024).","chicago":"Cadei, Riccardo, Francesco Locatello, Sylvia Cremer, Lukas Lindorfer, and Cordelia Schmid. “ISTAnt.” Institute of Science and Technology Austria, 2024. <a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">https://doi.org/10.6084/M9.FIGSHARE.26484934.V2</a>.","ama":"Cadei R, Locatello F, Cremer S, Lindorfer L, Schmid C. ISTAnt. 2024. doi:<a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">10.6084/M9.FIGSHARE.26484934.V2</a>"},"type":"research_data_reference","month":"10","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_updated":"2025-01-27T11:58:38Z"}]