[{"publication":"Transactions on Machine Learning Research","month":"12","has_accepted_license":"1","year":"2025","citation":{"ieee":"F. Montagna, M. T. Cairney-Leeming, D. Sridhar, and F. Locatello, “Demystifying amortized causal discovery with transformers,” <i>Transactions on Machine Learning Research</i>. ML Research Press, 2025.","chicago":"Montagna, Francesco, Maximilian T Cairney-Leeming, Dhanya Sridhar, and Francesco Locatello. “Demystifying Amortized Causal Discovery with Transformers.” <i>Transactions on Machine Learning Research</i>. ML Research Press, 2025.","short":"F. Montagna, M.T. Cairney-Leeming, D. Sridhar, F. Locatello, Transactions on Machine Learning Research (2025).","ista":"Montagna F, Cairney-Leeming MT, Sridhar D, Locatello F. 2025. Demystifying amortized causal discovery with transformers. Transactions on Machine Learning Research.","mla":"Montagna, Francesco, et al. “Demystifying Amortized Causal Discovery with Transformers.” <i>Transactions on Machine Learning Research</i>, ML Research Press, 2025.","apa":"Montagna, F., Cairney-Leeming, M. T., Sridhar, D., &#38; Locatello, F. (2025). Demystifying amortized causal discovery with transformers. <i>Transactions on Machine Learning Research</i>. ML Research Press.","ama":"Montagna F, Cairney-Leeming MT, Sridhar D, Locatello F. Demystifying amortized causal discovery with transformers. <i>Transactions on Machine Learning Research</i>. 
2025."},"external_id":{"arxiv":["2405.16924"]},"file_date_updated":"2026-01-05T09:51:28Z","file":[{"creator":"dernst","date_updated":"2026-01-05T09:51:28Z","file_size":1030280,"content_type":"application/pdf","date_created":"2026-01-05T09:51:28Z","success":1,"file_name":"2025_PMLR_Montagna.pdf","file_id":"20939","access_level":"open_access","checksum":"968c471bb1f682cf823b2d4cadea8a3f","relation":"main_file"}],"day":"18","article_processing_charge":"No","OA_type":"gold","oa_version":"Published Version","oa":1,"status":"public","publication_status":"published","PlanS_conform":"1","ddc":["000"],"date_updated":"2026-01-05T09:54:59Z","_id":"20934","alternative_title":["TMLR"],"date_published":"2025-12-18T00:00:00Z","language":[{"iso":"eng"}],"date_created":"2026-01-04T23:01:35Z","OA_place":"publisher","scopus_import":"1","publisher":"ML Research Press","article_type":"original","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","quality_controlled":"1","abstract":[{"lang":"eng","text":" Supervised learning for causal discovery from observational data often achieves competitive performance despite seemingly avoiding the explicit assumptions that traditional methods require for identifiability. In this work, we analyze CSIvA (Ke et al., 2023) on bivariate causal models, a transformer architecture for amortized inference promising to train on synthetic data and transfer to real ones. First, we bridge the gap with identifiability theory, showing that the training distribution implicitly defines a prior on the causal model of the test observations: consistent with classical approaches, good performance is achieved when we have a good prior on the test data, and the underlying model is identifiable. 
Second, we find that CSIvA cannot generalize to classes of causal models unseen during training: to overcome this limitation, we theoretically and empirically analyze when training CSIvA on datasets generated by multiple identifiable causal models with different structural assumptions improves its generalization at test time. Overall, we find that amortized causal discovery still adheres to identifiability theory, violating the previous hypothesis from Lopez-Paz et al. (2015) that supervised learning methods could overcome its restrictions."}],"author":[{"full_name":"Montagna, Francesco","last_name":"Montagna","first_name":"Francesco","id":"353afc8e-19f4-11f0-9db9-811f1723c83f"},{"full_name":"Cairney-Leeming, Maximilian T","last_name":"Cairney-Leeming","first_name":"Maximilian T","id":"2214a80c-31f8-11ee-a48d-cf52cc58759b"},{"last_name":"Sridhar","first_name":"Dhanya","full_name":"Sridhar, Dhanya"},{"full_name":"Locatello, Francesco","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","last_name":"Locatello"}],"department":[{"_id":"FrLo"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"title":"Demystifying amortized causal discovery with transformers","related_material":{"link":[{"url":"https://github.com/francescomontagna/learning-to-induce.git","relation":"software"}]},"license":"https://creativecommons.org/licenses/by/4.0/","type":"journal_article","corr_author":"1","arxiv":1,"publication_identifier":{"eissn":["2835-8856"]}},{"external_id":{"arxiv":["2503.00641"]},"citation":{"ieee":"S. Gairola, M. Böhle, F. Locatello, and B. 
Schiele, “How to probe: Simple yet effective techniques for improving post-hoc explanations,” in <i>13th International Conference on Learning Representations</i>, Singapore, 2025.","apa":"Gairola, S., Böhle, M., Locatello, F., &#38; Schiele, B. (2025). How to probe: Simple yet effective techniques for improving post-hoc explanations. In <i>13th International Conference on Learning Representations</i>. Singapore: ICLR.","ama":"Gairola S, Böhle M, Locatello F, Schiele B. How to probe: Simple yet effective techniques for improving post-hoc explanations. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025.","chicago":"Gairola, Siddhartha, Moritz Böhle, Francesco Locatello, and Bernt Schiele. “How to Probe: Simple yet Effective Techniques for Improving Post-Hoc Explanations.” In <i>13th International Conference on Learning Representations</i>. ICLR, 2025.","mla":"Gairola, Siddhartha, et al. “How to Probe: Simple yet Effective Techniques for Improving Post-Hoc Explanations.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025.","ista":"Gairola S, Böhle M, Locatello F, Schiele B. 2025. How to probe: Simple yet effective techniques for improving post-hoc explanations. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","short":"S. Gairola, M. Böhle, F. Locatello, B. 
Schiele, in:, 13th International Conference on Learning Representations, ICLR, 2025."},"year":"2025","has_accepted_license":"1","publication":"13th International Conference on Learning Representations","month":"01","date_updated":"2026-02-09T06:11:17Z","publication_status":"published","status":"public","ddc":["000"],"oa_version":"Published Version","OA_type":"gold","oa":1,"file_date_updated":"2026-02-09T06:06:14Z","file":[{"creator":"dernst","file_size":24386863,"date_updated":"2026-02-09T06:06:14Z","content_type":"application/pdf","date_created":"2026-02-09T06:06:14Z","file_id":"21162","file_name":"2025_ICLR_Gairola.pdf","success":1,"checksum":"6c8dfe4291c41d5a2c2fd838105e10b9","access_level":"open_access","relation":"main_file"}],"day":"22","article_processing_charge":"No","publisher":"ICLR","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","conference":{"location":"Singapore","name":"ICLR: International Conference on Learning Representations","start_date":"2025-04-24","end_date":"2025-04-28"},"acknowledgement":"We sincerely thank Sukrut Rao and Yue Fan for their valuable feedback on the paper and insightful discussions throughout the project. Additionally, we appreciate Sukrut’s help with some LaTeX sorcery. 
This work was partially supported by ELSA Mobility Program as part of the ELLIS exchange program to the Institute of Science and Technology Austria (ISTA), where a portion of this research was conducted.","language":[{"iso":"eng"}],"OA_place":"publisher","date_created":"2026-01-27T12:48:35Z","_id":"21049","date_published":"2025-01-22T00:00:00Z","arxiv":1,"corr_author":"1","type":"conference","title":"How to probe: Simple yet effective techniques for improving post-hoc explanations","related_material":{"link":[{"url":"https://github.com/sidgairo18/how-to-probe","relation":"software"}]},"author":[{"full_name":"Gairola, Siddhartha","last_name":"Gairola","id":"fb21489d-057c-11f1-b1b6-d68cd6ae64f5","first_name":"Siddhartha"},{"full_name":"Böhle, Moritz","last_name":"Böhle","first_name":"Moritz"},{"first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello","full_name":"Locatello, Francesco"},{"last_name":"Schiele","first_name":"Bernt","full_name":"Schiele, Bernt"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"department":[{"_id":"FrLo"}],"quality_controlled":"1","abstract":[{"text":"Post-hoc importance attribution methods are a popular tool for “explaining” Deep Neural Networks (DNNs) and are inherently based on the assumption that the explanations can be applied independently of how the models were trained. Contrarily, in this work we bring forward empirical evidence that challenges this very notion. Surprisingly, we discover a strong dependency on and demonstrate that the training details of a pre-trained model’s classification layer (<10% of model parameters) play a crucial role, much more than the pre-training scheme itself. 
This is of high practical relevance: (1) as techniques for pre-training models are becoming increasingly diverse, understanding the interplay between these techniques and attribution methods is critical; (2) it sheds light on an important yet overlooked assumption of post-hoc attribution methods which can drastically impact model explanations and how they are interpreted eventually. With this finding we also present simple yet effective adjustments to the classification layers, that can significantly enhance the quality of model explanations. We validate our findings across several visual pre-training frameworks (fully-supervised, self-supervised, contrastive vision-language training) and analyse how they impact explanations for a wide range of attribution methods on a diverse set of evaluation metrics.","lang":"eng"}]},{"ddc":["000"],"status":"public","publication_status":"published","date_updated":"2026-02-10T11:54:02Z","article_processing_charge":"No","day":"01","volume":275,"file_date_updated":"2026-01-29T14:17:48Z","file":[{"creator":"flocatel","file_size":1739334,"date_updated":"2026-01-29T14:17:48Z","content_type":"application/pdf","date_created":"2026-01-29T14:17:48Z","file_name":"montagna25a.pdf","file_id":"21067","success":1,"checksum":"f2bc44b2320667d4049b3518b1f2fe5d","access_level":"open_access","relation":"main_file"}],"oa":1,"oa_version":"Published Version","OA_type":"gold","has_accepted_license":"1","external_id":{"arxiv":["2407.18755"]},"year":"2025","citation":{"ama":"Montagna F, Faller P, Blöbaum P, Kirschbaum E, Locatello F. Score matching through the roof: Linear, nonlinear, and latent variables causal discovery. In: <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i>. Vol 275. ML Research Press; 2025:552-605.","apa":"Montagna, F., Faller, P., Blöbaum, P., Kirschbaum, E., &#38; Locatello, F. (2025). Score matching through the roof: Linear, nonlinear, and latent variables causal discovery. 
In <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i> (Vol. 275, pp. 552–605). Lausanne, Switzerland: ML Research Press.","chicago":"Montagna, Francesco, Philipp Faller, Patrik Blöbaum, Elke Kirschbaum, and Francesco Locatello. “Score Matching through the Roof: Linear, Nonlinear, and Latent Variables Causal Discovery.” In <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i>, 275:552–605. ML Research Press, 2025.","short":"F. Montagna, P. Faller, P. Blöbaum, E. Kirschbaum, F. Locatello, in:, Proceedings of the Fourth Conference on Causal Learning and Reasoning, ML Research Press, 2025, pp. 552–605.","mla":"Montagna, Francesco, et al. “Score Matching through the Roof: Linear, Nonlinear, and Latent Variables Causal Discovery.” <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i>, vol. 275, ML Research Press, 2025, pp. 552–605.","ista":"Montagna F, Faller P, Blöbaum P, Kirschbaum E, Locatello F. 2025. Score matching through the roof: Linear, nonlinear, and latent variables causal discovery. Proceedings of the Fourth Conference on Causal Learning and Reasoning. CLeaR: Conference on Causal Learning and Reasoning, PMLR, vol. 275, 552–605.","ieee":"F. Montagna, P. Faller, P. Blöbaum, E. Kirschbaum, and F. Locatello, “Score matching through the roof: Linear, nonlinear, and latent variables causal discovery,” in <i>Proceedings of the Fourth Conference on Causal Learning and Reasoning</i>, Lausanne, Switzerland, 2025, vol. 275, pp. 
552–605."},"month":"05","publication":"Proceedings of the Fourth Conference on Causal Learning and Reasoning","title":"Score matching through the roof: Linear, nonlinear, and latent variables causal discovery","main_file_link":[{"url":"https://proceedings.mlr.press/v275/montagna25a.html","open_access":"1"}],"arxiv":1,"publication_identifier":{"eissn":["2640-3498"]},"corr_author":"1","type":"conference","abstract":[{"text":"Causal discovery from observational data holds great promise, but existing methods rely on strong assumptions about the underlying causal structure, often requiring full observability of all relevant variables. We tackle these challenges by leveraging the score function ∇ log p(X) of observed variables for causal discovery and propose the following contributions. First, we generalize the existing results of identifiability with the score to additive noise models with minimal requirements on the causal mechanisms. Second, we establish conditions for inferring causal relations from the score even in the presence of hidden variables; this result is two-faced: we demonstrate the score’s potential as an alternative to conditional independence tests to infer the equivalence class of causal graphs with hidden variables, and we provide the necessary conditions for identifying direct causes in latent variable models. 
Building on these insights, we propose a flexible algorithm for causal discovery across linear, nonlinear, and latent variable models, which we empirically validate.","lang":"eng"}],"quality_controlled":"1","department":[{"_id":"FrLo"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"author":[{"full_name":"Montagna, Francesco","first_name":"Francesco","last_name":"Montagna"},{"first_name":"Philipp","last_name":"Faller","full_name":"Faller, Philipp"},{"full_name":"Blöbaum, Patrik","first_name":"Patrik","last_name":"Blöbaum"},{"full_name":"Kirschbaum, Elke","first_name":"Elke","last_name":"Kirschbaum"},{"full_name":"Locatello, Francesco","last_name":"Locatello","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco"}],"conference":{"start_date":"2025-05-07","location":"Lausanne, Switzerland","name":"CLeaR: Conference on Causal Learning and Reasoning","end_date":"2025-05-09"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"ML Research Press","date_published":"2025-05-01T00:00:00Z","page":"552-605","_id":"21066","alternative_title":["PMLR"],"intvolume":"       275","date_created":"2026-01-29T14:19:09Z","OA_place":"publisher","acknowledgement":"Philipp M. Faller was supported by a doctoral scholarship of the Studienstiftung des deutschen\r\nVolkes (German Academic Scholarship Foundation). This work has been supported by AFOSR,\r\ngrant n. FA8655-20-1-7035. FM is supported by Programma Operativo Nazionale ricerca e innovazione 2014-2020. We thank Atalanti A. 
Mastakouri, Kun Zhang and Haoyue Dai for the insightful discussions.","language":[{"iso":"eng"}]},{"OA_type":"green","oa_version":"Preprint","oa":1,"volume":38,"day":"15","article_processing_charge":"No","date_updated":"2026-02-10T12:08:52Z","status":"public","publication_status":"epub_ahead","ddc":["000"],"month":"12","publication":"39th Annual Conference on Neural Information Processing Systems","citation":{"ama":"Yao D, Huang S, Cadei R, Zhang K, Locatello F. The third pillar of causal analysis? A measurement perspective on causal representations. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","apa":"Yao, D., Huang, S., Cadei, R., Zhang, K., &#38; Locatello, F. (2025). The third pillar of causal analysis? A measurement perspective on causal representations. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","chicago":"Yao, Dingling, Shimeng Huang, Riccardo Cadei, Kun Zhang, and Francesco Locatello. “The Third Pillar of Causal Analysis? A Measurement Perspective on Causal Representations.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. Neural Information Processing Systems Foundation, 2025.","mla":"Yao, Dingling, et al. “The Third Pillar of Causal Analysis? A Measurement Perspective on Causal Representations.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","ista":"Yao D, Huang S, Cadei R, Zhang K, Locatello F. 2025. The third pillar of causal analysis? A measurement perspective on causal representations. 39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 38.","short":"D. Yao, S. Huang, R. Cadei, K. Zhang, F. 
Locatello, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","ieee":"D. Yao, S. Huang, R. Cadei, K. Zhang, and F. Locatello, “The third pillar of causal analysis? A measurement perspective on causal representations,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38."},"year":"2025","external_id":{"arxiv":["2505.17708"]},"has_accepted_license":"1","author":[{"full_name":"Yao, Dingling","last_name":"Yao","id":"d3e02e50-48a8-11ee-8f62-c108061797fa","first_name":"Dingling"},{"full_name":"Huang, Shimeng","id":"989c2a06-fb4e-11ef-a992-ab766442255b","orcid":"0000-0001-6919-821X","first_name":"Shimeng","last_name":"Huang"},{"last_name":"Cadei","id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b","first_name":"Riccardo","full_name":"Cadei, Riccardo"},{"first_name":"Kun","last_name":"Zhang","full_name":"Zhang, Kun"},{"full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"department":[{"_id":"FrLo"}],"abstract":[{"text":"Causal reasoning and discovery, two fundamental tasks of causal analysis,\r\noften face challenges in applications due to the complexity, noisiness, and high-dimensionality of real-world data. Despite recent progress in identifying latent\r\ncausal structures using causal representation learning (CRL), what makes learned\r\nrepresentations useful for causal downstream tasks and how to evaluate them are\r\nstill not well understood. In this paper, we reinterpret CRL using a measurement\r\nmodel framework, where the learned representations are viewed as proxy measurements of the latent causal variables. 
Our approach clarifies the conditions under\r\nwhich learned representations support downstream causal reasoning and provides\r\na principled basis for quantitatively assessing the quality of representations using\r\na new Test-based Measurement EXclusivity (T-MEX) score. We validate T-MEX\r\nacross diverse causal inference scenarios, including numerical simulations and\r\nreal-world ecological video analysis, demonstrating that the proposed framework\r\nand corresponding score effectively assess the identification of learned representations and their usefulness for causal downstream tasks. Reproducible code can\r\nbe found at https://github.com/shimenghuang/a-measurement-perspective-of-crl.","lang":"eng"}],"quality_controlled":"1","corr_author":"1","type":"conference","publication_identifier":{"issn":["1049-5258"]},"arxiv":1,"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2505.17708"}],"title":"The third pillar of causal analysis? A measurement perspective on causal representations","related_material":{"link":[{"relation":"software","url":"https://github.com/shimenghuang/a-measurement-perspective-of-crl"}]},"language":[{"iso":"eng"}],"acknowledgement":"This research was funded in whole or in part by the Austrian Science Fund (FWF) 10.55776/COE12. 
For open access purposes, the author has applied a CC BY public copyright license to any accepted manuscript version arising from this submission.\r\n","date_created":"2026-01-29T14:24:56Z","OA_place":"repository","alternative_title":["Advances in Neural Information Processing Systems"],"_id":"21068","intvolume":"        38","date_published":"2025-12-15T00:00:00Z","publisher":"Neural Information Processing Systems Foundation","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","conference":{"name":"NeurIPS: Neural Information Processing Systems","location":"San Diego, CA, United States","start_date":"2025-12-02","end_date":"2025-12-07"}},{"OA_type":"green","oa_version":"Preprint","oa":1,"volume":38,"day":"01","article_processing_charge":"No","date_updated":"2026-02-16T11:38:25Z","status":"public","publication_status":"epub_ahead","ddc":["000"],"month":"12","publication":"39th Annual Conference on Neural Information Processing Systems","year":"2025","citation":{"ieee":"B. Demirel, M. Fumero, and F. Locatello, “Out-of-Distribution detection with relative angles,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38.","apa":"Demirel, B., Fumero, M., &#38; Locatello, F. (2025). Out-of-Distribution detection with relative angles. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","ama":"Demirel B, Fumero M, Locatello F. Out-of-Distribution detection with relative angles. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","short":"B. Demirel, M. Fumero, F. Locatello, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","ista":"Demirel B, Fumero M, Locatello F. 2025. Out-of-Distribution detection with relative angles. 
39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 38.","mla":"Demirel, Berker, et al. “Out-of-Distribution Detection with Relative Angles.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","chicago":"Demirel, Berker, Marco  Fumero, and Francesco Locatello. “Out-of-Distribution Detection with Relative Angles.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. Neural Information Processing Systems Foundation, 2025."},"external_id":{"arxiv":["2410.04525"]},"has_accepted_license":"1","author":[{"first_name":"Berker","id":"8b4bc47f-3200-11ee-973b-8f0e7be21a9f","last_name":"Demirel","full_name":"Demirel, Berker"},{"last_name":"Fumero","first_name":"Marco ","full_name":"Fumero, Marco "},{"full_name":"Locatello, Francesco","last_name":"Locatello","orcid":"0000-0002-4850-0683","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"department":[{"_id":"FrLo"}],"abstract":[{"lang":"eng","text":"Deep learning systems deployed in real-world applications often encounter data that is different from their in-distribution (ID). A reliable model should ideally abstain from making decisions in this out-of-distribution (OOD) setting. Existing state-of-the-art methods primarily focus on feature distances, such as k-th nearest neighbors and distances to decision boundaries, either overlooking or ineffectively using in-distribution statistics. In this work, we propose a novel angle-based metric for OOD detection that is computed relative to the in-distribution structure. 
We demonstrate that the angles between feature representations and decision boundaries, viewed from the mean of in-distribution features, serve as an effective discriminative factor between ID and OOD data. We evaluate our method on nine ImageNet-pretrained models. Our approach achieves the lowest FPR in 5 out of 9 ImageNet models, obtains the best average FPR overall, and consistently ranks among the top 3 across all evaluated models. Furthermore, we highlight the benefits of contrastive representations by showing strong performance with ResNet SCL and CLIP architectures. Finally, we demonstrate that the scale-invariant nature of our score enables an ensemble strategy via simple score summation. "}],"quality_controlled":"1","corr_author":"1","type":"conference","arxiv":1,"publication_identifier":{"issn":["1049-5258"]},"main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2410.04525","open_access":"1"}],"title":"Out-of-Distribution detection with relative angles","related_material":{"link":[{"url":"https://github.com/berkerdemirel/ORA-OOD-Detection-with-Relative-Angles","relation":"software"}]},"language":[{"iso":"eng"}],"acknowledgement":"This research was funded in whole or in part by the Austrian Science Fund (FWF) 10.55776/COE12. For open access purposes, the author has applied a CC BY public copyright license to any accepted manuscript version arising from this submission.\r\n","OA_place":"repository","date_created":"2026-01-29T14:26:47Z","_id":"21070","intvolume":"        38","alternative_title":["Advances in Neural Information Processing Systems"],"date_published":"2025-12-01T00:00:00Z","publisher":"Neural Information Processing Systems Foundation","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","conference":{"name":"NeurIPS: Neural Information Processing Systems","location":"San Diego, CA, United States","start_date":"2025-12-02","end_date":"2025-12-07"}},{"has_accepted_license":"1","external_id":{"arxiv":["2510.21518"]},"citation":{"ieee":"L. 
Basile, V. Maiorca, D. Doimo, F. Locatello, and A. Cazzaniga, “Head pursuit: Probing attention specialization in multimodal transformers,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38.","ama":"Basile L, Maiorca V, Doimo D, Locatello F, Cazzaniga A. Head pursuit: Probing attention specialization in multimodal transformers. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","apa":"Basile, L., Maiorca, V., Doimo, D., Locatello, F., &#38; Cazzaniga, A. (2025). Head pursuit: Probing attention specialization in multimodal transformers. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","mla":"Basile, Lorenzo, et al. “Head Pursuit: Probing Attention Specialization in Multimodal Transformers.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","short":"L. Basile, V. Maiorca, D. Doimo, F. Locatello, A. Cazzaniga, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","ista":"Basile L, Maiorca V, Doimo D, Locatello F, Cazzaniga A. 2025. Head pursuit: Probing attention specialization in multimodal transformers. 39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 38.","chicago":"Basile, Lorenzo, Valentino Maiorca, Diego Doimo, Francesco Locatello, and Alberto Cazzaniga. “Head Pursuit: Probing Attention Specialization in Multimodal Transformers.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. 
Neural Information Processing Systems Foundation, 2025."},"year":"2025","publication":"39th Annual Conference on Neural Information Processing Systems","month":"12","status":"public","publication_status":"epub_ahead","ddc":["000"],"date_updated":"2026-02-11T08:55:36Z","file_date_updated":"2026-01-29T14:29:14Z","file":[{"date_updated":"2026-01-29T14:29:14Z","file_size":4271547,"creator":"flocatel","content_type":"application/pdf","success":1,"file_id":"21073","file_name":"2510.21518v2.pdf","date_created":"2026-01-29T14:29:14Z","relation":"main_file","access_level":"open_access","checksum":"85be3f98663e2595cf37001852b477cb"}],"article_processing_charge":"No","day":"15","volume":38,"oa_version":"Preprint","OA_type":"gold","oa":1,"conference":{"end_date":"2025-12-07","location":"San Diego, CA, United States","name":"NeurIPS: Neural Information Processing Systems","start_date":"2025-12-02"},"publisher":"Neural Information Processing Systems Foundation","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","intvolume":"        38","_id":"21072","date_published":"2025-12-15T00:00:00Z","acknowledgement":"The authors acknowledge the Area Science Park supercomputing platform ORFEO made available for conducting the research reported in this paper, and the technical support of the Laboratory of Data Engineering staff. LB, DD and AC were supported by the project “Supporto alla diagnosi di malattie rare tramite l’intelligenza artificiale\" CUP: F53C22001770002 and “Valutazione automatica delle immagini diagnostiche tramite l’intelligenza artificiale\", CUP: F53C22001780002. LB was supported by the European Union – NextGenerationEU within the project PNRR “Finanziamento di progetti presentati da giovani ricercatori\" - Mission 4 Component 2 Investment 1.2, CUP: J93C25000440001. AC was supported by the European Union – NextGenerationEU within the project PNRR “PRP@CERIC\" IR0000028 - Mission 4 Component 2 Investment 3.1 Action 3.1.1. 
","language":[{"iso":"eng"}],"date_created":"2026-01-29T14:29:23Z","OA_place":"publisher","title":"Head pursuit: Probing attention specialization in multimodal transformers","arxiv":1,"publication_identifier":{"issn":["1049-5258"]},"type":"conference","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2510.21518"}],"abstract":[{"text":"Language and vision-language models have shown impressive performance across a wide range of tasks, but their internal mechanisms remain only partly understood. In this work, we study how individual attention heads in text-generative models specialize in specific semantic or visual attributes. Building on an established interpretability method, we reinterpret the practice of probing intermediate activations with the final decoding layer through the lens of signal processing. This lets us analyze multiple samples in a principled way and rank attention heads based on their relevance to target concepts. Our results show consistent patterns of specialization at the head level across both unimodal and multimodal transformers. Remarkably, we find that editing as few as 1% of the heads, selected using our method, can reliably suppress or enhance targeted concepts in the model output. We validate our approach on language tasks such as question answering and toxicity mitigation, as well as vision-language tasks including image classification and captioning. 
Our findings highlight an interpretable and controllable structure within attention layers, offering simple tools for understanding and editing large-scale generative models.","lang":"eng"}],"quality_controlled":"1","author":[{"full_name":"Basile, Lorenzo","first_name":"Lorenzo","last_name":"Basile"},{"last_name":"Maiorca","first_name":"Valentino","full_name":"Maiorca, Valentino"},{"full_name":"Doimo, Diego","last_name":"Doimo","first_name":"Diego"},{"full_name":"Locatello, Francesco","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","last_name":"Locatello"},{"full_name":"Cazzaniga, Alberto","first_name":"Alberto","last_name":"Cazzaniga"}],"department":[{"_id":"FrLo"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"}},{"title":"Connecting neural models latent geometries with relative geodesic representations","corr_author":"1","type":"conference","arxiv":1,"publication_identifier":{"issn":["1049-5258"]},"abstract":[{"lang":"eng","text":"Neural models learn representations of high-dimensional data on low-dimensional manifolds. Multiple factors, including stochasticities in the training process, model architectures, and additional inductive biases, may induce different representations, even when learning the same task on the same data. However, it has recently been shown that when a latent structure is shared between distinct latent spaces, relative distances between representations can be preserved, up to distortions. Building on this idea, we demonstrate that exploiting the differential-geometric structure of latent spaces of neural models, it is possible to capture precisely the transformations between representational spaces trained on similar data distributions. 
Specifically, we assume that distinct neural models parametrize approximately the same underlying manifold, and introduce a representation based on the pullback metric that captures the intrinsic structure of the latent space, while scaling efficiently to large models. We validate experimentally our method on model stitching and retrieval tasks, covering autoencoders and vision foundation discriminative models, across diverse architectures, datasets, pretraining schemes and modalities. Code is available at the following link."}],"quality_controlled":"1","tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"department":[{"_id":"FrLo"}],"author":[{"last_name":"Yu","first_name":"Hanlin","full_name":"Yu, Hanlin"},{"last_name":"Inal","first_name":"Befrin","full_name":"Inal, Befrin"},{"full_name":"Arvanitidis, Georgios","first_name":"Georgios","last_name":"Arvanitidis"},{"full_name":"Hauberg, Soren","first_name":"Soren","last_name":"Hauberg"},{"full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","last_name":"Locatello"},{"full_name":"Fumero, Marco","id":"1c1593eb-393f-11ef-bb8e-ab4f1e979650","first_name":"Marco","last_name":"Fumero"}],"conference":{"start_date":"2025-12-02","name":"NeurIPS: Neural Information Processing Systems","location":"San Diego, CA, United States","end_date":"2025-12-07"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"Neural Information Processing Systems Foundation","date_published":"2025-12-15T00:00:00Z","intvolume":"        38","_id":"21074","alternative_title":["Advances in Neural Information Processing Systems"],"OA_place":"publisher","date_created":"2026-01-29T14:31:52Z","language":[{"iso":"eng"}],"acknowledgement":"We thank Gregor Krzmanc, German Magai, Vital Fernandez for insightful 
discussions in the early stages of the project. HY was supported by the Research Council of Finland Flagship programme: Finnish Center for Artificial Intelligence FCAI. HY wishes to acknowledge CSC - IT Center for Science, Finland, for computational resources. GA was supported by the DFF Sapere Aude Starting Grant “GADL”. SH was supported by a research grant (42062) from VILLUM FONDEN and partly funded by the Novo Nordisk Foundation through the Center for Basic Research in Life Science (NNF20OC0062606). SH received funding from the European Research Council (ERC) under the European Union’s Horizon Programme (grant agreement 101125003). MF is supported by the MSCA IST-Bridge fellowship which has received funding from the European Union’s Horizon 2020 research and innovation program under the Marie Skłodowska-Curie grant agreement No 101034413.","ddc":["000"],"publication_status":"epub_ahead","status":"public","date_updated":"2026-02-11T09:03:37Z","volume":38,"article_processing_charge":"No","day":"15","file":[{"file_id":"21075","file_name":"2506.01599v2.pdf","success":1,"date_created":"2026-01-29T14:31:42Z","relation":"main_file","access_level":"open_access","checksum":"b1a645418025f46394764cd16d0cb089","file_size":7749349,"date_updated":"2026-01-29T14:31:42Z","creator":"flocatel","content_type":"application/pdf"}],"project":[{"name":"IST-BRIDGE: International postdoctoral program","call_identifier":"H2020","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c","grant_number":"101034413"}],"file_date_updated":"2026-01-29T14:31:42Z","oa":1,"OA_type":"gold","oa_version":"Published Version","has_accepted_license":"1","year":"2025","citation":{"ieee":"H. Yu, B. Inal, G. Arvanitidis, S. Hauberg, F. Locatello, and M. Fumero, “Connecting neural models latent geometries with relative geodesic representations,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 
38.","apa":"Yu, H., Inal, B., Arvanitidis, G., Hauberg, S., Locatello, F., &#38; Fumero, M. (2025). Connecting neural models latent geometries with relative geodesic representations. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","ama":"Yu H, Inal B, Arvanitidis G, Hauberg S, Locatello F, Fumero M. Connecting neural models latent geometries with relative geodesic representations. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","ista":"Yu H, Inal B, Arvanitidis G, Hauberg S, Locatello F, Fumero M. 2025. Connecting neural models latent geometries with relative geodesic representations. 39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 38.","mla":"Yu, Hanlin, et al. “Connecting Neural Models Latent Geometries with Relative Geodesic Representations.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","short":"H. Yu, B. Inal, G. Arvanitidis, S. Hauberg, F. Locatello, M. Fumero, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","chicago":"Yu, Hanlin, Befrin Inal, Georgios Arvanitidis, Soren Hauberg, Francesco Locatello, and Marco Fumero. “Connecting Neural Models Latent Geometries with Relative Geodesic Representations.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. 
Neural Information Processing Systems Foundation, 2025."},"external_id":{"arxiv":["2506.01599"]},"ec_funded":1,"month":"12","publication":"39th Annual Conference on Neural Information Processing Systems"},{"ddc":["000"],"status":"public","publication_status":"epub_ahead","date_updated":"2026-02-16T11:39:33Z","day":"15","article_processing_charge":"No","volume":38,"file":[{"content_type":"application/pdf","creator":"flocatel","date_updated":"2026-01-29T14:35:02Z","file_size":8489023,"access_level":"open_access","checksum":"92467fa566cd36671a6a3b9e71ae0f71","relation":"main_file","date_created":"2026-01-29T14:35:02Z","success":1,"file_id":"21077","file_name":"17546_Prediction_Powered_Causa.pdf"}],"file_date_updated":"2026-01-29T14:35:02Z","oa":1,"oa_version":"Published Version","OA_type":"gold","has_accepted_license":"1","citation":{"chicago":"Cadei, Riccardo, Ilker Demirel, Piersilvio De Bartolomeis, Lukas Lindorfer, Sylvia Cremer, Cordelia Schmid, and Francesco Locatello. “Prediction-Powered Causal Inferences.” In <i>39th Annual Conference on Neural Information Processing Systems</i>, Vol. 38. Neural Information Processing Systems Foundation, 2025.","mla":"Cadei, Riccardo, et al. “Prediction-Powered Causal Inferences.” <i>39th Annual Conference on Neural Information Processing Systems</i>, vol. 38, Neural Information Processing Systems Foundation, 2025.","ista":"Cadei R, Demirel I, De Bartolomeis P, Lindorfer L, Cremer S, Schmid C, Locatello F. 2025. Prediction-powered causal inferences. 39th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 38.","short":"R. Cadei, I. Demirel, P. De Bartolomeis, L. Lindorfer, S. Cremer, C. Schmid, F. Locatello, in:, 39th Annual Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2025.","ama":"Cadei R, Demirel I, De Bartolomeis P, et al. 
Prediction-powered causal inferences. In: <i>39th Annual Conference on Neural Information Processing Systems</i>. Vol 38. Neural Information Processing Systems Foundation; 2025.","apa":"Cadei, R., Demirel, I., De Bartolomeis, P., Lindorfer, L., Cremer, S., Schmid, C., &#38; Locatello, F. (2025). Prediction-powered causal inferences. In <i>39th Annual Conference on Neural Information Processing Systems</i> (Vol. 38). San Diego, CA, United States: Neural Information Processing Systems Foundation.","ieee":"R. Cadei <i>et al.</i>, “Prediction-powered causal inferences,” in <i>39th Annual Conference on Neural Information Processing Systems</i>, San Diego, CA, United States, 2025, vol. 38."},"year":"2025","month":"12","publication":"39th Annual Conference on Neural Information Processing Systems","title":"Prediction-powered causal inferences","publication_identifier":{"issn":["1049-5258"]},"type":"conference","quality_controlled":"1","abstract":[{"text":"In many scientific experiments, the data annotating cost constraints the pace for testing novel hypotheses. Yet, modern machine learning pipelines offer a promising solution—provided their predictions yield correct conclusions. We focus on Prediction-Powered Causal Inferences (PPCI), i.e., estimating the treatment effect in an unlabeled target experiment, relying on training data with the same outcome annotated but potentially different treatment or effect modifiers. We first show that conditional calibration guarantees valid PPCI at population level. Then, we introduce a sufficient representation constraint transferring validity across experiments, which we propose to enforce in practice in Deconfounded Empirical Risk Minimization, our new model-agnostic training objective. We validate our method on synthetic and real-world scientific data, solving impossible problem instances for Empirical Risk Minimization even with standard invariance constraints. 
In particular, for the first time, we achieve valid causal inference on a scientific experiment with complex recording and no human annotations, fine-tuning a foundational model on our similar annotated experiment.","lang":"eng"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"department":[{"_id":"FrLo"},{"_id":"SyCr"}],"author":[{"first_name":"Riccardo","id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b","last_name":"Cadei","full_name":"Cadei, Riccardo"},{"first_name":"Ilker","last_name":"Demirel","full_name":"Demirel, Ilker"},{"full_name":"De Bartolomeis, Piersilvio","last_name":"De Bartolomeis","first_name":"Piersilvio"},{"first_name":"Lukas","id":"85f0e6d3-06b3-11ec-8982-8c5049fa4455","last_name":"Lindorfer","full_name":"Lindorfer, Lukas"},{"full_name":"Cremer, Sylvia","id":"2F64EC8C-F248-11E8-B48F-1D18A9856A87","first_name":"Sylvia","orcid":"0000-0002-2193-3868","last_name":"Cremer"},{"last_name":"Schmid","first_name":"Cordelia","full_name":"Schmid, Cordelia"},{"full_name":"Locatello, Francesco","last_name":"Locatello","orcid":"0000-0002-4850-0683","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"}],"conference":{"end_date":"2025-12-07","name":"NeurIPS: Neural Information Processing Systems","location":"San Diego, CA, United States","start_date":"2025-12-02"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"Neural Information Processing Systems Foundation","date_published":"2025-12-15T00:00:00Z","alternative_title":["Advances in Neural Information Processing Systems"],"_id":"21076","intvolume":"        38","date_created":"2026-01-29T14:35:11Z","OA_place":"publisher","acknowledgement":"We thank the Causal Learning and Artificial Intelligence group at ISTA for the continuous feedback on the project and valuable discussions. 
We thank the Social Immunity group at ISTA, particularly Jinook Oh, for the annotation program and Michaela Hoenigsberger for supporting our ecological experiment. Riccardo Cadei is supported by a Google Research Scholar Award and a Google Initiated Gift to Francesco Locatello. This research was funded in part by the Austrian Science Fund (FWF) 10.55776/COE12). It was further partially supported by the ISTA Interdisciplinary Project Committee for the collaborative project “ALED” between Francesco Locatello and Sylvia Cremer. For open access purposes, the author has applied a CC BY public copyright license to any author accepted manuscript version arising from this submission.","language":[{"iso":"eng"}]},{"conference":{"end_date":"2025-04-28","start_date":"2025-04-24","name":"ICLR: International Conference on Learning Representations","location":"Singapore"},"publisher":"ICLR","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"19010","date_published":"2025-01-22T00:00:00Z","language":[{"iso":"eng"}],"acknowledgement":"We thank Jiaqi Zhang, Francesco Montagna, David Lopez-Paz, Kartik Ahuja, Thomas Kipf, Sara\r\nMagliacane, Julius von Kügelgen, Kun Zhang, and Bernhard Schölkopf for extremely helpful discussion. Riccardo Cadei was supported by a Google Research Scholar Award to Francesco Locatello. We acknowledge the Third Bellairs Workshop on Causal Representation Learning held at the Bellairs Research Institute, February 9/16, 2024, and a debate on the difference between interventions and counterfactuals in disentanglement and CRL that took place during Dhanya Sridhar’s lecture, which motivated us to significantly broaden the scope of the paper. 
We thank Dhanya and all participants of the workshop.","date_created":"2025-02-05T09:23:25Z","OA_place":"publisher","scopus_import":"1","title":"Unifying causal representation learning with the invariance principle","corr_author":"1","type":"conference","arxiv":1,"abstract":[{"text":"Causal representation learning aims at recovering latent causal variables from high-dimensional observations to solve causal downstream tasks, such as predicting the effect of new interventions or more robust classification. A plethora of methods have been developed, each tackling carefully crafted problem settings that lead to different types of identifiability. The folklore is that these different settings are important, as they are often linked to different rungs of Pearl's causal hierarchy, although not all neatly fit. Our main contribution is to show that many existing causal representation learning approaches methodologically align the representation to known data symmetries. Identification of the variables is guided by equivalence classes across different \"data pockets\" that are not necessarily causal. This result suggests important implications, allowing us to unify many existing approaches in a single method that can mix and match different assumptions, including non-causal ones, based on the invariances relevant to our application. It also significantly benefits applicability, which we demonstrate by improving treatment effect estimation on real-world high-dimensional ecological data. 
Overall, this paper clarifies the role of causality assumptions in the discovery of causal variables and shifts the focus to preserving data symmetries.","lang":"eng"}],"quality_controlled":"1","author":[{"full_name":"Yao, Dingling","last_name":"Yao","first_name":"Dingling","id":"d3e02e50-48a8-11ee-8f62-c108061797fa"},{"full_name":"Rancati, Dario","last_name":"Rancati","id":"feb58f2e-72ef-11ef-b75a-8f0894539cd0","first_name":"Dario"},{"last_name":"Cadei","first_name":"Riccardo","id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b","full_name":"Cadei, Riccardo"},{"last_name":"Fumero","first_name":"Marco","id":"1c1593eb-393f-11ef-bb8e-ab4f1e979650","full_name":"Fumero, Marco"},{"last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","first_name":"Francesco","full_name":"Locatello, Francesco"}],"department":[{"_id":"FrLo"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"has_accepted_license":"1","citation":{"ieee":"D. Yao, D. Rancati, R. Cadei, M. Fumero, and F. Locatello, “Unifying causal representation learning with the invariance principle,” in <i>13th International Conference on Learning Representations</i>, Singapore, 2025.","apa":"Yao, D., Rancati, D., Cadei, R., Fumero, M., &#38; Locatello, F. (2025). Unifying causal representation learning with the invariance principle. In <i>13th International Conference on Learning Representations</i>. Singapore: ICLR.","ama":"Yao D, Rancati D, Cadei R, Fumero M, Locatello F. Unifying causal representation learning with the invariance principle. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025.","chicago":"Yao, Dingling, Dario Rancati, Riccardo Cadei, Marco Fumero, and Francesco Locatello. 
“Unifying Causal Representation Learning with the Invariance Principle.” In <i>13th International Conference on Learning Representations</i>. ICLR, 2025.","mla":"Yao, Dingling, et al. “Unifying Causal Representation Learning with the Invariance Principle.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025.","short":"D. Yao, D. Rancati, R. Cadei, M. Fumero, F. Locatello, in:, 13th International Conference on Learning Representations, ICLR, 2025.","ista":"Yao D, Rancati D, Cadei R, Fumero M, Locatello F. 2025. Unifying causal representation learning with the invariance principle. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations."},"year":"2025","external_id":{"arxiv":["2409.02772"]},"publication":"13th International Conference on Learning Representations","month":"01","publication_status":"published","status":"public","ddc":["000"],"date_updated":"2026-02-09T05:52:14Z","file_date_updated":"2026-01-27T12:43:25Z","file":[{"date_created":"2026-01-27T12:43:25Z","file_name":"4356_Unifying_Causal_Represent (1).pdf","file_id":"21048","success":1,"checksum":"c4b5a4a644228c6d1b0283e1368bce9e","access_level":"open_access","relation":"main_file","creator":"flocatel","file_size":877014,"date_updated":"2026-01-27T12:43:25Z","content_type":"application/pdf"}],"day":"22","article_processing_charge":"No","OA_type":"gold","oa_version":"Published Version","oa":1},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"ama":"Basile L, Maiorca V, Bortolussi L, Rodolà E, Locatello F. ResiDual transformer alignment with spectral decomposition. <i>arXiv</i>. doi:<a href=\"https://doi.org/10.48550/arXiv.2411.00246\">10.48550/arXiv.2411.00246</a>","apa":"Basile, L., Maiorca, V., Bortolussi, L., Rodolà, E., &#38; Locatello, F. (n.d.). ResiDual transformer alignment with spectral decomposition. <i>arXiv</i>. 
<a href=\"https://doi.org/10.48550/arXiv.2411.00246\">https://doi.org/10.48550/arXiv.2411.00246</a>","short":"L. Basile, V. Maiorca, L. Bortolussi, E. Rodolà, F. Locatello, ArXiv (n.d.).","mla":"Basile, Lorenzo, et al. “ResiDual Transformer Alignment with Spectral Decomposition.” <i>ArXiv</i>, 2411.00246, doi:<a href=\"https://doi.org/10.48550/arXiv.2411.00246\">10.48550/arXiv.2411.00246</a>.","ista":"Basile L, Maiorca V, Bortolussi L, Rodolà E, Locatello F. ResiDual transformer alignment with spectral decomposition. arXiv, 2411.00246.","chicago":"Basile, Lorenzo, Valentino Maiorca, Luca Bortolussi, Emanuele Rodolà, and Francesco Locatello. “ResiDual Transformer Alignment with Spectral Decomposition.” <i>ArXiv</i>, n.d. <a href=\"https://doi.org/10.48550/arXiv.2411.00246\">https://doi.org/10.48550/arXiv.2411.00246</a>.","ieee":"L. Basile, V. Maiorca, L. Bortolussi, E. Rodolà, and F. Locatello, “ResiDual transformer alignment with spectral decomposition,” <i>arXiv</i>. ."},"year":"2025","external_id":{"arxiv":["2411.00246"]},"OA_place":"repository","date_created":"2025-05-11T22:02:41Z","publication":"arXiv","language":[{"iso":"eng"}],"month":"04","acknowledgement":"The authors gratefully acknowledge Volkan Cevher for an insightful discussion about sparse recovery algorithms, Alex Smola for valuable feedback on the experiments, and Marco Baroni for an engaging conversation on the phenomenon of head specialization in NLP.\r\n","date_published":"2025-04-14T00:00:00Z","_id":"19674","article_number":"2411.00246","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2411.00246","open_access":"1"}],"date_updated":"2025-05-19T07:03:16Z","type":"preprint","arxiv":1,"status":"public","publication_status":"submitted","title":"ResiDual transformer alignment with spectral decomposition","doi":"10.48550/arXiv.2411.00246","department":[{"_id":"FrLo"}],"oa":1,"author":[{"full_name":"Basile, 
Lorenzo","first_name":"Lorenzo","last_name":"Basile"},{"first_name":"Valentino","last_name":"Maiorca","full_name":"Maiorca, Valentino"},{"full_name":"Bortolussi, Luca","first_name":"Luca","last_name":"Bortolussi"},{"first_name":"Emanuele","last_name":"Rodolà","full_name":"Rodolà, Emanuele"},{"full_name":"Locatello, Francesco","last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","first_name":"Francesco"}],"OA_type":"green","oa_version":"Preprint","abstract":[{"text":"When examined through the lens of their residual streams, a puzzling property emerges in transformer networks: residual contributions (e.g., attention heads) sometimes specialize in specific tasks or input attributes. In this paper, we analyze this phenomenon in vision transformers, focusing on the spectral geometry of residuals, and explore its implications for modality alignment in vision-language models. First, we link it to the intrinsically low-dimensional structure of visual head representations, zooming into their principal components and showing that they encode specialized roles across a wide variety of input data distributions. Then, we analyze the effect of head specialization in multimodal models, focusing on how improved alignment between text and specialized heads impacts zero-shot classification performance. This specialization-performance link consistently holds across diverse pre-training data, network sizes, and objectives, demonstrating a powerful new mechanism for boosting zero-shot classification through targeted alignment. Ultimately, we translate these insights into actionable terms by introducing ResiDual, a technique for spectral alignment of the residual stream. Much like panning for gold, it lets the noise from irrelevant unit principal components (i.e., attributes) wash away to amplify task-relevant ones. 
Remarkably, this dual perspective on modality alignment yields fine-tuning level performance on different data distributions while modelling an extremely interpretable and parameter-efficient transformation, as we extensively show on 70 pre-trained network-dataset combinations (7 models, 10 datasets).","lang":"eng"}],"day":"14","article_processing_charge":"No"},{"month":"04","publication":"13th International Conference on Learning Representations","has_accepted_license":"1","citation":{"ieee":"J. Chen, D. Yao, A. A. Pervez, D.-A. Alistarh, and F. Locatello, “Scalable mechanistic neural networks,” in <i>13th International Conference on Learning Representations</i>, Singapore, Singapore, 2025, pp. 63716–63737.","chicago":"Chen, Jiale, Dingling Yao, Adeel A Pervez, Dan-Adrian Alistarh, and Francesco Locatello. “Scalable Mechanistic Neural Networks.” In <i>13th International Conference on Learning Representations</i>, 63716–37. ICLR, 2025.","mla":"Chen, Jiale, et al. “Scalable Mechanistic Neural Networks.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025, pp. 63716–37.","ista":"Chen J, Yao D, Pervez AA, Alistarh D-A, Locatello F. 2025. Scalable mechanistic neural networks. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations, 63716–63737.","short":"J. Chen, D. Yao, A.A. Pervez, D.-A. Alistarh, F. Locatello, in:, 13th International Conference on Learning Representations, ICLR, 2025, pp. 63716–63737.","ama":"Chen J, Yao D, Pervez AA, Alistarh D-A, Locatello F. Scalable mechanistic neural networks. In: <i>13th International Conference on Learning Representations</i>. ICLR; 2025:63716-63737.","apa":"Chen, J., Yao, D., Pervez, A. A., Alistarh, D.-A., &#38; Locatello, F. (2025). Scalable mechanistic neural networks. In <i>13th International Conference on Learning Representations</i> (pp. 63716–63737). 
Singapore, Singapore: ICLR."},"year":"2025","external_id":{"arxiv":["2410.06074"]},"file":[{"file_size":732745,"date_updated":"2025-07-22T07:58:22Z","creator":"dernst","content_type":"application/pdf","file_name":"2025_ICLR_Chen.pdf","file_id":"20065","success":1,"date_created":"2025-07-22T07:58:22Z","relation":"main_file","access_level":"open_access","checksum":"64cfdb12ae3e4e8ba57b1403e1066776"}],"file_date_updated":"2025-07-22T07:58:22Z","article_processing_charge":"No","day":"01","OA_type":"diamond","oa_version":"Published Version","oa":1,"publication_status":"published","status":"public","ddc":["000"],"date_updated":"2025-08-04T08:03:11Z","_id":"20032","page":"63716-63737","date_published":"2025-04-01T00:00:00Z","language":[{"iso":"eng"}],"date_created":"2025-07-20T22:02:01Z","OA_place":"publisher","scopus_import":"1","conference":{"location":"Singapore, Singapore","name":"ICLR: International Conference on Learning Representations","start_date":"2025-04-24","end_date":"2025-04-28"},"publisher":"ICLR","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","abstract":[{"lang":"eng","text":"We propose Scalable Mechanistic Neural Network (S-MNN), an enhanced neural network framework designed for scientific machine learning applications involving long temporal sequences. By reformulating the original Mechanistic Neural Network (MNN) (Pervez et al., 2024), we reduce the computational time and space complexities from cubic and quadratic with respect to the sequence length, respectively, to linear. This significant improvement enables efficient modeling of long-term dynamics without sacrificing accuracy or interpretability. Extensive experiments demonstrate that S-MNN matches the original MNN in precision while substantially reducing computational resources. Consequently, S-MNN can drop-in replace the original MNN in applications, providing a practical and efficient tool for integrating mechanistic bottlenecks into neural network models of complex dynamical systems. 
Source code is available at https://github.com/IST-DASLab/ScalableMNN."}],"quality_controlled":"1","author":[{"last_name":"Chen","id":"4d0a9064-1ff6-11ee-9fa6-ec046c604785","first_name":"Jiale","orcid":"0000-0001-5337-5875","full_name":"Chen, Jiale"},{"last_name":"Yao","id":"d3e02e50-48a8-11ee-8f62-c108061797fa","first_name":"Dingling","full_name":"Yao, Dingling"},{"last_name":"Pervez","first_name":"Adeel A","id":"fca6d90c-d47f-11ee-bc87-93ff51604981","full_name":"Pervez, Adeel A"},{"last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian"},{"full_name":"Locatello, Francesco","last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683"}],"department":[{"_id":"DaAl"},{"_id":"FrLo"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"title":"Scalable mechanistic neural networks","related_material":{"link":[{"url":"https://github.com/IST-DASLab/ScalableMNN","relation":"software"}]},"type":"conference","corr_author":"1","publication_identifier":{"isbn":["9798331320850"]},"arxiv":1},{"publication":"13th International Conference on Learning Representations","month":"04","citation":{"apa":"Pariza, V., Salehi, M., Burghouts, G., Locatello, F., &#38; Asano, Y. M. (2025). Near, far: Patch-ordering enhances vision foundation models’ scene understanding. In <i>13th International Conference on Learning Representations</i> (pp. 72303–72330). Singapore, Singapore: ICLR.","ama":"Pariza V, Salehi M, Burghouts G, Locatello F, Asano YM. Near, far: Patch-ordering enhances vision foundation models’ scene understanding. In: <i>13th International Conference on Learning Representations</i>. 
ICLR; 2025:72303-72330.","chicago":"Pariza, Valentinos, Mohammadreza Salehi, Gertjan Burghouts, Francesco Locatello, and Yuki M. Asano. “Near, Far: Patch-Ordering Enhances Vision Foundation Models’ Scene Understanding.” In <i>13th International Conference on Learning Representations</i>, 72303–30. ICLR, 2025.","mla":"Pariza, Valentinos, et al. “Near, Far: Patch-Ordering Enhances Vision Foundation Models’ Scene Understanding.” <i>13th International Conference on Learning Representations</i>, ICLR, 2025, pp. 72303–30.","ista":"Pariza V, Salehi M, Burghouts G, Locatello F, Asano YM. 2025. Near, far: Patch-ordering enhances vision foundation models’ scene understanding. 13th International Conference on Learning Representations. ICLR: International Conference on Learning Representations, 72303–72330.","short":"V. Pariza, M. Salehi, G. Burghouts, F. Locatello, Y.M. Asano, in:, 13th International Conference on Learning Representations, ICLR, 2025, pp. 72303–72330.","ieee":"V. Pariza, M. Salehi, G. Burghouts, F. Locatello, and Y. M. Asano, “Near, far: Patch-ordering enhances vision foundation models’ scene understanding,” in <i>13th International Conference on Learning Representations</i>, Singapore, Singapore, 2025, pp. 
72303–72330."},"year":"2025","external_id":{"arxiv":["2408.11054"]},"has_accepted_license":"1","oa":1,"OA_type":"diamond","oa_version":"Published Version","article_processing_charge":"No","day":"01","file":[{"checksum":"ddbe981f3ad3f6cb6daf12c954822eb8","access_level":"open_access","relation":"main_file","date_created":"2025-08-04T08:09:43Z","success":1,"file_id":"20109","file_name":"2025_ICLR_Pariza.pdf","content_type":"application/pdf","creator":"dernst","date_updated":"2025-08-04T08:09:43Z","file_size":37788223}],"file_date_updated":"2025-08-04T08:09:43Z","date_updated":"2025-08-04T08:10:55Z","ddc":["000"],"status":"public","publication_status":"published","OA_place":"publisher","date_created":"2025-07-20T22:02:03Z","scopus_import":"1","language":[{"iso":"eng"}],"date_published":"2025-04-01T00:00:00Z","_id":"20036","page":"72303-72330","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"ICLR","conference":{"end_date":"2025-04-28","location":"Singapore, Singapore","name":"ICLR: International Conference on Learning Representations","start_date":"2025-04-24"},"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"department":[{"_id":"FrLo"}],"author":[{"first_name":"Valentinos","last_name":"Pariza","full_name":"Pariza, Valentinos"},{"last_name":"Salehi","first_name":"Mohammadreza","full_name":"Salehi, Mohammadreza"},{"full_name":"Burghouts, Gertjan","last_name":"Burghouts","first_name":"Gertjan"},{"full_name":"Locatello, Francesco","last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683"},{"first_name":"Yuki M.","last_name":"Asano","full_name":"Asano, Yuki M."}],"abstract":[{"text":"We introduce NeCo: Patch Neighbor Consistency, a novel self-supervised training loss that enforces patch-level nearest neighbor consistency across a 
student and teacher model. Compared to contrastive approaches that only yield binary learning signals, i.e. \"attract\" and \"repel\", this approach benefits from the more fine-grained learning signal of sorting spatially dense features relative to reference patches. Our method leverages differentiable sorting applied on top of pretrained representations, such as DINOv2-registers to bootstrap the learning signal and further improve upon them. This dense post-pretraining leads to superior performance across various models and datasets, despite requiring only 19 hours on a single GPU. This method generates high-quality dense feature encoders and establishes several new state-of-the-art results such as +2.3 % and +4.2% for non-parametric in-context semantic segmentation on ADE20k and Pascal VOC, +1.6% and +4.8% for linear segmentation evaluations on COCO-Things and -Stuff and improvements in the 3D understanding of multi-view consistency on SPair-71k, by more than 1.5%.","lang":"eng"}],"quality_controlled":"1","type":"conference","arxiv":1,"publication_identifier":{"isbn":["9798331320850"]},"title":"Near, far: Patch-ordering enhances vision foundation models' scene understanding"},{"date_created":"2025-12-14T23:02:04Z","OA_place":"publisher","scopus_import":"1","language":[{"iso":"eng"}],"acknowledgement":"AP. This project has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie Grant Agreement No. 101034413.\r\nFL. This research was funded in whole or in part by the Austrian Science Fund (FWF) 10.55776/COE12. 
For open access purposes, the author has applied a CC BY public\r\ncopyright license to any author accepted manuscript version arising from this submission.","date_published":"2025-05-01T00:00:00Z","alternative_title":["PMLR"],"_id":"20817","intvolume":"       267","page":"48962-48973","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"ML Research Press","conference":{"end_date":"2025-07-19","name":"ICML: International Conference on Machine Learning","location":"Vancouver, Canada","start_date":"2025-07-13"},"department":[{"_id":"FrLo"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"author":[{"id":"fca6d90c-d47f-11ee-bc87-93ff51604981","first_name":"Adeel A","last_name":"Pervez","full_name":"Pervez, Adeel A"},{"last_name":"Gavves","first_name":"Efstratios","full_name":"Gavves, Efstratios"},{"full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","last_name":"Locatello"}],"abstract":[{"lang":"eng","text":"We present Mechanistic PDE Networks -- a model for discovery of governing partial differential equations from data. Mechanistic PDE Networks represent spatiotemporal data as space-time dependent linear partial differential equations in neural network hidden representations. The represented PDEs are then solved and decoded for specific tasks. The learned PDE representations naturally express the spatiotemporal dynamics in data in neural network hidden space, enabling increased modeling power. Solving the PDE representations in a compute and memory-efficient way, however, is a significant challenge. We develop a native, GPU-capable, parallel, sparse and differentiable multigrid solver specialized for linear partial differential equations that acts as a module in Mechanistic PDE Networks. 
Leveraging the PDE solver we propose a discovery architecture that can discover nonlinear PDEs in complex settings, while being robust to noise. We validate PDE discovery on a number of PDEs including reaction-diffusion and Navier-Stokes equations."}],"quality_controlled":"1","corr_author":"1","type":"conference","publication_identifier":{"eissn":["2640-3498"]},"arxiv":1,"related_material":{"link":[{"relation":"software","url":"https://github.com/alpz/mech-nn-discovery-pde"}]},"title":"Mechanistic PDE networks for discovery of governing equations","month":"05","publication":"42nd International Conference on Machine Learning","ec_funded":1,"year":"2025","citation":{"apa":"Pervez, A. A., Gavves, E., &#38; Locatello, F. (2025). Mechanistic PDE networks for discovery of governing equations. In <i>42nd International Conference on Machine Learning</i> (Vol. 267, pp. 48962–48973). Vancouver, Canada: ML Research Press.","ama":"Pervez AA, Gavves E, Locatello F. Mechanistic PDE networks for discovery of governing equations. In: <i>42nd International Conference on Machine Learning</i>. Vol 267. ML Research Press; 2025:48962-48973.","chicago":"Pervez, Adeel A, Efstratios Gavves, and Francesco Locatello. “Mechanistic PDE Networks for Discovery of Governing Equations.” In <i>42nd International Conference on Machine Learning</i>, 267:48962–73. ML Research Press, 2025.","ista":"Pervez AA, Gavves E, Locatello F. 2025. Mechanistic PDE networks for discovery of governing equations. 42nd International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 267, 48962–48973.","mla":"Pervez, Adeel A., et al. “Mechanistic PDE Networks for Discovery of Governing Equations.” <i>42nd International Conference on Machine Learning</i>, vol. 267, ML Research Press, 2025, pp. 48962–73.","short":"A.A. Pervez, E. Gavves, F. Locatello, in:, 42nd International Conference on Machine Learning, ML Research Press, 2025, pp. 48962–48973.","ieee":"A. A. Pervez, E. 
Gavves, and F. Locatello, “Mechanistic PDE networks for discovery of governing equations,” in <i>42nd International Conference on Machine Learning</i>, Vancouver, Canada, 2025, vol. 267, pp. 48962–48973."},"external_id":{"arxiv":["2502.18377"]},"has_accepted_license":"1","oa":1,"OA_type":"gold","oa_version":"Published Version","volume":267,"day":"01","article_processing_charge":"No","file_date_updated":"2025-12-16T12:21:49Z","project":[{"name":"IST-BRIDGE: International postdoctoral program","_id":"fc2ed2f7-9c52-11eb-aca3-c01059dda49c","call_identifier":"H2020","grant_number":"101034413"}],"file":[{"relation":"main_file","checksum":"933cb673fb41416f537278fb990df6c3","access_level":"open_access","file_name":"2025_ICML_Pervez.pdf","file_id":"20827","success":1,"date_created":"2025-12-16T12:21:49Z","content_type":"application/pdf","file_size":993381,"date_updated":"2025-12-16T12:21:49Z","creator":"dernst"}],"date_updated":"2025-12-16T12:24:55Z","ddc":["000"],"publication_status":"published","status":"public"},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2024","citation":{"ieee":"D. Lao, Z. Hu, F. Locatello, Y. Yang, and S. Soatto, “Divided attention: Unsupervised multi-object discovery with contextually separated slots,” in <i>1st Conference on Parsimony and Learning</i>, Hong Kong, China, 2024.","ista":"Lao D, Hu Z, Locatello F, Yang Y, Soatto S. 2024. Divided attention: Unsupervised multi-object discovery with contextually separated slots. 1st Conference on Parsimony and Learning. CPAL: Conference on Parsimony and Learning.","short":"D. Lao, Z. Hu, F. Locatello, Y. Yang, S. Soatto, in:, 1st Conference on Parsimony and Learning, 2024.","mla":"Lao, Dong, et al. “Divided Attention: Unsupervised Multi-Object Discovery with Contextually Separated Slots.” <i>1st Conference on Parsimony and Learning</i>, 2024.","chicago":"Lao, Dong, Zhengyang Hu, Francesco Locatello, Yanchao Yang, and Stefano Soatto. 
“Divided Attention: Unsupervised Multi-Object Discovery with Contextually Separated Slots.” In <i>1st Conference on Parsimony and Learning</i>, 2024.","apa":"Lao, D., Hu, Z., Locatello, F., Yang, Y., &#38; Soatto, S. (2024). Divided attention: Unsupervised multi-object discovery with contextually separated slots. In <i>1st Conference on Parsimony and Learning</i>. Hong Kong, China.","ama":"Lao D, Hu Z, Locatello F, Yang Y, Soatto S. Divided attention: Unsupervised multi-object discovery with contextually separated slots. In: <i>1st Conference on Parsimony and Learning</i>. ; 2024."},"external_id":{"arxiv":["2304.01430"]},"conference":{"end_date":"2024-01-03","start_date":"2024-01-03","location":"Hong Kong, China","name":"CPAL: Conference on Parsimony and Learning"},"has_accepted_license":"1","date_created":"2023-08-22T14:19:59Z","publication":"1st Conference on Parsimony and Learning","language":[{"iso":"eng"}],"month":"01","date_published":"2024-01-03T00:00:00Z","_id":"14213","type":"conference","date_updated":"2024-02-12T08:56:23Z","arxiv":1,"ddc":["000"],"publication_status":"published","status":"public","title":"Divided attention: Unsupervised multi-object discovery with contextually separated slots","department":[{"_id":"FrLo"}],"oa":1,"author":[{"full_name":"Lao, Dong","first_name":"Dong","last_name":"Lao"},{"first_name":"Zhengyang","last_name":"Hu","full_name":"Hu, Zhengyang"},{"first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello","full_name":"Locatello, Francesco"},{"full_name":"Yang, Yanchao","first_name":"Yanchao","last_name":"Yang"},{"full_name":"Soatto, Stefano","first_name":"Stefano","last_name":"Soatto"}],"oa_version":"Published Version","article_processing_charge":"No","day":"03","abstract":[{"lang":"eng","text":"We introduce a method to segment the visual field into independently moving regions, trained with no ground truth or supervision. 
It consists of an adversarial conditional encoder-decoder architecture based on Slot Attention, modified to use the image as context to decode optical flow without attempting to reconstruct the image itself. In the resulting multi-modal representation, one modality (flow) feeds the encoder to produce separate latent codes (slots), whereas the other modality (image) conditions the decoder to generate the first (flow) from the slots. This design frees the representation from having to encode complex nuisance variability in the image due to, for instance, illumination and reflectance properties of the scene. Since customary autoencoding based on minimizing the reconstruction error does not preclude the entire flow from being encoded into a single slot, we modify the loss to an adversarial criterion based on Contextual Information Separation. The resulting min-max optimization fosters the separation of objects and their assignment to different attention slots, leading to Divided Attention, or DivA. DivA outperforms recent unsupervised multi-object motion segmentation methods while tripling run-time speed up to 104FPS and reducing the performance gap from supervised methods to 12% or less. DivA can handle different numbers of objects and different image sizes at training and test time, is invariant to permutation of object labels, and does not require explicit regularization."}],"quality_controlled":"1","file":[{"date_updated":"2024-02-12T08:40:36Z","file_size":8038511,"creator":"dernst","content_type":"application/pdf","success":1,"file_id":"14978","file_name":"2024_CPAL_Lao.pdf","date_created":"2024-02-12T08:40:36Z","relation":"main_file","access_level":"open_access","checksum":"8fad894c34f1b3d5a14fb8ffb12f7277"}],"file_date_updated":"2024-02-12T08:40:36Z"},{"month":"11","publication":"12th International Conference on Learning Representations","has_accepted_license":"1","external_id":{"arxiv":["2311.04056"]},"citation":{"ieee":"D. 
Yao <i>et al.</i>, “Multi-view causal representation learning with partial observability,” in <i>12th International Conference on Learning Representations</i>, Vienna, Austria, 2024.","ama":"Yao D, Xu D, Lachapelle S, et al. Multi-view causal representation learning with partial observability. In: <i>12th International Conference on Learning Representations</i>. Curran Associates; 2024.","apa":"Yao, D., Xu, D., Lachapelle, S., Magliacane, S., Taslakian, P., Martius, G., … Locatello, F. (2024). Multi-view causal representation learning with partial observability. In <i>12th International Conference on Learning Representations</i>. Vienna, Austria: Curran Associates.","chicago":"Yao, Dingling, Danru Xu, Sébastien Lachapelle, Sara Magliacane, Perouz Taslakian, Georg Martius, Julius von Kügelgen, and Francesco Locatello. “Multi-View Causal Representation Learning with Partial Observability.” In <i>12th International Conference on Learning Representations</i>. Curran Associates, 2024.","mla":"Yao, Dingling, et al. “Multi-View Causal Representation Learning with Partial Observability.” <i>12th International Conference on Learning Representations</i>, Curran Associates, 2024.","ista":"Yao D, Xu D, Lachapelle S, Magliacane S, Taslakian P, Martius G, Kügelgen J von, Locatello F. 2024. Multi-view causal representation learning with partial observability. 12th International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","short":"D. Yao, D. Xu, S. Lachapelle, S. Magliacane, P. Taslakian, G. Martius, J. von Kügelgen, F. 
Locatello, in:, 12th International Conference on Learning Representations, Curran Associates, 2024."},"year":"2024","day":"07","article_processing_charge":"No","file_date_updated":"2025-02-04T12:34:23Z","file":[{"content_type":"application/pdf","file_size":1713606,"date_updated":"2025-02-04T12:34:23Z","creator":"dernst","relation":"main_file","checksum":"8ed3c34706eeec622c7e8968dc0f747a","access_level":"open_access","file_name":"2024_ICLR_Yao.pdf","file_id":"18995","success":1,"date_created":"2025-02-04T12:34:23Z"}],"oa":1,"oa_version":"Published Version","OA_type":"green","ddc":["000"],"status":"public","publication_status":"published","date_updated":"2025-02-11T10:34:32Z","date_published":"2024-11-07T00:00:00Z","_id":"14946","OA_place":"repository","date_created":"2024-02-07T14:28:34Z","acknowledgement":"This work was initiated at the Second Bellairs Workshop on Causality held at the Bellairs Research Institute, January 6–13, 2022; we thank all workshop participants for providing a stimulating research environment. Further, we thank Cian Eastwood, Luigi Gresele, Stefano Soatto, Marco Bagatella and A. René Geist for helpful discussion. GM is a member of the Machine Learning Cluster of Excellence, EXC number 2064/1 – Project number 390727645. JvK and GM acknowledge support from the German Federal Ministry of Education and Research (BMBF) through the Tübingen AI Center (FKZ: 01IS18039B). The research of DX and SM was supported by the Air Force Office of Scientific Research under award number FA8655-22-1-7155. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the United States Air Force. We also thank SURF for the support in using the Dutch National Supercomputer Snellius. SL was supported by an IVADO excellence PhD scholarship and by Samsung Electronics Co., Ltd. 
DY was supported by an Amazon fellowship, the International Max Planck Research School for Intelligent Systems (IMPRS-IS) and the ISTA graduate school. Work done outside of Amazon.","language":[{"iso":"eng"}],"conference":{"end_date":"2024-05-07","start_date":"2024-05-07","location":"Vienna, Austria","name":"ICLR: International Conference on Learning Representations"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"Curran Associates","abstract":[{"lang":"eng","text":"We present a unified framework for studying the identifiability of representations learned from simultaneously observed views, such as different data modalities. We allow a partially observed setting in which each view constitutes a nonlinear mixture of a subset of underlying latent variables, which can be causally related. We prove that the information shared across all subsets of any number of views can be learned up to a smooth bijection using contrastive learning and a single encoder per view. We also provide graphical criteria indicating which latent variables can be identified through a simple set of rules, which we refer to as identifiability algebra. Our general framework and theoretical results unify and extend several previous works on multi-view nonlinear ICA, disentanglement, and causal representation learning. We experimentally validate our claims on numerical, image, and multi-modal data sets. Further, we demonstrate that the performance of prior methods is recovered in different special cases of our setup. 
Overall, we find that access to multiple partial views offers unique opportunities for identifiable representation learning, enabling the discovery of latent structures from purely observational data."}],"quality_controlled":"1","department":[{"_id":"FrLo"}],"author":[{"first_name":"Dingling","id":"d3e02e50-48a8-11ee-8f62-c108061797fa","last_name":"Yao","full_name":"Yao, Dingling"},{"last_name":"Xu","first_name":"Danru","full_name":"Xu, Danru"},{"first_name":"Sébastien","last_name":"Lachapelle","full_name":"Lachapelle, Sébastien"},{"first_name":"Sara","last_name":"Magliacane","full_name":"Magliacane, Sara"},{"full_name":"Taslakian, Perouz","last_name":"Taslakian","first_name":"Perouz"},{"last_name":"Martius","first_name":"Georg","full_name":"Martius, Georg"},{"first_name":"Julius von","last_name":"Kügelgen","full_name":"Kügelgen, Julius von"},{"full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","last_name":"Locatello"}],"title":"Multi-view causal representation learning with partial observability","arxiv":1,"corr_author":"1","type":"conference"},{"ddc":["000","570"],"publication_status":"published","status":"public","date_updated":"2025-07-10T11:51:50Z","volume":38,"article_processing_charge":"No","day":"25","file_date_updated":"2025-01-27T11:42:24Z","file":[{"content_type":"application/pdf","date_updated":"2025-01-27T11:42:24Z","file_size":4453014,"creator":"dernst","relation":"main_file","checksum":"beedf05388bbdb7ddda81ec3d5ec7026","access_level":"open_access","success":1,"file_name":"2024_ICML_Cadei.pdf","file_id":"18896","date_created":"2025-01-27T11:42:24Z"}],"oa":1,"OA_type":"gold","oa_version":"Published Version","has_accepted_license":"1","citation":{"ieee":"R. Cadei, L. Lindorfer, S. Cremer, C. Schmid, and F. Locatello, “Smoke and mirrors in causal downstream tasks,” in <i>ICML 2024 Workshop AI4Science</i>, 2024, vol. 
38.","ista":"Cadei R, Lindorfer L, Cremer S, Schmid C, Locatello F. 2024. Smoke and mirrors in causal downstream tasks. ICML 2024 Workshop AI4Science. ICML: International Conference on Machine Learning vol. 38.","short":"R. Cadei, L. Lindorfer, S. Cremer, C. Schmid, F. Locatello, in:, ICML 2024 Workshop AI4Science, Curran Associates, 2024.","mla":"Cadei, Riccardo, et al. “Smoke and Mirrors in Causal Downstream Tasks.” <i>ICML 2024 Workshop AI4Science</i>, vol. 38, Curran Associates, 2024.","chicago":"Cadei, Riccardo, Lukas Lindorfer, Sylvia Cremer, Cordelia Schmid, and Francesco Locatello. “Smoke and Mirrors in Causal Downstream Tasks.” In <i>ICML 2024 Workshop AI4Science</i>, Vol. 38. Curran Associates, 2024.","apa":"Cadei, R., Lindorfer, L., Cremer, S., Schmid, C., &#38; Locatello, F. (2024). Smoke and mirrors in causal downstream tasks. In <i>ICML 2024 Workshop AI4Science</i> (Vol. 38). Curran Associates.","ama":"Cadei R, Lindorfer L, Cremer S, Schmid C, Locatello F. Smoke and mirrors in causal downstream tasks. In: <i>ICML 2024 Workshop AI4Science</i>. Vol 38. Curran Associates; 2024."},"year":"2024","external_id":{"arxiv":["2405.17151"]},"publication":"ICML 2024 Workshop AI4Science","month":"09","related_material":{"record":[{"id":"18895","relation":"research_data","status":"public"},{"id":"19509","status":"for_moderation","relation":"is_continued_by"}],"link":[{"relation":"software","url":"https://github.com/CausalLearningAI/ISTAnt"}]},"title":"Smoke and mirrors in causal downstream tasks","corr_author":"1","type":"conference","arxiv":1,"abstract":[{"lang":"eng","text":"Machine Learning and AI have the potential to transform data-driven\r\nscientific discovery, enabling accurate predictions for several scientific\r\nphenomena. 
As many scientific questions are inherently causal, this paper looks\r\nat the causal inference task of treatment effect estimation, where the outcome\r\nof interest is recorded in high-dimensional observations in a Randomized\r\nControlled Trial (RCT). Despite being the simplest possible causal setting and\r\na perfect fit for deep learning, we theoretically find that many common choices\r\nin the literature may lead to biased estimates. To test the practical impact of\r\nthese considerations, we recorded ISTAnt, the first real-world benchmark for\r\ncausal inference downstream tasks on high-dimensional observations as an RCT\r\nstudying how garden ants (Lasius neglectus) respond to microparticles applied\r\nonto their colony members by hygienic grooming. Comparing 6 480 models\r\nfine-tuned from state-of-the-art visual backbones, we find that the sampling\r\nand modeling choices significantly affect the accuracy of the causal estimate,\r\nand that classification accuracy is not a proxy thereof. We further validated\r\nthe analysis, repeating it on a synthetically generated visual data set\r\ncontrolling the causal model. Our results suggest that future benchmarks should\r\ncarefully consider real downstream scientific questions, especially causal\r\nones. 
Further, we highlight guidelines for representation learning methods to\r\nhelp answer causal questions in the sciences."}],"quality_controlled":"1","tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"department":[{"_id":"SyCr"},{"_id":"FrLo"},{"_id":"GradSch"}],"author":[{"last_name":"Cadei","id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b","first_name":"Riccardo","full_name":"Cadei, Riccardo"},{"full_name":"Lindorfer, Lukas","id":"85f0e6d3-06b3-11ec-8982-8c5049fa4455","first_name":"Lukas","last_name":"Lindorfer"},{"full_name":"Cremer, Sylvia","last_name":"Cremer","first_name":"Sylvia","orcid":"0000-0002-2193-3868","id":"2F64EC8C-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Schmid, Cordelia","last_name":"Schmid","first_name":"Cordelia"},{"last_name":"Locatello","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","full_name":"Locatello, Francesco"}],"conference":{"end_date":"2024-07-26","name":"ICML: International Conference on Machine Learning","start_date":"2024-07-26"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"Curran Associates","date_published":"2024-09-25T00:00:00Z","_id":"18847","intvolume":"        38","date_created":"2025-01-14T07:27:26Z","OA_place":"publisher","scopus_import":"1","language":[{"iso":"eng"}],"acknowledgement":"We thank Piersilvio De Bartolomeis, and the full Causal Learning and Artificial Intelligence (CLAI) group at ISTA for the extremely helpful discussions. Riccardo Cadei was supported by a Google Research Scholar Award and a Google Initiated Gift to Francesco Locatello. 
We thank the Social Immunity team at ISTA particularly Michaela Hönigsberger and Wilfrid Jean Louis, for supporting the ecological experiment and Farnaz Beikzadeh Abbasi, Luisa Fiebig and Martin Estermann for annotating ant behavior in ISTAnt."},{"_id":"18895","date_published":"2024-10-23T00:00:00Z","month":"10","OA_place":"repository","date_created":"2025-01-27T11:45:43Z","citation":{"ieee":"R. Cadei, F. Locatello, S. Cremer, L. Lindorfer, and C. Schmid, “ISTAnt.” Institute of Science and Technology Austria, 2024.","ista":"Cadei R, Locatello F, Cremer S, Lindorfer L, Schmid C. 2024. ISTAnt, Institute of Science and Technology Austria, <a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">10.6084/M9.FIGSHARE.26484934.V2</a>.","mla":"Cadei, Riccardo, et al. <i>ISTAnt</i>. Institute of Science and Technology Austria, 2024, doi:<a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">10.6084/M9.FIGSHARE.26484934.V2</a>.","short":"R. Cadei, F. Locatello, S. Cremer, L. Lindorfer, C. Schmid, (2024).","chicago":"Cadei, Riccardo, Francesco Locatello, Sylvia Cremer, Lukas Lindorfer, and Cordelia Schmid. “ISTAnt.” Institute of Science and Technology Austria, 2024. <a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">https://doi.org/10.6084/M9.FIGSHARE.26484934.V2</a>.","ama":"Cadei R, Locatello F, Cremer S, Lindorfer L, Schmid C. ISTAnt. 2024. doi:<a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">10.6084/M9.FIGSHARE.26484934.V2</a>","apa":"Cadei, R., Locatello, F., Cremer, S., Lindorfer, L., &#38; Schmid, C. (2024). ISTAnt. Institute of Science and Technology Austria. 
<a href=\"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2\">https://doi.org/10.6084/M9.FIGSHARE.26484934.V2</a>"},"year":"2024","publisher":"Institute of Science and Technology Austria","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","abstract":[{"text":"ISTAnt is a new ecological dataset for social immunity and represents the first real-world benchmark for causal inference downstream tasks on high-dimensional observations. It analyzes grooming behavior in the ant Lasius neglectus in groups of three worker ants. The workers for the experiment were obtained from their laboratory stock colony, which had been collected from the field in 2022 in the Botanical Garden Jena, Germany. Ant collection and all experimental work were performed in compliance with international, national and institutional regulations and ethical guidelines. For the experiment, the body surface of one of the three ants was treated with a suspension of either of two microparticle types (diameter ~5 µm) to induce grooming by the two nestmates, which were individually color-coded by application of a dot of blue or orange paint, respectively. The three ants were housed in small plastic containers (diameter 28mm, height 30mm) with moistened, plastered ground and the interior walls covered with PTFE (polytetrafluoroethane) to hamper climbing by the ants. Filming occurred in a temperature- and humidity-controlled room at 23°C within a custom-made filming box with controlled lighting and ventilation conditions. We set up nine ant groups at a time (always containing both treatments) and placed them randomly on positions 1-9 marked on the floor in a 3x3 grid, about 3mm from each other. The experiment was performed on two consecutive days. 
Videos were acquired using a USB camera (FLIR blackfly S BFS-U3-120S4C, Teledyne FLIR) with a high-performance lens (HP Series 25mm Focal Length, Edmund optics 86-572) in OBS studio 29.0.0 \\citep{bailey2017obs} at a framerate of 30 FPS and a resolution of 2500x2500 pixels. From each original video (105x105 mm), we generated nine individual videos .mkv (each ~32x32 mm, 770x770 pixels) by determining exact coordinates per container from one frame in GIMP 2.10.36 and cropping of the videos with FFmpeg 6.1.1. Annotation was performed over two consecutive days by three observers who had not been involved in the experimental setup or recording and were unaware of the treatment assignments to ensure bias-free behavioral annotation. They annotated the behavior of the ants during video observations, using custom-made software that saves the start and end frames of behaviors marked in a .csv file (see 'annotations' folder). In one of the videos, one of the nestmates' legs got inadvertently stuck to its body surface during the color-coding, interfering with its behavior, so the video was discarded. This left 44 videos from 5 independent setups (n=24 of treatment 1 and n=20 of treatment 2) of 10 minutes each for a total of 792 000 annotated frames (see 'video' folder). 
For each video, we provide the following information: the number of the set to which it belongs (1-5); the number of the position within the set reflecting the position of the ant group under the camera (1-9), for which we also provide ‘coordinates’ in the 3x3 grid (taking values -1/0/1 for both X and Y axis); treatment (1 or 2); the hour of the day when the recording was started (in 24h CEST); experimental day (A or B); the top left coordinate of the cropping square from the original video (CropX/CropY); the person annotating the video (given as A, B, C); the date of annotation (1: first day, 2: second day) and in which order the videos were annotated by each person, both reflecting a possible training effect of the person (see 'experiments_settings.csv' file).","lang":"eng"}],"article_processing_charge":"No","day":"23","OA_type":"gold","author":[{"last_name":"Cadei","first_name":"Riccardo","id":"0fa8b76f-72f0-11ef-b75a-a5da96e5ad6b","full_name":"Cadei, Riccardo"},{"last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco"},{"first_name":"Sylvia M","id":"2F64EC8C-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-2193-3868","last_name":"Cremer","full_name":"Cremer, Sylvia M"},{"full_name":"Lindorfer, Lukas","first_name":"Lukas","id":"85f0e6d3-06b3-11ec-8982-8c5049fa4455","last_name":"Lindorfer"},{"last_name":"Schmid","first_name":"Cordelia","full_name":"Schmid, Cordelia"}],"oa_version":"Published 
Version","department":[{"_id":"SyCr"},{"_id":"FrLo"},{"_id":"GradSch"}],"oa":1,"status":"public","title":"ISTAnt","doi":"10.6084/M9.FIGSHARE.26484934.V2","related_material":{"record":[{"relation":"used_in_publication","status":"public","id":"18847"}]},"ddc":["570"],"corr_author":"1","type":"research_data_reference","date_updated":"2025-01-27T11:58:38Z","main_file_link":[{"open_access":"1","url":"https://doi.org/10.6084/M9.FIGSHARE.26484934.V2"}]},{"user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","publisher":"IEEE","isi":1,"conference":{"end_date":"2024-06-22","start_date":"2024-06-16","location":"Seattle, WA, United States","name":"CVPR: Conference on Computer Vision and Pattern Recognition"},"OA_place":"repository","date_created":"2025-01-29T14:27:39Z","acknowledgement":"Yanwei Fu is the corresponding author. Yanwei Fu is with School of Data Science, Fudan University, Shanghai Key Lab of Intelligent Information Processing, Fudan University, and Fudan ISTBI-ZJNU Algorithm Centre for Brain-inspired Intelligence, Zhejiang Normal University, Jinhua, China.","language":[{"iso":"eng"}],"date_published":"2024-06-15T00:00:00Z","_id":"18964","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2406.09196","open_access":"1"}],"publication_identifier":{"eisbn":["9798350353006"]},"arxiv":1,"type":"conference","related_material":{"link":[{"relation":"software","url":"https://kfan21.github.io/AdaSlot/"}]},"title":"Adaptive slot attention: Object discovery with dynamic slot number","department":[{"_id":"FrLo"}],"author":[{"last_name":"Fan","first_name":"Ke","full_name":"Fan, Ke"},{"full_name":"Bai, Zechen","first_name":"Zechen","last_name":"Bai"},{"full_name":"Xiao, Tianjun","last_name":"Xiao","first_name":"Tianjun"},{"full_name":"He, Tong","last_name":"He","first_name":"Tong"},{"first_name":"Max","last_name":"Horn","full_name":"Horn, Max"},{"full_name":"Fu, Yanwei","last_name":"Fu","first_name":"Yanwei"},{"full_name":"Locatello, 
Francesco","last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","first_name":"Francesco"},{"full_name":"Zhang, Zheng","first_name":"Zheng","last_name":"Zhang"}],"quality_controlled":"1","abstract":[{"text":"Object-centric learning (OCL) extracts the representation of objects with slots, offering an exceptional blend of flexibility and interpretability for abstracting low-level perceptual features. A widely adopted method within OCL is slot attention, which utilizes attention mechanisms to iteratively refine slot representations. However, a major drawback of most object-centric models, including slot attention, is their reliance on predefining the number of slots. This not only necessitates prior knowledge of the dataset but also overlooks the inherent variability in the number of objects present in each instance. To overcome this fundamental limitation, we present a novel complexity-aware object auto-encoder framework. Within this framework, we introduce an adaptive slot attention (AdaSlot) mechanism that dynamically determines the optimal number of slots based on the content of the data. This is achieved by proposing a discrete slot sampling module that is responsible for selecting an appropriate number of slots from a candidate list. Furthermore, we introduce a masked slot decoder that suppresses unselected slots during the decoding process. Our framework, tested extensively on object discovery tasks with various datasets, shows performance matching or exceeding top fixed-slot models. Moreover, our analysis substantiates that our method exhibits the capability to dynamically adapt the slot number according to each instance's complexity, offering the potential for further exploration in slot attention research. Project will be available at https://kfan21.github.io/AdaSlot/","lang":"eng"}],"external_id":{"arxiv":["2406.09196"],"isi":["001342515506043"]},"citation":{"ama":"Fan K, Bai Z, Xiao T, et al. 
Adaptive slot attention: Object discovery with dynamic slot number. In: <i>2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/cvpr52733.2024.02176\">10.1109/cvpr52733.2024.02176</a>","apa":"Fan, K., Bai, Z., Xiao, T., He, T., Horn, M., Fu, Y., … Zhang, Z. (2024). Adaptive slot attention: Object discovery with dynamic slot number. In <i>2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>. Seattle, WA, United States: IEEE. <a href=\"https://doi.org/10.1109/cvpr52733.2024.02176\">https://doi.org/10.1109/cvpr52733.2024.02176</a>","short":"K. Fan, Z. Bai, T. Xiao, T. He, M. Horn, Y. Fu, F. Locatello, Z. Zhang, in:, 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition, IEEE, 2024.","ista":"Fan K, Bai Z, Xiao T, He T, Horn M, Fu Y, Locatello F, Zhang Z. 2024. Adaptive slot attention: Object discovery with dynamic slot number. 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition. CVPR: Conference on Computer Vision and Pattern Recognition.","mla":"Fan, Ke, et al. “Adaptive Slot Attention: Object Discovery with Dynamic Slot Number.” <i>2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/cvpr52733.2024.02176\">10.1109/cvpr52733.2024.02176</a>.","chicago":"Fan, Ke, Zechen Bai, Tianjun Xiao, Tong He, Max Horn, Yanwei Fu, Francesco Locatello, and Zheng Zhang. “Adaptive Slot Attention: Object Discovery with Dynamic Slot Number.” In <i>2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/cvpr52733.2024.02176\">https://doi.org/10.1109/cvpr52733.2024.02176</a>.","ieee":"K. 
Fan <i>et al.</i>, “Adaptive slot attention: Object discovery with dynamic slot number,” in <i>2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition</i>, Seattle, WA, United States, 2024."},"year":"2024","month":"06","publication":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition","date_updated":"2025-09-09T12:15:17Z","doi":"10.1109/cvpr52733.2024.02176","publication_status":"published","status":"public","oa":1,"oa_version":"Preprint","OA_type":"green","day":"15","article_processing_charge":"No"},{"conference":{"end_date":"2024-07-27","start_date":"2024-07-21","name":"ICML: International Conference on Machine Learning","location":"Vienna, Austria"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"ML Research Press","date_published":"2024-07-30T00:00:00Z","_id":"18971","alternative_title":["PMLR"],"intvolume":"       235","page":"1672-1688","OA_place":"repository","date_created":"2025-01-30T07:21:57Z","scopus_import":"1","language":[{"iso":"eng"}],"acknowledgement":"We acknowledge the support of the Canada CIFAR AI Chair Program and IVADO. We thank Mila and Compute Canada for providing computational resources.\r\n","related_material":{"link":[{"url":"https://github.com/rarefin/CoBalT","relation":"software"}]},"title":"Unsupervised concept discovery mitigates spurious correlations","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2402.13368","open_access":"1"}],"type":"conference","publication_identifier":{"eissn":["2640-3498"]},"arxiv":1,"abstract":[{"text":"Models prone to spurious correlations in training data often produce brittle predictions and introduce unintended biases. Addressing this challenge typically involves methods relying on prior knowledge and group annotation to remove spurious correlations, which may not be readily available in many applications. In this paper, we establish a novel connection between unsupervised object-centric learning and mitigation of spurious correlations. 
Instead of directly inferring subgroups with varying correlations with labels, our approach focuses on discovering concepts: discrete ideas that are shared across input samples. Leveraging existing object-centric representation learning, we introduce CoBalT: a concept balancing technique that effectively mitigates spurious correlations without requiring human labeling of subgroups. Evaluation across the benchmark datasets for sub-population shifts demonstrate superior or competitive performance compared state-of-the-art baselines, without the need for group annotation. Code is available at https://github.com/rarefin/CoBalT","lang":"eng"}],"quality_controlled":"1","department":[{"_id":"FrLo"}],"author":[{"full_name":"Arefin, Rifat","last_name":"Arefin","first_name":"Rifat"},{"last_name":"Zhang","first_name":"Yan","full_name":"Zhang, Yan"},{"last_name":"Baratin","first_name":"Aristide","full_name":"Baratin, Aristide"},{"full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","last_name":"Locatello"},{"full_name":"Rish, Irina","last_name":"Rish","first_name":"Irina"},{"full_name":"Liu, Dianbo","last_name":"Liu","first_name":"Dianbo"},{"first_name":"Kenji","last_name":"Kawaguchi","full_name":"Kawaguchi, Kenji"}],"year":"2024","citation":{"ieee":"R. Arefin <i>et al.</i>, “Unsupervised concept discovery mitigates spurious correlations,” in <i>Proceedings of the 41st International Conference on Machine Learning</i>, Vienna, Austria, 2024, vol. 235, pp. 1672–1688.","mla":"Arefin, Rifat, et al. “Unsupervised Concept Discovery Mitigates Spurious Correlations.” <i>Proceedings of the 41st International Conference on Machine Learning</i>, vol. 235, ML Research Press, 2024, pp. 1672–88.","short":"R. Arefin, Y. Zhang, A. Baratin, F. Locatello, I. Rish, D. Liu, K. Kawaguchi, in:, Proceedings of the 41st International Conference on Machine Learning, ML Research Press, 2024, pp. 
1672–1688.","ista":"Arefin R, Zhang Y, Baratin A, Locatello F, Rish I, Liu D, Kawaguchi K. 2024. Unsupervised concept discovery mitigates spurious correlations. Proceedings of the 41st International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 235, 1672–1688.","chicago":"Arefin, Rifat, Yan Zhang, Aristide Baratin, Francesco Locatello, Irina Rish, Dianbo Liu, and Kenji Kawaguchi. “Unsupervised Concept Discovery Mitigates Spurious Correlations.” In <i>Proceedings of the 41st International Conference on Machine Learning</i>, 235:1672–88. ML Research Press, 2024.","ama":"Arefin R, Zhang Y, Baratin A, et al. Unsupervised concept discovery mitigates spurious correlations. In: <i>Proceedings of the 41st International Conference on Machine Learning</i>. Vol 235. ML Research Press; 2024:1672-1688.","apa":"Arefin, R., Zhang, Y., Baratin, A., Locatello, F., Rish, I., Liu, D., &#38; Kawaguchi, K. (2024). Unsupervised concept discovery mitigates spurious correlations. In <i>Proceedings of the 41st International Conference on Machine Learning</i> (Vol. 235, pp. 1672–1688). 
Vienna, Austria: ML Research Press."},"external_id":{"arxiv":["2402.13368"]},"publication":"Proceedings of the 41st International Conference on Machine Learning","month":"07","publication_status":"published","status":"public","date_updated":"2025-01-30T07:23:10Z","volume":235,"article_processing_charge":"No","day":"30","oa":1,"OA_type":"green","oa_version":"Preprint"},{"date_updated":"2025-07-07T13:23:49Z","ddc":["000"],"status":"public","publication_status":"published","oa":1,"OA_type":"green","oa_version":"Published Version","volume":37,"article_processing_charge":"No","day":"25","file_date_updated":"2025-02-04T13:09:08Z","file":[{"date_updated":"2025-02-04T13:09:08Z","file_size":5659119,"creator":"dernst","content_type":"application/pdf","success":1,"file_name":"2024_NeurIPS_Chen.pdf","file_id":"18997","date_created":"2025-02-04T13:09:08Z","relation":"main_file","checksum":"75c3091e70bd2916cd94afbf40a0c425","access_level":"open_access"}],"citation":{"ieee":"T. Chen, K. Bello, F. Locatello, B. Aragam, and P. K. Ravikumar, “Identifying general mechanism shifts in linear causal representations,” in <i>38th Conference on Neural Information Processing Systems</i>, Vancouver, Canada, 2024, vol. 37.","mla":"Chen, Tianyu, et al. “Identifying General Mechanism Shifts in Linear Causal Representations.” <i>38th Conference on Neural Information Processing Systems</i>, vol. 37, Neural Information Processing Systems Foundation, 2024.","short":"T. Chen, K. Bello, F. Locatello, B. Aragam, P.K. Ravikumar, in:, 38th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2024.","ista":"Chen T, Bello K, Locatello F, Aragam B, Ravikumar PK. 2024. Identifying general mechanism shifts in linear causal representations. 38th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 
37.","chicago":"Chen, Tianyu, Kevin Bello, Francesco Locatello, Bryon Aragam, and Pradeep Kumar Ravikumar. “Identifying General Mechanism Shifts in Linear Causal Representations.” In <i>38th Conference on Neural Information Processing Systems</i>, Vol. 37. Neural Information Processing Systems Foundation, 2024.","ama":"Chen T, Bello K, Locatello F, Aragam B, Ravikumar PK. Identifying general mechanism shifts in linear causal representations. In: <i>38th Conference on Neural Information Processing Systems</i>. Vol 37. Neural Information Processing Systems Foundation; 2024.","apa":"Chen, T., Bello, K., Locatello, F., Aragam, B., &#38; Ravikumar, P. K. (2024). Identifying general mechanism shifts in linear causal representations. In <i>38th Conference on Neural Information Processing Systems</i> (Vol. 37). Vancouver, Canada: Neural Information Processing Systems Foundation."},"year":"2024","external_id":{"arxiv":["2410.24059"]},"has_accepted_license":"1","publication":"38th Conference on Neural Information Processing Systems","month":"09","type":"conference","arxiv":1,"publication_identifier":{"eissn":["1049-5258"]},"title":"Identifying general mechanism shifts in linear causal representations","department":[{"_id":"FrLo"}],"tmp":{"short":"CC BY (4.0)","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode"},"author":[{"full_name":"Chen, Tianyu","last_name":"Chen","first_name":"Tianyu"},{"last_name":"Bello","first_name":"Kevin","full_name":"Bello, Kevin"},{"orcid":"0000-0002-4850-0683","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello","full_name":"Locatello, Francesco"},{"last_name":"Aragam","first_name":"Bryon","full_name":"Aragam, Bryon"},{"last_name":"Ravikumar","first_name":"Pradeep Kumar","full_name":"Ravikumar, Pradeep Kumar"}],"quality_controlled":"1","abstract":[{"lang":"eng","text":"We 
consider the linear causal representation learning setting where we observe a linear mixing of d unknown latent factors, which follow a linear structural causal model. Recent work has shown that it is possible to recover the latent factors as well as the underlying structural causal model over them, up to permutation and scaling, provided that we have at least d environments, each of which corresponds to perfect interventions on a single latent node (factor). After this powerful result, a key open problem faced by the community has been to relax these conditions: allow for coarser than perfect single-node interventions, and allow for fewer than d of them, since the number of latent factors d could be very large. In this work, we consider precisely such a setting, where we allow a smaller than d number of environments, and also allow for very coarse interventions that can very coarsely \\textit{change the entire causal graph over the latent factors}. On the flip side, we relax what we wish to extract to simply the \\textit{list of nodes that have shifted between one or more environments}. We provide a surprising identifiability result that it is indeed possible, under some very mild standard assumptions, to identify the set of shifted nodes. Our identifiability proof moreover is a constructive one: we explicitly provide necessary and sufficient conditions for a node to be a shifted node, and show that we can check these conditions given observed data. Our algorithm lends itself very naturally to the sample setting where instead of just interventional distributions, we are provided datasets of samples from each of these distributions. We corroborate our results on both synthetic experiments as well as an interesting psychometric dataset. 
The code can be found at https://github.com/TianyuCodings/iLCS."}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publisher":"Neural Information Processing Systems Foundation","conference":{"end_date":"2024-12-16","name":"NeurIPS: Neural Information Processing Systems","location":"Vancouver, Canada","start_date":"2024-12-16"},"date_created":"2025-02-04T13:09:34Z","OA_place":"repository","scopus_import":"1","language":[{"iso":"eng"}],"date_published":"2024-09-25T00:00:00Z","_id":"18996","alternative_title":["Advances in Neural Information Processing Systems"],"intvolume":"        37"}]