% Conference paper in the 39th NeurIPS proceedings (2025) on prediction-powered causal inference.
% The title is stored in sentence case without whole-title bracing so the bibliography style
% controls casing; the conference city is kept in the event field rather than the publisher place.
@inproceedings{21076,
  abstract     = {In many scientific experiments, the data annotating cost constraints the pace for testing novel hypotheses. Yet, modern machine learning pipelines offer a promising solution—provided their predictions yield correct conclusions. We focus on Prediction-Powered Causal Inferences (PPCI), i.e., estimating the treatment effect in an unlabeled target experiment, relying on training data with the same outcome annotated but potentially different treatment or effect modifiers. We first show that conditional calibration guarantees valid PPCI at population level. Then, we introduce a sufficient representation constraint transferring validity across experiments, which we propose to enforce in practice in Deconfounded Empirical Risk Minimization, our new model-agnostic training objective. We validate our method on synthetic and real-world scientific data, solving impossible problem instances for Empirical Risk Minimization even with standard invariance constraints. In particular, for the first time, we achieve valid causal inference on a scientific experiment with complex recording and no human annotations, fine-tuning a foundational model on our similar annotated experiment.},
  author       = {Cadei, Riccardo and Demirel, Ilker and De Bartolomeis, Piersilvio and Lindorfer, Lukas and Cremer, Sylvia and Schmid, Cordelia and Locatello, Francesco},
  booktitle    = {39th Annual Conference on Neural Information Processing Systems},
  issn         = {1049-5258},
  publisher    = {Neural Information Processing Systems Foundation},
  title        = {Prediction-powered causal inferences},
  venue        = {San Diego, CA, United States},
  volume       = {38},
  year         = {2025},
}

% Journal article (Nature Communications, vol. 16, 2025) on disease signalling by ant pupae.
% The title carries no proper nouns, so it needs no brace protection; author initials end with a period.
@article{18892,
  abstract     = {Sick individuals often conceal their disease status to group members, thereby preventing social exclusion or aggression. Here we show by behavioural, chemical, immunological and infection load analyses that sick ant pupae instead actively emit a chemical signal that in itself is sufficient to trigger their own destruction by colony members. In our experiments, this altruistic disease-signalling was performed only by worker but not queen pupae. The lack of signalling by queen pupae did not constitute cheating behaviour, but reflected their superior immune capabilities. Worker pupae suffered from extensive pathogen replication whereas queen pupae were able to restrain their infection. Our data suggest the evolution of a finely-tuned signalling system in which it is not the induction of an individual’s immune response, but rather its failure to overcome the infection, that triggers pupal signalling for sacrifice. This demonstrates a balanced interplay between individual and social immunity that efficiently achieves whole-colony health.},
  author       = {Dawson, Erika and Hönigsberger, Michaela and Kampleitner, Niklas and Grasse, Anna V. and Lindorfer, Lukas and Robb, Jennifer and Beikzadeh Abbasi, Farnaz and Strahodinsky, Florian and Leitner, Hanna and Rajendran, Harikrishnan and Schmitt, Thomas and Cremer, Sylvia},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  publisher    = {Springer Nature},
  title        = {Altruistic disease signalling in ant colonies},
  doi          = {10.1038/s41467-025-66175-z},
  volume       = {16},
  year         = {2025},
}

% Conference/workshop paper (2024) introducing the ISTAnt benchmark for causal downstream tasks.
% The abstract is stored on a single line, matching the other entries in this file.
% NOTE(review): volume and publisher look like proceedings metadata while booktitle names a
% workshop; values are kept as found. Confirm against the publisher record before citing.
@inproceedings{18847,
  abstract     = {Machine Learning and AI have the potential to transform data-driven scientific discovery, enabling accurate predictions for several scientific phenomena. As many scientific questions are inherently causal, this paper looks at the causal inference task of treatment effect estimation, where the outcome of interest is recorded in high-dimensional observations in a Randomized Controlled Trial (RCT). Despite being the simplest possible causal setting and a perfect fit for deep learning, we theoretically find that many common choices in the literature may lead to biased estimates. To test the practical impact of these considerations, we recorded ISTAnt, the first real-world benchmark for causal inference downstream tasks on high-dimensional observations as an RCT studying how garden ants (Lasius neglectus) respond to microparticles applied onto their colony members by hygienic grooming. Comparing 6 480 models fine-tuned from state-of-the-art visual backbones, we find that the sampling and modeling choices significantly affect the accuracy of the causal estimate, and that classification accuracy is not a proxy thereof. We further validated the analysis, repeating it on a synthetically generated visual data set controlling the causal model. Our results suggest that future benchmarks should carefully consider real downstream scientific questions, especially causal ones. Further, we highlight guidelines for representation learning methods to help answer causal questions in the sciences.},
  author       = {Cadei, Riccardo and Lindorfer, Lukas and Cremer, Sylvia and Schmid, Cordelia and Locatello, Francesco},
  booktitle    = {ICML 2024 Workshop AI4Science},
  publisher    = {Curran Associates},
  title        = {Smoke and mirrors in causal downstream tasks},
  volume       = {38},
  year         = {2024},
}

% Dataset record (2024): the ISTAnt video dataset, identified by its DOI.
% The title is a single camel-case proper name, so its brace protection is intentional.
% The abstract is kept LaTeX-safe: the tilde meaning "approximately" and the underscore in the
% settings file name are escaped, and a citation command for the OBS Studio reference was removed
% because its key is not defined in the visible part of this file (re-add as plain text if needed).
% The author name for Cremer follows the spelling used by the other entries in this file.
@misc{18895,
  abstract     = {ISTAnt is a new ecological dataset for social immunity and represents the first real-world benchmark for causal inference downstream tasks on high-dimensional observations. It analyzes grooming behavior in the ant Lasius neglectus in groups of three worker ants. The workers for the experiment were obtained from their laboratory stock colony, which had been collected from the field in 2022 in the Botanical Garden Jena, Germany. Ant collection and all experimental work were performed in compliance with international, national and institutional regulations and ethical guidelines. For the experiment, the body surface of one of the three ants was treated with a suspension of either of two microparticle types (diameter {\textasciitilde}5 µm) to induce grooming by the two nestmates, which were individually color-coded by application of a dot of blue or orange paint, respectively. The three ants were housed in small plastic containers (diameter 28mm, height 30mm) with moistened, plastered ground and the interior walls covered with PTFE (polytetrafluoroethane) to hamper climbing by the ants. Filming occurred in a temperature- and humidity-controlled room at 23°C within a custom-made filming box with controlled lighting and ventilation conditions. We set up nine ant groups at a time (always containing both treatments) and placed them randomly on positions 1-9 marked on the floor in a 3x3 grid, about 3mm from each other. The experiment was performed on two consecutive days. Videos were acquired using a USB camera (FLIR blackfly S BFS-U3-120S4C, Teledyne FLIR) with a high-performance lens (HP Series 25mm Focal Length, Edmund optics 86-572) in OBS studio 29.0.0 at a framerate of 30 FPS and a resolution of 2500x2500 pixels. From each original video (105x105 mm), we generated nine individual videos .mkv (each {\textasciitilde}32x32 mm, 770x770 pixels) by determining exact coordinates per container from one frame in GIMP 2.10.36 and cropping of the videos with FFmpeg 6.1.1. Annotation was performed over two consecutive days by three observers who had not been involved in the experimental setup or recording and were unaware of the treatment assignments to ensure bias-free behavioral annotation. They annotated the behavior of the ants during video observations, using custom-made software that saves the start and end frames of behaviors marked in a .csv file (see 'annotations' folder). In one of the videos, one of the nestmates' legs got inadvertently stuck to its body surface during the color-coding, interfering with its behavior, so the video was discarded. This left 44 videos from 5 independent setups (n=24 of treatment 1 and n=20 of treatment 2) of 10 minutes each for a total of 792 000 annotated frames (see 'video' folder). For each video, we provide the following information: the number of the set to which it belongs (1-5); the number of the position within the set reflecting the position of the ant group under the camera (1-9), for which we also provide ‘coordinates’ in the 3x3 grid (taking values -1/0/1 for both X and Y axis); treatment (1 or 2); the hour of the day when the recording was started (in 24h CEST); experimental day (A or B); the top left coordinate of the cropping square from the original video (CropX/CropY); the person annotating the video (given as A, B, C); the date of annotation (1: first day, 2: second day) and in which order the videos were annotated by each person, both reflecting a possible training effect of the person (see 'experiments\_settings.csv' file).},
  author       = {Cadei, Riccardo and Locatello, Francesco and Cremer, Sylvia and Lindorfer, Lukas and Schmid, Cordelia},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{ISTAnt}},
  doi          = {10.6084/M9.FIGSHARE.26484934.V2},
  year         = {2024},
}