@phdthesis{12378,
  abstract     = {Environmental cues influence the highly dynamic morphology of microglia. Strategies to 
characterize these changes usually involve user-selected morphometric features, which 
preclude the identification of a spectrum of context-dependent morphological phenotypes. 
Here, we develop MorphOMICs, a topological data analysis approach, which enables semiautomatic mapping of microglial morphology into an atlas of cue-dependent phenotypes,
overcomes feature-selection bias and minimizes biological variability. 
First, with MorphOMICs we derive the morphological spectrum of microglia across seven 
brain regions during postnatal development and in two distinct Alzheimer’s disease 
degeneration mouse models. We uncover region-specific and sexually dimorphic
morphological trajectories, with females showing an earlier morphological shift than males in 
the degenerating brain. Overall, we demonstrate that both long primary- and short terminal 
processes provide distinct insights to morphological phenotypes. Moreover, using machine 
learning to map novel condition on the spectrum, we observe that microglia morphologies 
reflect a dose-dependent adaptation upon ketamine anesthesia and do not recover to control 
morphologies.
Next, we took advantage of MorphOMICs to build a high-resolution and layer-specific map of 
microglial morphological spectrum in the retina, covering postnatal development and rd10 
degeneration. Here, following photoreceptor death, microglia assume an early development-like morphology. Finally, we map microglial morphology following optic nerve crush on the 
retinal spectrum and observe a layer- and sex-dependent response. 
Overall, MorphOMICs opens a new perspective to analyze microglial morphology across 
multiple conditions, and provides a novel tool to characterize microglial morphology beyond 
the traditionally dichotomized view of microglia.},
  author       = {Colombo, Gloria},
  issn         = {2663-337X},
  pages        = {142},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{MorphOMICs, a tool for mapping microglial morphology, reveals brain region- and sex-dependent phenotypes}},
  doi          = {10.15479/at:ista:12378},
  year         = {2022},
}

@phdthesis{11388,
  abstract     = {In evolve and resequence experiments, a population is sequenced, subjected to selection and
then sequenced again, so that genetic changes before and after selection can be observed at
the genetic level. Here, I use these studies to better understand the genetic basis of complex
traits - traits which depend on more than a few genes.
In the first chapter, I discuss the first evolve and resequence experiment, in which a population
of mice, the so-called "Longshanks" mice, were selected for tibia length while their body mass
was kept constant. The full pedigree is known. We observed a selection response on all
chromosomes and used the infinitesimal model with linkage, a model which assumes an infinite
number of genes with infinitesimally small effect sizes, as a null model. Results implied a very
polygenic basis with a few loci of major effect standing out and changing in parallel. There
was large variability between the different chromosomes in this study, probably due to LD.
In chapter two, I go on to discuss the impact of LD, on the variability in an allele-frequency
based summary statistic, giving an equation based on the initial allele frequencies, average
pairwise LD, and the first four moments of the haplotype block copy number distribution. I
describe this distribution by referring back to the founder generation. I then demonstrate
how to infer selection via a maximum likelihood scheme on the example of a single locus and
discuss how to extend this to more realistic scenarios.
In chapter three, I discuss the second evolve and resequence experiment, in which a small
population of Drosophila melanogaster was selected for increased pupal case size over 6
generations. The experiment was highly replicated with 27 lines selected within family and a
known pedigree. We observed a phenotypic selection response of over one standard deviation.
I describe the patterns in allele frequency data, including allele frequency changes and patterns
of heterozygosity, and give ideas for future work.},
  author       = {Belohlavy, Stefanie},
  isbn         = {978-3-99078-018-3},
  pages        = {98},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{The genetic basis of complex traits studied via analysis of evolve and resequence experiments}},
  doi          = {10.15479/at:ista:11388},
  year         = {2022},
}

@article{10703,
  author       = {Gaertner, Florian and Dos Reis Rodrigues, Patricia and De Vries, Ingrid and Hons, Miroslav and Aguilera, Juan and Riedl, Michael and Leithner, Alexander F and Tasciyan, Saren and Kopf, Aglaja and Merrin, Jack and Zheden, Vanessa and Kaufmann, Walter and Hauschild, Robert and Sixt, Michael K},
  title        = {{WASp triggers mechanosensitive actin patches to facilitate immune cell migration in dense tissues}},
  journal      = {Developmental Cell},
  year         = {2022},
  volume       = {57},
  number       = {1},
  pages        = {47--62.e9},
  publisher    = {Cell Press},
  issn         = {1878-1551},
  doi          = {10.1016/j.devcel.2021.11.024},
  abstract     = {When crawling through the body, leukocytes often traverse tissues that are densely packed with extracellular matrix and other cells, and this raises the question: How do leukocytes overcome compressive mechanical loads? Here, we show that the actin cortex of leukocytes is mechanoresponsive and that this responsiveness requires neither force sensing via the nucleus nor adhesive interactions with a substrate. Upon global compression of the cell body as well as local indentation of the plasma membrane, Wiskott-Aldrich syndrome protein (WASp) assembles into dot-like structures, providing activation platforms for Arp2/3 nucleated actin patches. These patches locally push against the external load, which can be obstructing collagen fibers or other cells, and thereby create space to facilitate forward locomotion. We show in vitro and in vivo that this WASp function is rate limiting for ameboid leukocyte migration in dense but not in loose environments and is required for trafficking through diverse tissues such as skin and lymph nodes.},
}

@phdthesis{12401,
  abstract     = {Detachment of the cancer cells from the bulk of the tumor is the first step of metastasis, which
is the primary cause of cancer related deaths. It is unclear, which factors contribute to this step.
Recent studies indicate a crucial role of the tumor microenvironment in malignant
transformation and metastasis. Studying cancer cell invasion and detachments quantitatively in
the context of its physiological microenvironment is technically challenging. Especially, precise
control of microenvironmental properties in vivo is currently not possible. Here, I studied the
role of microenvironment geometry in the invasion and detachment of cancer cells from the
bulk with a simplistic and reductionist approach. In this approach, I engineered microfluidic
devices to mimic a pseudo 3D extracellular matrix environment, where I was able to
quantitatively tune the geometrical configuration of the microenvironment and follow tumor
cells with fluorescence live imaging. To aid quantitative analysis I developed a widely applicable
software application to automatically analyze and visualize particle tracking data.
Quantitative analysis of tumor cell invasion in isotropic and anisotropic microenvironments
showed that heterogeneity in the microenvironment promotes faster invasion and more
frequent detachment of cells. These observations correlated with overall higher speed of cells at
the edge of the bulk of the cells. In heterogeneous microenvironments cells preferentially
passed through larger pores, thus invading areas of least resistance and generating finger-like
invasive structures. The detachments occurred mostly at the tips of these structures.
To investigate the potential mechanism, we established a two dimensional model to simulate
active Brownian particles representing the cell nuclei dynamics. These simulations backed our in
vitro observations without the need of precise fitting the simulation parameters. Our model
suggests the importance of the pore heterogeneity in the direction perpendicular to the
orientation of bias field (lateral heterogeneity), which causes the interface roughening.},
  author       = {Tasciyan, Saren},
  issn         = {2663-337X},
  pages        = {105},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Role of microenvironment heterogeneity in cancer cell invasion}},
  doi          = {10.15479/at:ista:12401},
  year         = {2022},
}

@article{12248,
  abstract     = {Eurasian brine shrimp (genus Artemia) have closely related sexual and asexual lineages of parthenogenetic females, which produce rare males at low frequencies. Although they are known to have ZW chromosomes, these are not well characterized, and it is unclear whether they are shared across the clade. Furthermore, the underlying genetic architecture of the transmission of asexuality, which can occur when rare males mate with closely related sexual females, is not well understood. We produced a chromosome-level assembly for the sexual Eurasian species Artemia sinica and characterized in detail the pair of sex chromosomes of this species. We combined this new assembly with short-read genomic data for the sexual species Artemia sp. Kazakhstan and several asexual lineages of Artemia parthenogenetica, allowing us to perform an in-depth characterization of sex-chromosome evolution across the genus. We identified a small differentiated region of the ZW pair that is shared by all sexual and asexual lineages, supporting the shared ancestry of the sex chromosomes. We also inferred that recombination suppression has spread to larger sections of the chromosome independently in the American and Eurasian lineages. Finally, we took advantage of a rare male, which we backcrossed to sexual females, to explore the genetic basis of asexuality. Our results suggest that parthenogenesis is likely partly controlled by a locus on the Z chromosome, highlighting the interplay between sex determination and asexuality.},
  author       = {Elkrewi, Marwan N and Khauratovich, Uladzislava and Toups, Melissa A and Bett, Vincent K and Mrnjavac, Andrea and Macon, Ariana and Fraisse, Christelle and Sax, Luca and Huylmans, Ann K and Hontoria, Francisco and Vicoso, Beatriz},
  issn         = {1943-2631},
  journal      = {Genetics},
  keywords     = {Genetics},
  number       = {2},
  pages        = {iyac123},
  publisher    = {Oxford University Press},
  title        = {{ZW sex-chromosome evolution and contagious parthenogenesis in Artemia brine shrimp}},
  doi          = {10.1093/genetics/iyac123},
  volume       = {222},
  year         = {2022},
}

@article{10767,
  abstract     = {The t-haplotype of mice is a classical model for autosomal transmission distortion. A largely non-recombining variant of the proximal region of chromosome 17, it is transmitted to more than 90\% of the progeny of heterozygous males through the disabling of sperm carrying a standard chromosome. While extensive genetic and functional work has shed light on individual genes involved in drive, much less is known about the evolution and function of the rest of its hundreds of genes. Here, we characterize the sequence and expression of dozens of t-specific transcripts and of their chromosome 17 homologues. Many genes showed reduced expression of the t-allele, but an equal number of genes showed increased expression of their t-copy, consistent with increased activity or a newly evolved function. Genes on the t-haplotype had a significantly higher non-synonymous substitution rate than their homologues on the standard chromosome, with several genes harbouring dN/dS ratios above 1. Finally, the t-haplotype has acquired at least two genes from other chromosomes, which show high and tissue-specific expression. These results provide a first overview of the gene content of this selfish element, and support a more dynamic evolutionary scenario than expected of a large genomic region with almost no recombination.},
  author       = {Kelemen, Réka K and Elkrewi, Marwan N and Lindholm, Anna K and Vicoso, Beatriz},
  issn         = {1471-2954},
  journal      = {Proceedings of the Royal Society B: Biological Sciences},
  number       = {1968},
  pages        = {20211985},
  publisher    = {The Royal Society},
  title        = {{Novel patterns of expression and recruitment of new genes on the t-haplotype, a mouse selfish chromosome}},
  doi          = {10.1098/rspb.2021.1985},
  volume       = {289},
  year         = {2022},
}

@article{10924,
  abstract     = {Solid-state microwave systems offer strong interactions for fast quantum logic and sensing but photons at telecom wavelength are the ideal choice for high-density low-loss quantum interconnects. A general-purpose interface that can make use of single photon effects requires $< 1$ input noise quanta, which has remained elusive due to either low efficiency or pump induced heating. Here we demonstrate coherent electro-optic modulation on nanosecond-timescales with only $0.16^{+0.02}_{-0.01}$ microwave input noise photons with a total bidirectional transduction efficiency of 8.7\% (or up to 15\% with $0.41^{+0.02}_{-0.02}$), as required for near-term heralded quantum network protocols. The use of short and high-power optical pump pulses also enables near-unity cooperativity of the electro-optic interaction leading to an internal pure conversion efficiency of up to 99.5\%. Together with the low mode occupancy this provides evidence for electro-optic laser cooling and vacuum amplification as predicted a decade ago.},
  author       = {Sahu, Rishabh and Hease, William J and Rueda Sanchez, Alfredo R and Arnold, Georg M and Qiu, Liu and Fink, Johannes M},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  publisher    = {Springer Nature},
  title        = {{Quantum-enabled operation of a microwave-optical interface}},
  doi          = {10.1038/s41467-022-28924-2},
  volume       = {13},
  year         = {2022},
}

@phdthesis{11196,
  abstract     = {One of the fundamental questions in Neuroscience is how the structure of synapses and their physiological properties are related. While synaptic transmission remains a dynamic process, electron microscopy provides images with comparably low temporal resolution (Studer et al., 2014). The current work overcomes this challenge and describes an improved “Flash and Freeze” technique (Watanabe et al., 2013a; Watanabe et al., 2013b) to study synaptic transmission at the hippocampal mossy fiber-CA3 pyramidal neuron synapses, using mouse acute brain slices and organotypic slices culture. The improved method allowed for selective stimulation of presynaptic mossy fiber boutons and the observation of synaptic vesicle pool dynamics at the active zones. Our results uncovered several intriguing morphological features of mossy fiber boutons. First, the docked vesicle pool was largely depleted (more than 70\%) after stimulation, implying that the docked synaptic vesicles pool and readily releasable pool are vastly overlapping in mossy fiber boutons. Second, the synaptic vesicles are skewed towards larger diameters, displaying a wide range of sizes. An increase in the mean diameter of synaptic vesicles, after single and repetitive stimulation, suggests that smaller vesicles have a higher release probability. Third, we observed putative endocytotic structures after moderate light stimulation, matching the timing of previously described ultrafast endocytosis (Watanabe et al., 2013a; Delvendahl et al., 2016).
In addition, synaptic transmission depends on a sophisticated system of protein machinery and calcium channels (Südhof, 2013b), which amplifies the challenge in studying synaptic communication as these interactions can be potentially modified during synaptic plasticity. And although recent study elucidated the potential correlation between physiological and morphological properties of synapses during synaptic plasticity (Vandael et al., 2020), the molecular underpinning of it remains unknown. Thus, the presented work tries to overcome this challenge and aims to pinpoint changes in the molecular architecture at hippocampal mossy fiber bouton synapses during short- and long-term potentiation (STP and LTP), we combined chemical potentiation, with the application of a cyclic adenosine monophosphate agonist (i.e. forskolin) and freeze-fracture replica immunolabelling. This method allowed the localization of membrane-bound proteins with nanometer precision within the active zone, in particular, P/Q-type calcium channels and synaptic vesicle priming proteins Munc13-1/2. First, we found that the number of clusters of Munc13-1 in the mossy fiber bouton active zone increased significantly during STP, but decreased to lower than the control value during LTP. Secondly, although the distance between the calcium channels and Munc13-1s did not change after induction of STP, it shortened during the LTP phase. Additionally, forskolin did not affect Munc13-2 distribution during STP and LTP. These results indicate the existence of two distinct mechanisms that govern STP and LTP at mossy fiber bouton synapses: an increase in the readily releasable pool in the case of STP and a potential increase in release probability during LTP. “Flash and freeze” and functional electron microscopy, are versatile methods that can be successfully applied to intact brain circuits to study synaptic transmission even at the molecular level.},
  author       = {Kim, Olena},
  issn         = {2663-337X},
  pages        = {132},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Nanoarchitecture of hippocampal mossy fiber-CA3 pyramidal neuron synapses}},
  doi          = {10.15479/at:ista:11196},
  year         = {2022},
}

@article{12272,
  abstract     = {Reading, interpreting and crawling along gradients of chemotactic cues is one of the most complex questions in cell biology. In this issue, Georgantzoglou et al. (2022. J. Cell. Biol. https://doi.org/10.1083/jcb.202103207) use in vivo models to map the temporal sequence of how neutrophils respond to an acutely arising gradient of chemoattractant.},
  author       = {Stopp, Julian A and Sixt, Michael K},
  issn         = {1540-8140},
  journal      = {Journal of Cell Biology},
  keywords     = {Cell Biology},
  number       = {8},
  pages        = {e202206127},
  publisher    = {Rockefeller University Press},
  title        = {{Plan your trip before you leave: The neutrophils’ search-and-run journey}},
  doi          = {10.1083/jcb.202206127},
  volume       = {221},
  year         = {2022},
}

@article{11420,
  author       = {Shevchenko, Aleksandr and Kungurtsev, Vyacheslav and Mondelli, Marco},
  title        = {{Mean-field analysis of piecewise linear solutions for wide ReLU networks}},
  journal      = {Journal of Machine Learning Research},
  year         = {2022},
  volume       = {23},
  number       = {130},
  pages        = {1--55},
  publisher    = {Journal of Machine Learning Research},
  issn         = {1533-7928},
  abstract     = {Understanding the properties of neural networks trained via stochastic gradient descent (SGD) is at the heart of the theory of deep learning. In this work, we take a mean-field view, and consider a two-layer ReLU network trained via noisy-SGD for a univariate regularized regression problem. Our main result is that SGD with vanishingly small noise injected in the gradients is biased towards a simple solution: at convergence, the ReLU network implements a piecewise linear map of the inputs, and the number of “knot” points -- i.e., points where the tangent of the ReLU network estimator changes -- between two consecutive training inputs is at most three. In particular, as the number of neurons of the network grows, the SGD dynamics is captured by the solution of a gradient flow and, at convergence, the distribution of the weights approaches the unique minimizer of a related free energy, which has a Gibbs form. Our key technical contribution consists in the analysis of the estimator resulting from this minimizer: we show that its second derivative vanishes everywhere, except at some specific locations which represent the “knot” points. We also provide empirical evidence that knots at locations distinct from the data points might occur, as predicted by our theory.},
}

@article{12186,
  author       = {Ding, Pingtao and Sakai, Toshiyuki and Krishna Shrestha, Ram and Manosalva Perez, Nicolas and Guo, Wenbin and Ngou, Bruno Pok Man and He, Shengbo and Liu, Chang and Feng, Xiaoqi and Zhang, Runxuan and Vandepoele, Klaas and MacLean, Dan and Jones, Jonathan D G},
  title        = {{Chromatin accessibility landscapes activated by cell-surface and intracellular immune receptors}},
  journal      = {Journal of Experimental Botany},
  year         = {2021},
  volume       = {72},
  number       = {22},
  pages        = {7927--7941},
  publisher    = {Oxford University Press},
  issn         = {0022-0957},
  doi          = {10.1093/jxb/erab373},
  keywords     = {Plant Science, Physiology},
  abstract     = {Activation of cell-surface and intracellular receptor-mediated immunity results in rapid transcriptional reprogramming that underpins disease resistance. However, the mechanisms by which co-activation of both immune systems lead to transcriptional changes are not clear. Here, we combine RNA-seq and ATAC-seq to define changes in gene expression and chromatin accessibility. Activation of cell-surface or intracellular receptor-mediated immunity, or both, increases chromatin accessibility at induced defence genes. Analysis of ATAC-seq and RNA-seq data combined with publicly available information on transcription factor DNA-binding motifs enabled comparison of individual gene regulatory networks activated by cell-surface or intracellular receptor-mediated immunity, or by both. These results and analyses reveal overlapping and conserved transcriptional regulatory mechanisms between the two immune systems.},
}

@article{14117,
  abstract     = {The two fields of machine learning and graphical causality arose and are developed separately. However, there is, now, cross-pollination and increasing interest in both fields to benefit from the advances of the other. In this article, we review fundamental concepts of causal inference and relate them to crucial open problems of machine learning, including transfer and generalization, thereby assaying how causality can contribute to modern machine learning research. This also applies in the opposite direction: we note that most work in causality starts from the premise that the causal variables are given. A central problem for AI and causality is, thus, causal representation learning, that is, the discovery of high-level causal variables from low-level observations. Finally, we delineate some implications of causality for machine learning and propose key research areas at the intersection of both communities.},
  author       = {Schölkopf, Bernhard and Locatello, Francesco and Bauer, Stefan and Ke, Nan Rosemary and Kalchbrenner, Nal and Goyal, Anirudh and Bengio, Yoshua},
  issn         = {1558-2256},
  journal      = {Proceedings of the IEEE},
  keywords     = {Electrical and Electronic Engineering},
  number       = {5},
  pages        = {612--634},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Toward causal representation learning}},
  doi          = {10.1109/jproc.2021.3058954},
  volume       = {109},
  year         = {2021},
}

@inproceedings{14176,
  abstract     = {Intensive care units (ICU) are increasingly looking towards machine learning for methods to provide online monitoring of critically ill patients. In machine learning, online monitoring is often formulated as a supervised learning problem. Recently, contrastive learning approaches have demonstrated promising improvements over competitive supervised benchmarks. These methods rely on well-understood data augmentation techniques developed for image data which do not apply to online monitoring. In this work, we overcome this limitation by
supplementing time-series data augmentation techniques with a novel contrastive
learning objective which we call neighborhood contrastive learning (NCL). Our objective explicitly groups together contiguous time segments from each patient while maintaining state-specific information. Our experiments demonstrate a marked improvement over existing work applying contrastive methods to medical time-series.},
  author       = {Yèche, Hugo and Dresdner, Gideon and Locatello, Francesco and Hüser, Matthias and Rätsch, Gunnar},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning},
  location     = {Virtual},
  pages        = {11964--11974},
  publisher    = {ML Research Press},
  title        = {{Neighborhood contrastive learning applied to online patient monitoring}},
  volume       = {139},
  year         = {2021},
}

@inproceedings{14177,
  author       = {Träuble, Frederik and Creager, Elliot and Kilbertus, Niki and Locatello, Francesco and Dittadi, Andrea and Goyal, Anirudh and Schölkopf, Bernhard and Bauer, Stefan},
  title        = {{On disentangled representations learned from correlated data}},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning},
  year         = {2021},
  volume       = {139},
  pages        = {10401--10412},
  publisher    = {ML Research Press},
  location     = {Virtual},
  abstract     = {The focus of disentanglement approaches has been on identifying independent factors of variation in data. However, the causal variables underlying real-world observations are often not statistically independent. In this work, we bridge the gap to real-world scenarios by analyzing the behavior of the most prominent disentanglement approaches on correlated data in a large-scale empirical study (including 4260 models). We show and quantify that systematically induced correlations in the dataset are being learned and reflected in the latent representations, which has implications for downstream applications of disentanglement such as fairness. We also demonstrate how to resolve these latent correlations, either using weak supervision during
training or by post-hoc correcting a pre-trained model with a small number of labels.},
}

@inproceedings{14178,
  author       = {Dittadi, Andrea and Träuble, Frederik and Locatello, Francesco and Wüthrich, Manuel and Agrawal, Vaibhav and Winther, Ole and Bauer, Stefan and Schölkopf, Bernhard},
  title        = {{On the transfer of disentangled representations in realistic settings}},
  booktitle    = {The Ninth International Conference on Learning Representations},
  year         = {2021},
  location     = {Virtual},
  abstract     = {Learning meaningful representations that disentangle the underlying structure of the data generating process is considered to be of key importance in machine learning. While disentangled representations were found to be useful for diverse tasks such as abstract reasoning and fair classification, their scalability and real-world impact remain questionable. We introduce a new high-resolution dataset with 1M simulated images and over 1,800 annotated real-world images of the same setup. In contrast to previous work, this new dataset exhibits correlations, a complex underlying structure, and allows to evaluate transfer to unseen simulated and real-world settings where the encoder i) remains in distribution or ii) is out of distribution. We propose new architectures in order to scale disentangled representation learning to realistic high-resolution settings and conduct a large-scale empirical study of disentangled representations on this dataset. We observe that disentanglement is a good predictor for out-of-distribution (OOD) task performance.},
}

@inproceedings{14179,
  abstract     = {Self-supervised representation learning has shown remarkable success in a number of domains. A common practice is to perform data augmentation via hand-crafted transformations intended to leave the semantics of the data invariant. We seek to understand the empirical success of this approach from a theoretical perspective. We formulate the augmentation process as a latent variable model by postulating a partition of the latent representation into a content component, which is assumed invariant to augmentation, and a style component, which is allowed to change. Unlike prior work on disentanglement and independent component analysis, we allow for both nontrivial statistical and causal dependencies in the latent space. We study the identifiability of the latent representation based on pairs of views of the observations and prove sufficient conditions that allow us to identify the invariant content partition up to an invertible mapping in both generative and discriminative settings. We find numerical simulations with dependent latent variables are consistent with our theory. Lastly, we introduce Causal3DIdent, a dataset of high-dimensional, visually complex images with rich causal dependencies, which we use to study the effect of data augmentations performed in practice.},
  author       = {von Kügelgen, Julius and Sharma, Yash and Gresele, Luigi and Brendel, Wieland and Schölkopf, Bernhard and Besserve, Michel and Locatello, Francesco},
  booktitle    = {Advances in Neural Information Processing Systems},
  isbn         = {9781713845393},
  location     = {Virtual},
  pages        = {16451--16467},
  title        = {{Self-supervised learning with data augmentations provably isolates content from style}},
  volume       = {34},
  year         = {2021},
}

@inproceedings{14180,
  author       = {Rahaman, Nasim and Gondal, Muhammad Waleed and Joshi, Shruti and Gehler, Peter and Bengio, Yoshua and Locatello, Francesco and Schölkopf, Bernhard},
  title        = {{Dynamic inference with neural interpreters}},
  booktitle    = {Advances in Neural Information Processing Systems},
  year         = {2021},
  volume       = {34},
  pages        = {10985--10998},
  location     = {Virtual},
  isbn         = {9781713845393},
  abstract     = {Modern neural network architectures can leverage large amounts of data to generalize well within the training distribution. However, they are less capable of systematic generalization to data drawn from unseen but related distributions, a feat that is hypothesized to require compositional reasoning and reuse of knowledge. In this work, we present Neural Interpreters, an architecture that factorizes inference in a self-attention network as a system of modules, which we call \emph{functions}. Inputs to the model are routed through a sequence of functions in a way that is end-to-end learned. The proposed architecture can flexibly compose computation along width and depth, and lends itself well to capacity extension after training. To demonstrate the versatility of Neural Interpreters, we evaluate it in two distinct settings: image classification and visual abstract reasoning on Raven Progressive Matrices. In the former, we show that Neural Interpreters perform on par with the vision transformer using fewer parameters, while being transferrable to a new task in a sample efficient manner. In the latter, we find that Neural Interpreters are competitive with respect to the state-of-the-art in terms of systematic generalization. },
}

@inproceedings{14181,
  author       = {Dresdner, Gideon and Shekhar, Saurav and Pedregosa, Fabian and Locatello, Francesco and Rätsch, Gunnar},
  title        = {{Boosting variational inference with locally adaptive step-sizes}},
  booktitle    = {Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence},
  publisher    = {International Joint Conferences on Artificial Intelligence},
  location     = {Montreal, Canada},
  pages        = {2337--2343},
  year         = {2021},
  doi          = {10.24963/ijcai.2021/322},
  abstract     = {Variational Inference makes a trade-off between the capacity of the variational family and the tractability of finding an approximate posterior distribution. Instead, Boosting Variational Inference allows practitioners to obtain increasingly good posterior approximations by spending more compute. The main obstacle to widespread adoption of Boosting Variational Inference is the amount of resources necessary to improve over a strong Variational Inference baseline. In our work, we trace this limitation back to the global curvature of the KL-divergence. We characterize how the global curvature impacts time and memory consumption, address the problem with the notion of local curvature, and provide a novel approximate backtracking algorithm for estimating local curvature. We give new theoretical convergence rates for our algorithms and provide experimental validation on synthetic and real-world datasets.},
}

@inproceedings{14182,
  abstract     = {When machine learning systems meet real world applications, accuracy is only
one of several requirements. In this paper, we assay a complementary
perspective originating from the increasing availability of pre-trained and
regularly improving state-of-the-art models. While new improved models develop
at a fast pace, downstream tasks vary more slowly or stay constant. Assume that
we have a large unlabelled data set for which we want to maintain accurate
predictions. Whenever a new and presumably better ML model becomes available,
we encounter two problems: (i) given a limited budget, which data points should
be re-evaluated using the new model?; and (ii) if the new predictions differ
from the current ones, should we update? Problem (i) is about compute cost,
which matters for very large data sets and models. Problem (ii) is about
maintaining consistency of the predictions, which can be highly relevant for
downstream applications; our demand is to avoid negative flips, i.e., changing
correct to incorrect predictions. In this paper, we formalize the Prediction
Update Problem and present an efficient probabilistic approach as answer to the
above questions. In extensive experiments on standard classification benchmark
data sets, we show that our method outperforms alternative strategies along key
metrics for backward-compatible prediction updates.},
  author       = {Träuble, Frederik and Kügelgen, Julius von and Kleindessner, Matthäus and Locatello, Francesco and Schölkopf, Bernhard and Gehler, Peter},
  booktitle    = {Advances in Neural Information Processing Systems},
  isbn         = {9781713845393},
  location     = {Virtual},
  pages        = {116--128},
  title        = {{Backward-compatible prediction updates: A probabilistic approach}},
  volume       = {34},
  year         = {2021},
}

@misc{14185,
  author       = {Weissenborn, Dirk and Uszkoreit, Jakob and Unterthiner, Thomas and Mahendran, Aravindh and Locatello, Francesco and Kipf, Thomas and Heigold, Georg and Dosovitskiy, Alexey},
  title        = {{Object-centric learning with slot attention}},
  year         = {2021},
  abstract     = {A method involves receiving a perceptual representation including a plurality of feature vectors, and initializing a plurality of slot vectors represented by a neural network memory unit. Each respective slot vector is configured to represent a corresponding entity in the perceptual representation. The method also involves determining an attention matrix based on a product of the plurality of feature vectors transformed by a key function and the plurality of slot vectors transformed by a query function. Each respective value of a plurality of values along each respective dimension of the attention matrix is normalized with respect to the plurality of values. The method additionally involves determining an update matrix based on the plurality of feature vectors transformed by a value function and the attention matrix, and updating the plurality of slot vectors based on the update matrix by way of the neural network memory unit.},
}