@article{12238,
  abstract     = {Upon the initiation of collective cell migration, the cells at the free edge are specified as leader cells; however, the mechanism underlying the leader cell specification remains elusive. Here, we show that lamellipodial extension after the release from mechanical confinement causes sustained extracellular signal-regulated kinase (ERK) activation and underlies the leader cell specification. Live-imaging of Madin-Darby canine kidney (MDCK) cells and mouse epidermis through the use of Förster resonance energy transfer (FRET)-based biosensors showed that leader cells exhibit sustained ERK activation in a hepatocyte growth factor (HGF)-dependent manner. Meanwhile, follower cells exhibit oscillatory ERK activation waves in an epidermal growth factor (EGF) signaling-dependent manner. Lamellipodial extension at the free edge increases the cellular sensitivity to HGF. The HGF-dependent ERK activation, in turn, promotes lamellipodial extension, thereby forming a positive feedback loop between cell extension and ERK activation and specifying the cells at the free edge as the leader cells. Our findings show that the integration of physical and biochemical cues underlies the leader cell specification during collective cell migration.},
  author       = {Hino, Naoya and Matsuda, Kimiya and Jikko, Yuya and Maryu, Gembu and Sakai, Katsuya and Imamura, Ryu and Tsukiji, Shinya and Aoki, Kazuhiro and Terai, Kenta and Hirashima, Tsuyoshi and Trepat, Xavier and Matsuda, Michiyuki},
  issn         = {1534-5807},
  journal      = {Developmental Cell},
  keywords     = {Developmental Biology, Cell Biology, General Biochemistry, Genetics and Molecular Biology, Molecular Biology},
  number       = {19},
  pages        = {2290--2304.e7},
  publisher    = {Elsevier},
  title        = {{A feedback loop between lamellipodial extension and HGF-ERK signaling specifies leader cells during collective cell migration}},
  doi          = {10.1016/j.devcel.2022.09.003},
  volume       = {57},
  year         = {2022},
}

@article{12239,
  abstract     = {Biological systems are the sum of their dynamic three-dimensional (3D) parts. Therefore, it is critical to study biological structures in 3D and at high resolution to gain insights into their physiological functions. Electron microscopy of metal replicas of unroofed cells and isolated organelles has been a key technique to visualize intracellular structures at nanometer resolution. However, many of these methods require specialized equipment and personnel to complete them. Here, we present novel accessible methods to analyze biological structures in unroofed cells and biochemically isolated organelles in 3D and at nanometer resolution, focusing on Arabidopsis clathrin-coated vesicles (CCVs). While CCVs are essential trafficking organelles, their detailed structural information is lacking due to their poor preservation when observed via classical electron microscopy protocols experiments. First, we establish a method to visualize CCVs in unroofed cells using scanning transmission electron microscopy tomography, providing sufficient resolution to define the clathrin coat arrangements. Critically, the samples are prepared directly on electron microscopy grids, removing the requirement to use extremely corrosive acids, thereby enabling the use of this method in any electron microscopy lab. Secondly, we demonstrate that this standardized sample preparation allows the direct comparison of isolated CCV samples with those visualized in cells. Finally, to facilitate the high-throughput and robust screening of metal replicated samples, we provide a deep learning analysis method to screen the “pseudo 3D” morphologies of CCVs imaged with 2D modalities. Collectively, our work establishes accessible ways to examine the 3D structure of biological samples and provide novel insights into the structure of plant CCVs.},
  author       = {Johnson, Alexander J and Kaufmann, Walter and Sommer, Christoph M and Costanzo, Tommaso and Dahhan, Dana A. and Bednarek, Sebastian Y. and Friml, Jiří},
  issn         = {1674-2052},
  journal      = {Molecular Plant},
  keywords     = {Plant Science, Molecular Biology},
  number       = {10},
  pages        = {1533--1542},
  publisher    = {Elsevier},
  title        = {{Three-dimensional visualization of planta clathrin-coated vesicles at ultrastructural resolution}},
  doi          = {10.1016/j.molp.2022.09.003},
  volume       = {15},
  year         = {2022},
}

@article{12243,
  abstract     = {We consider the eigenvalues of a large dimensional real or complex Ginibre matrix in the region of the complex plane where their real parts reach their maximum value. This maximum follows the Gumbel distribution, and these extreme eigenvalues form a Poisson point process as the dimension asymptotically tends to infinity. In the complex case, these facts have already been established by Bender [Probab. Theory Relat. Fields 147, 241 (2010)] and in the real case by Akemann and Phillips [J. Stat. Phys. 155, 421 (2014)] even for the more general elliptic ensemble with a sophisticated saddle point analysis. The purpose of this article is to give a very short direct proof in the Ginibre case with an effective error term. Moreover, our estimates on the correlation kernel in this regime serve as a key input for accurately locating [Formula: see text] for any large matrix X with i.i.d. entries in the companion paper [G. Cipolloni et al., arXiv:2206.04448 (2022)]. },
  author       = {Cipolloni, Giorgio and Erdős, László and Schröder, Dominik J and Xu, Yuanyuan},
  issn         = {1089-7658},
  journal      = {Journal of Mathematical Physics},
  keywords     = {Mathematical Physics, Statistical and Nonlinear Physics},
  number       = {10},
  publisher    = {AIP Publishing},
  title        = {{Directional extremal statistics for Ginibre eigenvalues}},
  doi          = {10.1063/5.0104290},
  volume       = {63},
  year         = {2022},
}

@article{12245,
  abstract     = {MicroRNAs (miRs) have an important role in tuning dynamic gene expression. However, the mechanism by which they are quantitatively controlled is unknown. We show that the amount of mature miR-9, a key regulator of neuronal development, increases during zebrafish neurogenesis in a sharp stepwise manner. We characterize the spatiotemporal profile of seven distinct microRNA primary transcripts (pri-mir)-9s that produce the same mature miR-9 and show that they are sequentially expressed during hindbrain neurogenesis. Expression of late-onset pri-mir-9-1 is added on to, rather than replacing, the expression of early onset pri-mir-9-4 and -9-5 in single cells. CRISPR/Cas9 mutation of the late-onset pri-mir-9-1 prevents the developmental increase of mature miR-9, reduces late neuronal differentiation and fails to downregulate Her6 at late stages. Mathematical modelling shows that an adaptive network containing Her6 is insensitive to linear increases in miR-9 but responds to stepwise increases of miR-9. We suggest that a sharp stepwise increase of mature miR-9 is created by sequential and additive temporal activation of distinct loci. This may be a strategy to overcome adaptation and facilitate a transition of Her6 to a new dynamic regime or steady state.},
  author       = {Soto, Ximena and Burton, Joshua and Manning, Cerys S. and Minchington, Thomas and Lea, Robert and Lee, Jessica and Kursawe, Jochen and Rattray, Magnus and Papalopulu, Nancy},
  issn         = {1477-9129},
  journal      = {Development},
  keywords     = {Developmental Biology, Molecular Biology},
  number       = {19},
  publisher    = {The Company of Biologists},
  title        = {{Sequential and additive expression of miR-9 precursors control timing of neurogenesis}},
  doi          = {10.1242/dev.200474},
  volume       = {149},
  year         = {2022},
}

@article{12246,
  abstract     = {The Lieb–Oxford inequality provides a lower bound on the Coulomb energy of a classical system of N identical charges only in terms of their one-particle density. We prove here a new estimate on the best constant in this inequality. Numerical evaluation provides the value 1.58, which is a significant improvement to the previously known value 1.64. The best constant has recently been shown to be larger than 1.44. In a second part, we prove that the constant can be reduced to 1.25 when the inequality is restricted to Hartree–Fock states. This is the first proof that the exchange term is always much lower than the full indirect Coulomb energy.},
  author       = {Lewin, Mathieu and Lieb, Elliott H. and Seiringer, Robert},
  issn         = {1573-0530},
  journal      = {Letters in Mathematical Physics},
  keywords     = {Mathematical Physics, Statistical and Nonlinear Physics},
  number       = {5},
  publisher    = {Springer Nature},
  title        = {{Improved Lieb–Oxford bound on the indirect and exchange energies}},
  doi          = {10.1007/s11005-022-01584-5},
  volume       = {112},
  year         = {2022},
}

@article{12247,
  abstract     = {Chromosomal inversions have been shown to play a major role in a local adaptation by suppressing recombination between alternative arrangements and maintaining beneficial allele combinations. However, so far, their importance relative to the remaining genome remains largely unknown. Understanding the genetic architecture of adaptation requires better estimates of how loci of different effect sizes contribute to phenotypic variation. Here, we used three Swedish islands where the marine snail Littorina saxatilis has repeatedly evolved into two distinct ecotypes along a habitat transition. We estimated the contribution of inversion polymorphisms to phenotypic divergence while controlling for polygenic effects in the remaining genome using a quantitative genetics framework. We confirmed the importance of inversions but showed that contributions of loci outside inversions are of similar magnitude, with variable proportions dependent on the trait and the population. Some inversions showed consistent effects across all sites, whereas others exhibited site-specific effects, indicating that the genomic basis for replicated phenotypic divergence is only partly shared. The contributions of sexual dimorphism as well as environmental factors to phenotypic variation were significant but minor compared to inversions and polygenic background. Overall, this integrated approach provides insight into the multiple mechanisms contributing to parallel phenotypic divergence.},
  author       = {Koch, Eva L. and Ravinet, Mark and Westram, Anja M and Johannesson, Kerstin and Butlin, Roger K.},
  issn         = {1558-5646},
  journal      = {Evolution},
  keywords     = {General Agricultural and Biological Sciences, Genetics, Ecology, Evolution, Behavior and Systematics},
  number       = {10},
  pages        = {2332--2346},
  publisher    = {Wiley},
  title        = {{Genetic architecture of repeated phenotypic divergence in Littorina saxatilis evolution}},
  doi          = {10.1111/evo.14602},
  volume       = {76},
  year         = {2022},
}

@article{12249,
  abstract     = {The chemical potential of a component in a solution is defined as the free energy change as the amount of that component changes. Computing this fundamental thermodynamic property from atomistic simulations is notoriously difficult because of the convergence issues involved in free energy methods and finite size effects. This Communication presents the so-called S0 method, which can be used to obtain chemical potentials from static structure factors computed from equilibrium molecular dynamics simulations under the isothermal–isobaric ensemble. This new method is demonstrated on the systems of binary Lennard-Jones particles, urea–water mixtures, a NaCl aqueous solution, and a high-pressure carbon–hydrogen mixture. },
  author       = {Cheng, Bingqing},
  issn         = {1089-7690},
  journal      = {The Journal of Chemical Physics},
  keywords     = {Physical and Theoretical Chemistry, General Physics and Astronomy},
  number       = {12},
  publisher    = {AIP Publishing},
  title        = {{Computing chemical potentials of solutions from structure factors}},
  doi          = {10.1063/5.0107059},
  volume       = {157},
  year         = {2022},
}

@article{12251,
  abstract     = {Amyloid formation is linked to devastating neurodegenerative diseases, motivating detailed studies of the mechanisms of amyloid formation. For Aβ, the peptide associated with Alzheimer’s disease, the mechanism and rate of aggregation have been established for a range of variants and conditions \emph{in vitro} and in bodily fluids. A key outstanding question is how the relative stabilities of monomers, fibrils and intermediates affect each step in the fibril formation process. By monitoring the kinetics of aggregation of Aβ42, in the presence of urea or guanidinium hydrochloride (GuHCl), we here determine the rates of the underlying microscopic steps and establish the importance of changes in relative stability induced by the presence of denaturant for each individual step. Denaturants shift the equilibrium towards the unfolded state of each species. We find that a non-ionic denaturant, urea, reduces the overall aggregation rate, and that the effect on nucleation is stronger than the effect on elongation. Urea reduces the rate of secondary nucleation by decreasing the coverage of fibril surfaces and the rate of nucleus formation. It also reduces the rate of primary nucleation, increasing its reaction order. The ionic denaturant, GuHCl, accelerates the aggregation at low denaturant concentrations and decelerates the aggregation at high denaturant concentrations. Below approximately 0.25 M GuHCl, the screening of repulsive electrostatic interactions between peptides by the charged denaturant dominates, leading to an increased aggregation rate. At higher GuHCl concentrations, the electrostatic repulsion is completely screened, and the denaturing effect dominates. The results illustrate how the differential effects of denaturants on stability of monomer, oligomer and fibril translate to differential effects on microscopic steps, with the rate of nucleation being most strongly reduced.},
  author       = {Weiffert, Tanja and Meisl, Georg and Curk, Samo and Cukalevski, Risto and Šarić, Anđela and Knowles, Tuomas P. J. and Linse, Sara},
  issn         = {1662-453X},
  journal      = {Frontiers in Neuroscience},
  keywords     = {General Neuroscience},
  publisher    = {Frontiers Media},
  title        = {{Influence of denaturants on amyloid β42 aggregation kinetics}},
  doi          = {10.3389/fnins.2022.943355},
  volume       = {16},
  year         = {2022},
}

@article{12252,
  abstract     = {The COVID-19 pandemic not only resulted in a global crisis, but also accelerated vaccine development and antibody discovery. Herein we report a synthetic humanized VHH library development pipeline for nanomolar-range affinity VHH binders to SARS-CoV-2 variants of concern (VoC) receptor binding domains (RBD) isolation. Trinucleotide-based randomization of CDRs by Kunkel mutagenesis with the subsequent rolling-cycle amplification resulted in more than $10^{11}$ diverse phage display library in a manageable for a single person number of electroporation reactions. We identified a number of nanomolar-range affinity VHH binders to SARS-CoV-2 variants of concern (VoC) receptor binding domains (RBD) by screening a novel synthetic humanized antibody library. In order to explore the most robust and fast method for affinity improvement, we performed affinity maturation by CDR1 and CDR2 shuffling and avidity engineering by multivalent trimeric VHH fusion protein construction. As a result, H7-Fc and G12x3-Fc binders were developed with the affinities in nM and pM range respectively. Importantly, these affinities are weakly influenced by most of SARS-CoV-2 VoC mutations and they retain moderate binding to BA.4/5. The plaque reduction neutralization test (PRNT) resulted in IC50 = 100 ng/ml and 9.6 ng/ml for H7-Fc and G12x3-Fc antibodies, respectively, for the emerging Omicron BA.1 variant. Therefore, these VHH could expand the present landscape of SARS-CoV-2 neutralization binders with the therapeutic potential for present and future SARS-CoV-2 variants.},
  author       = {Dormeshkin, Dmitri and Shapira, Michail and Dubovik, Simon and Kavaleuski, Anton and Katsin, Mikalai and Migas, Alexandr and Meleshko, Alexander and Semyonov, Sergei},
  issn         = {1664-3224},
  journal      = {Frontiers in Immunology},
  keywords     = {Immunology, Immunology and Allergy, COVID-19, SARS-CoV-2, synthetic library, RBD, neutralization nanobody, VHH},
  publisher    = {Frontiers Media},
  title        = {{Isolation of an escape-resistant SARS-CoV-2 neutralizing nanobody from a novel synthetic nanobody library}},
  doi          = {10.3389/fimmu.2022.965446},
  volume       = {13},
  year         = {2022},
}

@article{12253,
  abstract     = {The sculpting of germ layers during gastrulation relies on the coordinated migration of progenitor cells, yet the cues controlling these long-range directed movements remain largely unknown. While directional migration often relies on a chemokine gradient generated from a localized source, we find that zebrafish ventrolateral mesoderm is guided by a self-generated gradient of the initially uniformly expressed and secreted protein Toddler/ELABELA/Apela. We show that the Apelin receptor, which is specifically expressed in mesodermal cells, has a dual role during gastrulation, acting as a scavenger receptor to generate a Toddler gradient, and as a chemokine receptor to sense this guidance cue. Thus, we uncover a single receptor–based self-generated gradient as the enigmatic guidance cue that can robustly steer the directional migration of mesoderm through the complex and continuously changing environment of the gastrulating embryo.},
  author       = {Stock, Jessica and Kazmar, Tomas and Schlumm, Friederike and Hannezo, Edouard B and Pauli, Andrea},
  issn         = {2375-2548},
  journal      = {Science Advances},
  number       = {37},
  publisher    = {American Association for the Advancement of Science},
  title        = {{A self-generated Toddler gradient guides mesodermal cell migration}},
  doi          = {10.1126/sciadv.add2488},
  volume       = {8},
  year         = {2022},
}

@article{12259,
  abstract     = {Theoretical foundations of chaos have been predominantly laid out for finite-dimensional dynamical systems, such as the three-body problem in classical mechanics and the Lorenz model in dissipative systems. In contrast, many real-world chaotic phenomena, e.g., weather, arise in systems with many (formally infinite) degrees of freedom, which limits direct quantitative analysis of such systems using chaos theory. In the present work, we demonstrate that the hydrodynamic pilot-wave systems offer a bridge between low- and high-dimensional chaotic phenomena by allowing for a systematic study of how the former connects to the latter. Specifically, we present experimental results, which show the formation of low-dimensional chaotic attractors upon destabilization of regular dynamics and a final transition to high-dimensional chaos via the merging of distinct chaotic regions through a crisis bifurcation. Moreover, we show that the post-crisis dynamics of the system can be rationalized as consecutive scatterings from the nonattracting chaotic sets with lifetimes following exponential distributions. },
  author       = {Choueiri, George H and Suri, Balachandra and Merrin, Jack and Serbyn, Maksym and Hof, Björn and Budanur, Nazmi B},
  issn         = {1089-7682},
  journal      = {Chaos: An Interdisciplinary Journal of Nonlinear Science},
  keywords     = {Applied Mathematics, General Physics and Astronomy, Mathematical Physics, Statistical and Nonlinear Physics},
  number       = {9},
  publisher    = {AIP Publishing},
  title        = {{Crises and chaotic scattering in hydrodynamic pilot-wave experiments}},
  doi          = {10.1063/5.0102904},
  volume       = {32},
  year         = {2022},
}

@inproceedings{14093,
  abstract     = { We propose a stochastic conditional gradient method (CGM) for minimizing convex finite-sum objectives formed as a sum of smooth and non-smooth terms. Existing CGM variants for this template either suffer from slow convergence rates, or require carefully increasing the batch size over the course of the algorithm’s execution, which leads to computing full gradients. In contrast, the proposed method, equipped with a stochastic average gradient (SAG) estimator, requires only one sample per iteration. Nevertheless, it guarantees fast convergence rates on par with more sophisticated variance reduction techniques. In applications we put special emphasis on problems with a large number of separable constraints. Such problems are prevalent among semidefinite programming (SDP) formulations arising in machine learning and theoretical computer science. We provide numerical experiments on matrix completion, unsupervised clustering, and sparsest-cut SDPs. },
  author       = {Dresdner, Gideon and Vladarean, Maria-Luiza and Rätsch, Gunnar and Locatello, Francesco and Cevher, Volkan and Yurtsever, Alp},
  booktitle    = {Proceedings of the 25th International Conference on Artificial Intelligence and Statistics},
  issn         = {2640-3498},
  location     = {Virtual},
  pages        = {8439--8457},
  publisher    = {ML Research Press},
  title        = {{Faster one-sample stochastic conditional gradient method for composite convex minimization}},
  volume       = {151},
  year         = {2022},
}

@inproceedings{14106,
  abstract     = {We show that deep networks trained to satisfy demographic parity often do so
through a form of race or gender awareness, and that the more we force a network
to be fair, the more accurately we can recover race or gender from the internal state
of the network. Based on this observation, we investigate an alternative fairness
approach: we add a second classification head to the network to explicitly predict
the protected attribute (such as race or gender) alongside the original task. After
training the two-headed network, we enforce demographic parity by merging the
two heads, creating a network with the same architecture as the original network.
We establish a close relationship between existing approaches and our approach
by showing (1) that the decisions of a fair classifier are well-approximated by our
approach, and (2) that an unfair and optimally accurate classifier can be recovered
from a fair classifier and our second head predicting the protected attribute. We use
our explicit formulation to argue that the existing fairness approaches, just as ours,
demonstrate disparate treatment and that they are likely to be unlawful in a wide
range of scenarios under US law.},
  author       = {Lohaus, Michael and Kleindessner, Matthäus and Kenthapadi, Krishnaram and Locatello, Francesco and Russell, Chris},
  booktitle    = {36th Conference on Neural Information Processing Systems},
  isbn         = {9781713871088},
  location     = {New Orleans, LA, United States},
  pages        = {16548--16562},
  publisher    = {Neural Information Processing Systems Foundation},
  title        = {{Are two heads the same as one? Identifying disparate treatment in fair neural networks}},
  volume       = {35},
  year         = {2022},
}

@inproceedings{14107,
  abstract     = {Amodal perception requires inferring the full shape of an object that is partially occluded. This task is particularly challenging on two levels: (1) it requires more information than what is contained in the instant retina or imaging sensor, (2) it is difficult to obtain enough well-annotated amodal labels for supervision. To this end, this paper develops a new framework of
Self-supervised amodal Video object segmentation (SaVos). Our method efficiently leverages the visual information of video temporal sequences to infer the amodal mask of objects. The key intuition is that the occluded part of an object can be explained away if that part is visible in other frames, possibly deformed as long as the deformation can be reasonably learned.
Accordingly, we derive a novel self-supervised learning paradigm that efficiently utilizes the visible object parts as the supervision to guide the training on videos. In addition to learning type prior to complete masks for known types, SaVos also learns the spatiotemporal prior, which is also useful for the amodal task and could generalize to unseen types. The proposed
framework achieves the state-of-the-art performance on the synthetic amodal segmentation benchmark FISHBOWL and the real world benchmark KINS-Video-Car. Further, it lends itself well to being transferred to novel distributions using test-time adaptation, outperforming existing models even after the transfer to a new distribution.},
  author       = {Yao, Jian and Hong, Yuxin and Wang, Chiyu and Xiao, Tianjun and He, Tong and Locatello, Francesco and Wipf, David and Fu, Yanwei and Zhang, Zheng},
  booktitle    = {36th Conference on Neural Information Processing Systems},
  location     = {New Orleans, LA, United States},
  title        = {{Self-supervised amodal video object segmentation}},
  doi          = {10.48550/arXiv.2210.12733},
  year         = {2022},
}

@inproceedings{14114,
  abstract     = {Algorithmic fairness is frequently motivated in terms of a trade-off in which overall performance is decreased so as to improve performance on disadvantaged groups where the algorithm would otherwise be less accurate. Contrary to this, we find that applying existing fairness approaches to computer vision improve fairness by degrading the performance of classifiers across all groups (with increased degradation on the best performing groups). Extending the bias-variance decomposition for classification to fairness, we theoretically explain why the majority of fairness methods designed for low capacity models should not be used in settings involving high-capacity models, a scenario common to computer vision. We corroborate this analysis with extensive experimental support that shows that many of the fairness heuristics used in computer vision also degrade performance on the most disadvantaged groups. Building on these insights, we propose an adaptive augmentation strategy that, uniquely, of all methods tested, improves performance for the disadvantaged groups.},
  author       = {Zietlow, Dominik and Lohaus, Michael and Balakrishnan, Guha and Kleindessner, Matthäus and Locatello, Francesco and Schölkopf, Bernhard and Russell, Chris},
  booktitle    = {2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  isbn         = {9781665469470},
  issn         = {2575-7075},
  location     = {New Orleans, LA, United States},
  pages        = {10400--10411},
  publisher    = {Institute of Electrical and Electronics Engineers},
  title        = {{Leveling down in computer vision: Pareto inefficiencies in fair deep classifiers}},
  doi          = {10.1109/cvpr52688.2022.01016},
  year         = {2022},
}

@inproceedings{14168,
  abstract     = {Recent work has seen the development of general purpose neural architectures
that can be trained to perform tasks across diverse data modalities. General
purpose models typically make few assumptions about the underlying
data-structure and are known to perform well in the large-data regime. At the
same time, there has been growing interest in modular neural architectures that
represent the data using sparsely interacting modules. These models can be more
robust out-of-distribution, computationally efficient, and capable of
sample-efficient adaptation to new data. However, they tend to make
domain-specific assumptions about the data, and present challenges in how
module behavior (i.e., parameterization) and connectivity (i.e., their layout)
can be jointly learned. In this work, we introduce a general purpose, yet
modular neural architecture called Neural Attentive Circuits (NACs) that
jointly learns the parameterization and a sparse connectivity of neural modules
without using domain knowledge. NACs are best understood as the combination of
two systems that are jointly trained end-to-end: one that determines the module
configuration and the other that executes it on an input. We demonstrate
qualitatively that NACs learn diverse and meaningful module configurations on
the NLVR2 dataset without additional supervision. Quantitatively, we show that
by incorporating modularity in this way, NACs improve upon a strong non-modular
baseline in terms of low-shot adaptation on CIFAR and CUBs dataset by about
10%, and OOD robustness on Tiny ImageNet-R by about 2.5%. Further, we find that
NACs can achieve an 8x speedup at inference time while losing less than 3%
performance. Finally, we find NACs to yield competitive results on diverse data
modalities spanning point-cloud classification, symbolic processing and
text-classification from ASCII bytes, thereby confirming its general purpose
nature.},
  author       = {Rahaman, Nasim and Weiss, Martin and Locatello, Francesco and Pal, Chris and Bengio, Yoshua and Schölkopf, Bernhard and Li, Li Erran and Ballas, Nicolas},
  booktitle    = {36th Conference on Neural Information Processing Systems},
  location     = {New Orleans, United States},
  title        = {{Neural attentive circuits}},
  volume       = {35},
  year         = {2022},
}

@inproceedings{14170,
  abstract     = {The idea behind object-centric representation learning is that natural scenes can better be modeled as compositions of objects and their relations as opposed to distributed representations. This inductive bias can be injected into neural networks to potentially improve systematic generalization and performance of downstream tasks in scenes with multiple objects. In this paper, we train state-of-the-art unsupervised models on five common multi-object datasets and evaluate segmentation metrics and downstream object property prediction. In addition, we study generalization and robustness by investigating the settings where either a single object is out of distribution -- e.g., having an unseen color, texture, or shape -- or global properties of the scene are altered -- e.g., by occlusions, cropping, or increasing the number of objects. From our experimental study, we find object-centric representations to be useful for
downstream tasks and generally robust to most distribution shifts affecting objects. However, when the distribution shift affects the input in a less structured manner, robustness in terms of segmentation and downstream task performance may vary significantly across models and distribution shifts. },
  author       = {Dittadi, Andrea and Papa, Samuele and De Vita, Michele and Schölkopf, Bernhard and Winther, Ole and Locatello, Francesco},
  booktitle    = {Proceedings of the 39th International Conference on Machine Learning},
  location     = {Baltimore, MD, United States},
  pages        = {5221--5285},
  publisher    = {ML Research Press},
  title        = {{Generalization and robustness implications in object-centric learning}},
  volume       = {162},
  year         = {2022},
}

@inproceedings{14171,
  abstract     = {This paper demonstrates how to recover causal graphs from the score of the data distribution in non-linear additive (Gaussian) noise models. Using score matching algorithms as a building block, we show how to design a new generation of scalable causal discovery methods. To showcase our approach, we also propose a new efficient method for approximating the score's Jacobian, enabling to recover the causal graph. Empirically, we find that the new algorithm, called SCORE, is competitive with state-of-the-art causal discovery methods while being significantly faster.},
  author       = {Rolland, Paul and Cevher, Volkan and Kleindessner, Matthäus and Russell, Chris and Schölkopf, Bernhard and Janzing, Dominik and Locatello, Francesco},
  booktitle    = {Proceedings of the 39th International Conference on Machine Learning},
  location     = {Baltimore, MD, United States},
  pages        = {18741--18753},
  publisher    = {ML Research Press},
  title        = {{Score matching enables causal discovery of nonlinear additive noise models}},
  volume       = {162},
  year         = {2022},
}

@inproceedings{14172,
  abstract     = {An important component for generalization in machine learning is to uncover underlying latent factors of variation as well as the mechanism through which each factor acts in the world. In this paper, we test whether 17 unsupervised, weakly supervised, and fully supervised representation learning approaches correctly infer the generative factors of variation in simple datasets (dSprites, Shapes3D, MPI3D) from controlled environments, and on our contributed CelebGlow dataset. In contrast to prior robustness work that introduces novel factors of variation during test time, such as blur or other (un)structured noise, we here recompose, interpolate, or extrapolate only existing factors of variation from the training data set (e.g., small and medium-sized objects during training and large objects during testing). Models that learn the correct mechanism should be able to generalize to this benchmark. In total, we train and test 2000+ models and observe that all of them struggle to learn the underlying mechanism regardless of supervision signal and architectural bias. Moreover, the generalization capabilities of all tested models drop significantly as we move from artificial datasets towards more realistic real-world datasets. Despite their inability to identify the correct mechanism, the models are quite modular as their ability to infer other in-distribution factors remains fairly stable, provided only a single factor is out-of-distribution. These results point to an important yet understudied problem of learning mechanistic models of observations that can facilitate generalization.},
  author       = {Schott, Lukas and von Kügelgen, Julius and Träuble, Frederik and Gehler, Peter and Russell, Chris and Bethge, Matthias and Schölkopf, Bernhard and Locatello, Francesco and Brendel, Wieland},
  booktitle    = {10th International Conference on Learning Representations},
  location     = {Virtual},
  title        = {{Visual representation learning does not generalize strongly within the same domain}},
  year         = {2022},
}

@inproceedings{14173,
  abstract     = {Since out-of-distribution generalization is a generally ill-posed problem, various proxy targets (e.g., calibration, adversarial robustness, algorithmic corruptions, invariance across shifts) were studied across different research programs resulting in different recommendations. While sharing the same aspirational goal, these approaches have never been tested under the same experimental conditions on real data. In this paper, we take a unified view of previous work, highlighting message discrepancies that we address empirically, and providing recommendations on how to measure the robustness of a model and how to improve it. To this end, we collect 172 publicly available dataset pairs for training and out-of-distribution evaluation of accuracy, calibration error, adversarial attacks, environment invariance, and synthetic corruptions. We fine-tune over 31k networks, from nine different architectures in the many- and few-shot setting. Our findings confirm that in- and out-of-distribution accuracies tend to increase jointly, but show that their relation is largely dataset-dependent, and in general more nuanced and more complex than posited by previous, smaller scale studies.},
  author       = {Wenzel, Florian and Dittadi, Andrea and Gehler, Peter Vincent and Simon-Gabriel, Carl-Johann and Horn, Max and Zietlow, Dominik and Kernert, David and Russell, Chris and Brox, Thomas and Schiele, Bernt and Schölkopf, Bernhard and Locatello, Francesco},
  booktitle    = {36th Conference on Neural Information Processing Systems},
  isbn         = {9781713871088},
  location     = {New Orleans, LA, United States},
  pages        = {7181--7198},
  publisher    = {Neural Information Processing Systems Foundation},
  title        = {{Assaying out-of-distribution generalization in transfer learning}},
  volume       = {35},
  year         = {2022},
}

