@article{21264,
  abstract     = {Rodents' ability to encode the whisking phase has been extensively documented through neuronal recordings from ascending sensory pathways. Yet, while indicating that reafference originates from the mechanoreceptors, the mechanistic underpinnings of the whisking phase encoding within the follicle remain unclear. Here we present anatomical, histological, and biomechanical evidence for the presence of a distinctive elastic segment (ES) within the basal part of the whisker shaft inside the follicle. This ES, composed of immature keratin, is capable of both bending and twisting. Forces generated by whisker movement deform this segment, causing whisker shaft deflections that can stimulate specific mechanoreceptor subsets within the follicle at different phases of the whisking cycle. This mechanism appears to operate during both free‐air whisking and object contact. We propose that the ES enables torsion‐based mechanoreceptor activation, allowing encoding of the whisking phase.},
  author       = {Haidarliu, Sebastian and Nelinger, Guy and Gantar, Luka and Ahissar, Ehud and Saraf‐Sinik, Inbar},
  issn         = {1932-8494},
  journal      = {The Anatomical Record},
  publisher    = {Wiley},
  title        = {{An elastic segment of the whisker shaft enables coding of the whisking phase via whisker torsion in rats and mice}},
  doi          = {10.1002/ar.70051},
  year         = {2025},
}

@article{21265,
  abstract     = {We explain how the (shifted) Ratios Conjecture for $L(s,\chi )$ would extend a randomization argument of Harper from a conductor-limited range to an unlimited range of “beyond square-root cancellation” for character twists of the Liouville function. As a corollary, the Liouville function would have nontrivial cancellation in arithmetic progressions of modulus just exceeding the well-known square-root barrier. Morally, the paper passes from random matrices to random multiplicative functions.},
  author       = {Wang, Victor and Xu, Max Wenqiang},
  issn         = {1687-0247},
  journal      = {International Mathematics Research Notices},
  number       = {18},
  publisher    = {Oxford University Press},
  title        = {{Harper’s beyond square-root conjecture}},
  doi          = {10.1093/imrn/rnaf279},
  volume       = {2025},
  year         = {2025},
}

@article{21266,
  abstract     = {For a given elliptic curve E in short Weierstrass form, we show that almost all quadratic twists E 
D have no integral points, as D ranges over square-free integers ordered by size. Our result is conditional on a weak form of the Hall–Lang conjecture in the case that E has partial 2-torsion. The proof uses a correspondence of Mordell and the reduction theory of binary quartic forms in order to transfer the problem to counting rational points of bounded height on a certain singular cubic surface, together with extensive use of cancellation in character sum estimates, drawn from Heath-Brown’s analysis of Selmer group statistics for the congruent number curve.},
  author       = {Browning, Timothy D and Chan, Yik Tung},
  issn         = {1435-9863},
  journal      = {Journal of the European Mathematical Society},
  publisher    = {European Mathematical Society Press},
  title        = {{Almost all quadratic twists of an elliptic curve have no integral points}},
  doi          = {10.4171/jems/1704},
  year         = {2025},
}

@inproceedings{21268,
  abstract     = {We consider multiple-environment Markov decision processes (MEMDP), which consist of a finite set of MDPs over the same state space, representing different scenarios of transition structure and probability. The value of a strategy is the probability to satisfy the objective, here a parity objective, in the worst-case scenario, and the value of an MEMDP is the supremum of the values achievable by a strategy.
We show that deciding whether the value is 1 is a PSPACE-complete problem, and even in P when the number of environments is fixed, along with new insights to the almost-sure winning problem, which is to decide if there exists a strategy with value 1. Pure strategies are sufficient for theses problems, whereas randomization is necessary in general when the value is smaller than 1. We present an algorithm to approximate the value, running in double exponential space. Our results are in contrast to the related model of partially-observable MDPs where all these problems are known to be undecidable.},
  author       = {Chatterjee, Krishnendu and Doyen, Laurent and Raskin, Jean-Francois and Sankur, Ocan},
  booktitle    = {52nd International Colloquium on Automata, Languages, and Programming},
  isbn         = {9783959773720},
  location     = {Aarhus, Denmark},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{The value problem for multiple-environment MDPs with parity objective}},
  doi          = {10.4230/LIPIcs.ICALP.2025.150},
  year         = {2025},
}

@article{21269,
  abstract     = {The spatial organization of chromatin within the nucleus plays a crucial role in gene expression and genome function. However, the quantitative relationship between this organization and nuclear biochemical processes remains under debate. In this study, we present a graph-based generative model, bioSBM, designed to capture long-range chromatin interaction patterns from Hi-C data and, importantly, simultaneously link these patterns to biochemical features. Applying bioSBM to Hi-C maps of the GM12878 lymphoblastoid cell line, we identified a latent structure of chromatin interactions, revealing seven distinct communities that strongly align with known biological annotations. Additionally, we infer a linear transformation that maps biochemical observables, such as histone marks, to the parameters of the generative graph model, enabling accurate genome-wide predictions of chromatin contact maps on out-of-sample data, both within the same cell line and on the completely unseen HCT116 cell line under RAD21 depletion. These findings highlight bioSBM's potential as a powerful tool for elucidating the relationship between biochemistry and chromatin architecture and predicting long-range genome organization from independent biochemical data.},
  author       = {Zhang, Chen Y and Rosa, Angelo and Sanguinetti, Guido},
  issn         = {2835-8279},
  journal      = {PRX Life},
  number       = {4},
  publisher    = {American Physical Society},
  title        = {{bioSBM: A random graph model to integrate epigenomic data in chromatin structure prediction}},
  doi          = {10.1103/gy1p-4256},
  volume       = {3},
  year         = {2025},
}

@article{21270,
  abstract     = {The one-dimensional Fröhlich model describing the motion of a single electron interacting with optical phonons is a paradigmatic model of quantum many-body physics. We predict the existence of an arbitrarily large number of bound excited states in the strong-coupling limit and calculate their excitation energies. Numerical simulations of a discretized model demonstrate the complete amelioration of the projector Monte Carlo sign problem by walker annihilation in an infinite Hilbert space. They reveal the threshold for the occurrence of the first bound excited states at a value of 𝛼≈1.73 for the dimensionless coupling constant. This puts the threshold into the regime of intermediate interaction strength. We find a significant spectral weight and increased phonon number of the bound excited state at threshold.},
  author       = {Taylor, J. and Čufar, M. and Mitrouskas, David Johannes and Seiringer, Robert and Pahl, E. and Brand, J.},
  issn         = {2469-9969},
  journal      = {Physical Review B},
  number       = {18},
  publisher    = {American Physical Society},
  title        = {{Bound excited states of Fröhlich polarons in one dimension}},
  doi          = {10.1103/s9p9-jflq},
  volume       = {112},
  year         = {2025},
}

@article{21271,
  abstract     = {For general non-Hermitian large random matrices X and deterministic deformation matrices A, we prove that the local eigenvalue statistics of A+X close to the typical edge points of its spectrum are universal. Furthermore, we show that, under natural assumptions, on A the spectrum of A+X does not have outliers at a distance larger than the natural fluctuation scale of the eigenvalues. As a consequence, the number of eigenvalues in each component of Spec(A+X) is deterministic.},
  author       = {Campbell, Andrew J and Cipolloni, Giorgio and Erdös, László and Ji, Hong Chang},
  issn         = {2168-894X},
  journal      = {The Annals of Probability},
  number       = {6},
  pages        = {2256--2308},
  publisher    = {Institute of Mathematical Statistics},
  title        = {{On the spectral edge of non-Hermitian random matrices}},
  doi          = {10.1214/25-aop1761},
  volume       = {53},
  year         = {2025},
}

@inproceedings{21272,
  abstract     = {Finding the ground state of Ising spin glasses is notoriously difficult due to disorder and frustration. Often, this challenge is framed as a combinatorial optimization problem, for which a common strategy employs simulated annealing, a Monte Carlo (MC)-based algorithm that updates spins one at a time. Yet, these localized updates can cause the system to become trapped in local minima. Cluster algorithms (CAs) were developed to address this limitation and have demonstrated considerable success in studying ferromagnetic systems; however, they tend to encounter percolation issues when applied to generic spin glasses. In this work, we introduce a novel CA designed to tackle these challenges by leveraging precomputed two-point correlations, aiming solve combinatorial optimization problems in the form of Max-Cut more efficiently. In our approach, clusters are formed probabilistically based on these correlations. Various classical and quantum algorithms can be employed to generate correlations that embody information about the energy landscape of the problem. By utilizing this information, the algorithm aims to identify groups of spins whose simultaneous flipping induces large transitions in configuration space with high acceptance probability - even at low energy levels - thereby escaping local minima more effectively. Notably, clusters generated using correlations from the Quantum Approximate Optimization Algorithm exhibit high acceptance rates at low temperatures. These acceptance rates often increase with circuit depth, accelerating the algorithm and enabling more efficient exploration of the solution space.},
  author       = {Eder, Peter J. and Kerschbaumer, Aron and Finžgar, Jernej Rudi and Medina Ramos, Raimel A and Schuetz, Martin J. A. and Katzgraber, Helmut G. and Braun, Sarah and Mendl, Christian B.},
  booktitle    = {2025 IEEE International Conference on Quantum Computing and Engineering},
  location     = {Albuquerque, NM, United States},
  publisher    = {IEEE},
  title        = {{Quantum-guided cluster algorithms for combinatorial optimization}},
  doi          = {10.1109/qce65121.2025.00033},
  year         = {2025},
}

@inproceedings{21280,
  abstract     = {We give an algorithm that, with high probability, maintains a (1-ε)-approximate s-t maximum flow in undirected, uncapacitated n-vertex graphs undergoing m edge insertions in Õ(m+ n F^*/ε) total update time, where F^{*} is the maximum flow on the final graph. This is the first algorithm to achieve polylogarithmic amortized update time for dense graphs (m = Ω(n²)), and more generally, for graphs where F^* = Õ(m/n). At the heart of our incremental algorithm is the residual graph sparsification technique of Karger and Levine [SICOMP '15], originally designed for computing exact maximum flows in the static setting. Our main contributions are (i) showing how to maintain such sparsifiers for approximate maximum flows in the incremental setting and (ii) generalizing the cut sparsification framework of Fung et al. [SICOMP '19] from undirected graphs to balanced directed graphs.},
  author       = {Goranci, Gramoz and Henzinger, Monika H and Räcke, Harald and Sricharan, A.},
  booktitle    = {52nd International Colloquium on Automata, Languages, and Programming},
  isbn         = {9783959773720},
  location     = {Aarhus, Denmark},
  pages        = {91:1--91:20},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Incremental approximate maximum flow via residual graph sparsification}},
  doi          = {10.4230/lipics.icalp.2025.91},
  volume       = {334},
  year         = {2025},
}

@inproceedings{21281,
  abstract     = {A strategy profile in a multi-player game is a Nash equilibrium if no player can unilaterally deviate to achieve a strictly better payoff. A profile is an ε-Nash equilibrium if no player can gain more than ε by unilaterally deviating from their strategy. In this work, we use ε-Nash equilibria to approximate the computation of Nash equilibria. Specifically, we focus on turn-based, multiplayer stochastic games played on graphs, where players are restricted to stationary strategies - strategies that use randomness but not memory.
The problem of deciding the constrained existence of stationary Nash equilibria - where each player’s payoff must lie within a given interval - is known to be ∃ℝ-complete in such a setting (Hansen and Sølvsten, 2020). We extend this line of work to stationary ε-Nash equilibria and present an algorithm that solves the following promise problem: given a game with a Nash equilibrium satisfying the constraints, compute an ε-Nash equilibrium that ε-satisfies those same constraints - satisfies the constraints up to an ε additive error. Our algorithm runs in FNP^NP time.
To achieve this, we first show that if a constrained Nash equilibrium exists, then one exists where the non-zero probabilities are at least an inverse of a double-exponential in the input. We further prove that such a strategy can be encoded using floating-point representations, as in the work of Frederiksen and Miltersen (2013), which finally gives us our FNP^NP algorithm. 
We further show that the decision version of the promise problem is NP-hard. Finally, we show a partial tightness result by proving a lower bound for such techniques: if a constrained Nash equilibrium exists, then there must be one where the probabilities in the strategies are double-exponentially small.},
  author       = {Asadi, Ali and Brice, Leonard and Chatterjee, Krishnendu and Thejaswini, K. S.},
  booktitle    = {45th Annual Conference on Foundations of Software Technology and Theoretical Computer Science},
  isbn         = {9783959774062},
  location     = {Pilani, India},
  pages        = {9:1--9:17},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{ε-stationary Nash equilibria in multi-player stochastic graph games}},
  doi          = {10.4230/lipics.fsttcs.2025.9},
  volume       = {360},
  year         = {2025},
}

@unpublished{21309,
  abstract     = {The polarization of light is a critically under-utilized, rich source of information in astronomy. For stars in particular, surface magnetism polarization that can be detected and measured with spectro-polarimetry. Many questions about these surface fields remain unanswered due to a lack of dedicated instruments capable of probing weak and strong surface magnetic fields for the entire mass range of stars, from M-dwarfs (and even substellar objects) to massive O-type stars at different evolutionary stages and metallicities. These questions range from the origin of these fields to their true incidence rate throughout the stellar population and the dependence on metallicity. Magnetic fields, although currently often excluded from stellar evolution models, play an important role in stellar evolution. Connecting the surface fields to internal fields through asteroseismology will instigate a new era of understanding stellar evolution and the transport of angular momentum and chemical elements throughout stellar interiors, also impacting our understanding of star-planet interactions and stellar remnants. Polarimetry is also an under-utilized tool to observationally constrain the mode identification of nonradial oscillations, which lies at the basis of accurate asteroseismic parameter estimation at percentage-level for stellar radii, masses, ages, internal rotation, and magnetic field strengths. Combining strong constraints on mode identification and surface magnetic properties through the acquisition of time-resolved, high-resolution and high-signal-to-noise (S/N) spectro-polarimetry and spectroscopy promises to bring leaps forward in our understanding of stellar structure, particularly when combined with long-term space photometric data from past, current, and future missions.},
  author       = {Vandersnickt, J. and Armenta, R. Ochoa and Vanlaer, V. and A. David-Uraz, A. David-Uraz and Aerts, C. and Das, S. B. and Bouret, J. -C. and Bowman, D. M. and Bugnet, Lisa Annabelle and Khalack, V. and J. Labadie-Bartz, J. Labadie-Bartz and Mathis, S. and Nazé, Y. and Neiner, C. and Petit, P. and Petit, V. and K. Thomson-Paressant, K. Thomson-Paressant and Doorsselaere, T. Van and Vanrespaille, M.},
  booktitle    = {arXiv},
  title        = {{Expanding stellar horizons with polarized light}},
  doi          = {10.48550/arXiv.2512.15170},
  year         = {2025},
}

@article{21317,
  abstract     = {Accreting white dwarfs (WDs) in close binary systems, commonly known as cataclysmic variables (CVs), with orbital periods below the canonical period minimum (≈80 minutes) are rare. Such short periods can only be reached if the donor star in the CV is either significantly evolved before initiating mass transfer to the WD or is metal-poor. We present optical photometry and spectroscopy of Gaia19bxc, a high-amplitude variable identified as a polar CV with an exceptionally short orbital period of 64.42 minutes—well below the canonical CV period minimum. High-speed photometry confirms persistent double-peaked variability consistent with cyclotron beaming, thus indicating the presence of a magnetic WD. Phase-resolved Keck/Low-Resolution Imaging Spectrometer (LRIS) spectroscopy reveals strong hydrogen and helium emission lines but no donor features, indicating the accretor is a magnetic WD and the donor is hydrogen-rich, but cold and faint. The absence of a detectable donor and the low inferred temperature (≲3500 K) disfavor an evolved donor scenario. Instead, the short period and the system’s halo-like kinematics suggest Gaia19bxc may be the first known metal-poor polar. Because metal-poor donors are more compact than solar-metallicity donors of the same mass, they can reach shorter minimum periods. Gaia19bxc is one of only a handful of known metal-poor CVs below the canonical period minimum and has the shortest period of any such magnetic system discovered to date.},
  author       = {Galiullin, Ilkham and Rodriguez, Antonio C. and El-Badry, Kareem and Caiazzo, Ilaria and Szkody, Paula and Nagarajan, Pranav and Whitebook, Samuel},
  issn         = {2041-8213},
  journal      = {The Astrophysical Journal Letters},
  number       = {2},
  publisher    = {IOP Publishing},
  title        = {{Optical spectroscopy of the most compact accreting binary harboring a magnetic White Dwarf and a hydrogen-rich donor}},
  doi          = {10.3847/2041-8213/adff82},
  volume       = {990},
  year         = {2025},
}

@article{21318,
  abstract     = {Matter waves have been observed in double-slit experiments with microscopic objects, such as atoms or molecules. The wave function describing the motion of these objects must extend over a distance comparable to the slit separation, much larger than the characteristic size of the objects. Preparing such states for more massive objects, such as mechanical oscillators, remains an outstanding challenge. Here we delocalize the quantum ground state of an optically levitated nanosphere by modulating the stiffness of the confining potential. We show a more than threefold increase of the initial coherence length, which corresponds to mechanical momentum squeezing of more than 7 dB. Our work is a stepping stone toward the generation of coherence lengths comparable to the object size, a crucial regime for macroscopic quantum experiments.},
  author       = {Rossi, M. and Militaru, Andrei and Carlon Zambon, N. and Riera-Campeny, A. and Romero-Isart, O. and Frimmer, M. and Novotny, L.},
  issn         = {1079-7114},
  journal      = {Physical Review Letters},
  number       = {8},
  publisher    = {American Physical Society},
  title        = {{Quantum delocalization of a levitated nanoparticle}},
  doi          = {10.1103/2yzc-fsm3},
  volume       = {135},
  year         = {2025},
}

@inproceedings{21320,
  abstract     = {Prophet inequalities are a central object of study in optimal stopping theory. In the iid model, a gambler sees values in an online fashion, sampled independently from a given distribution. Upon observing each value, the gambler either accepts it as a reward, or irrevocably rejects it and proceeds to observe the next value. The goal of the gambler, who cannot see the future, is to maximise the expected value of the reward while competing against the expectation of a prophet (the offline maximum). In other words, one seeks to maximise the gambler-to-prophet ratio of the expectations. 
This model has been studied with infinite, finite and unknown number of values. When the gambler faces a random number of values, the model is said to have a random horizon. We consider the model in which the gambler is given a priori knowledge of the horizon’s distribution. Alijani et al. (2020) designed a single-threshold algorithm achieving a ratio of 1/2 when the random horizon has an increasing hazard rate and is independent of the values. We prove that with a single threshold, a ratio of 1/2 is actually achievable for several larger classes of horizon distributions, with the largest being known as the 𝒢 class in reliability theory. Moreover, we show that this does not extend to its dual, the  ̅𝒢 class (which includes the decreasing hazard rate class), while it can be extended to low-variance horizons. Finally, we construct the first example of a family of horizons, for which multiple thresholds are necessary to achieve a nonzero ratio. We establish that the Secretary Problem optimal stopping rule provides one such algorithm, paving the way towards the study of the model beyond single-threshold algorithms.},
  author       = {Giambartolomei, Giordano and Mallmann-Trenn, Frederik and Saona Urmeneta, Raimundo J},
  booktitle    = {52nd International Colloquium on Automata, Languages, and Programming},
  isbn         = {9783959773720},
  location     = {Aarhus, Denmark},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{IID prophet inequality with random horizon: Going beyond increasing hazard rates}},
  doi          = {10.4230/LIPIcs.ICALP.2025.87},
  volume       = {334},
  year         = {2025},
}

@article{21321,
  abstract     = {The development of cost-effective and high-performance thermoelectric (TE) materials faces significant challenges, particularly in improving the properties of promising copper-based TE materials such as Cu3SbSe4, which are limited by their poor electrical conductivity. This study presents a detailed comparative analysis of three strategies to promote the electrical transport properties of Cu3SbSe4 through Sn doping: conventional Sn atomic doping, surface treatment with SnSe molecular complexes, and blending with SnSe nanocrystals to form nanocomposites, all followed by annealing and hot pressing under identical conditions. Our results reveal that a surface treatment using SnSe molecular complexes significantly enhances TE performance over atomic doping and nanocomposite formation, achieving a power factor of 1.1 mW·m−1·K−2 and a maximum dimensionless figure of merit zT value of 0.80 at 640 K, representing an excellent performance among Cu3SbSe4-based materials produced via solution-processing methods. This work highlights the effectiveness of surface engineering in optimizing the transport properties of nanostructured materials, demonstrating the versatility and cost-efficiency of solution-based technologies in the development of advanced nanostructured materials for application in the field of TE among others.},
  author       = {Xiao, Shanshan and Zhao, Mingjun and Li, Mingquan and Wan, Shanhong and Genç, Aziz and Huang, Lulu and Chen, Lei and Zhang, Yu and Ibáñez, Maria and Lim, Khak Ho and Hong, Min and Liu, Yu and Cabot, Andreu},
  issn         = {1998-0000},
  journal      = {Nano Research},
  number       = {1},
  publisher    = {Tsinghua University Press},
  title        = {{Band and defect engineering in solution-processed nanocrystal building blocks to promote transport properties in nanomaterials: The case of thermoelectric Cu            <sub>3</sub>SbSe            <sub>4</sub>}},
  doi          = {10.26599/nr.2025.94907072},
  volume       = {18},
  year         = {2025},
}

@article{21322,
  abstract     = {Habitat fragmentation poses a significant risk to population survival, causing both demographic stochasticity and genetic drift within local populations to increase, thereby increasing genetic load. Higher load causes population numbers to decline, which reduces the efficiency of selection and further increases load, resulting in a positive feedback that may drive entire populations to extinction. Here, we investigate this eco-evolutionary feedback in a metapopulation consisting of local demes connected via migration, with individuals subject to deleterious mutation at a large number of loci. We first analyze the determinants of load under soft selection, where population sizes are fixed, and then build on this to understand hard selection, where population sizes and load coevolve. We show that under soft selection, very little gene flow (less than one migrant per generation) is enough to prevent fixation of deleterious alleles. By contrast, much higher levels of migration are required to mitigate load and prevent extinction when selection is hard, with critical migration thresholds for metapopulation persistence increasing sharply as the genome-wide deleterious mutation rate becomes comparable to the baseline population growth rate. Moreover, critical migration thresholds are highest if deleterious mutations have intermediate selection coefficients but lower if alleles are predominantly recessive rather than additive (due to more efficient purging of recessive load within local populations). Our analysis is based on a combination of analytical approximations and simulations, allowing for a more comprehensive understanding of the factors influencing load and extinction in fragmented populations.},
  author       = {Olusanya, Oluwafunmilola O and Khudiakova, Kseniia and Sachdeva, Himani},
  issn         = {1537-5323},
  journal      = {The American Naturalist},
  number       = {6},
  pages        = {617--636},
  publisher    = {University of Chicago Press},
  title        = {{Genetic load, eco-evolutionary feedback, and extinction in metapopulations}},
  doi          = {10.1086/735562},
  volume       = {205},
  year         = {2025},
}

@inproceedings{21323,
  abstract     = {We present a unifying framework for proving the knowledge-soundness of KZG-like polynomial commitment schemes, encompassing both univariate and multivariate variants. By conceptualizing the proof technique of Lipmaa, Parisella, and Siim for the univariate KZG scheme (EUROCRYPT 2024), we present tools and falsifiable hardness assumptions that permit black-box extraction of the multivariate KZG scheme. Central to our approach is the notion of a canonical Proof-of-Knowledge of a Polynomial (PoKoP) of a polynomial commitment scheme, which we use to capture the extractability notion required in constructions of practical zk-SNARKs. We further present an explicit polynomial decomposition lemma for multivariate polynomials, enabling a more direct analysis of interpolating extractors and bridging the gap between univariate and multivariate commitments. Our results provide the first standard-model proofs of extractability for the multivariate KZG scheme and many of its variants under falsifiable assumptions.},
  author       = {Belohorec, Juraj and Dvořák, Pavel and Hoffmann, Charlotte and Hubáček, Pavel and Mašková, Kristýna and Pastyřík, Martin},
  booktitle    = {45th Annual International Cryptology Conference},
  isbn         = {9783032018861},
  issn         = {1611-3349},
  location     = {Santa Barbara, CA, United States},
  pages        = {584--616},
  publisher    = {Springer Nature},
  title        = {{On extractability of the KZG family of polynomial commitment schemes}},
  doi          = {10.1007/978-3-032-01887-8_19},
  volume       = {16005},
  year         = {2025},
}

@inproceedings{21324,
  abstract     = {Learning models have been shown to rely on spurious correlations between non-predictive features and the associated labels in the training data, with negative implications on robustness, bias and fairness. In this work, we provide a statistical characterization of this phenomenon for high-dimensional regression, when the data contains a predictive core feature x and a spurious feature y. Specifically, we quantify the amount of spurious correlations C learned via linear regression, in terms of the data covariance and the strength λ of the ridge regularization. As a consequence, we first capture the simplicity of y through the spectrum of its covariance, and its correlation with x through the Schur complement of the full data covariance. Next, we prove a trade-off between C and the in-distribution test loss L, by showing that the value of λ that minimizes L lies in an interval where C is increasing. Finally, we investigate the effects of over-parameterization via the random features model, by showing its equivalence to regularized linear regression. Our theoretical results are supported by numerical experiments on Gaussian, Color-MNIST, and CIFAR-10 datasets.},
  author       = {Bombari, Simone and Mondelli, Marco},
  booktitle    = {Proceedings of the 42nd International Conference on Machine Learning},
  issn         = {2640-3498},
  location     = {Vancouver, Canada},
  pages        = {4839--4873},
  publisher    = {ML Research Press},
  title        = {{Spurious correlations in high dimensional regression: The roles of regularization, simplicity bias and over-parameterization}},
  volume       = {267},
  year         = {2025},
}

@inproceedings{21325,
  abstract     = {Test-time training (TTT) methods explicitly update the weights of a model to adapt to the specific test instance, and they have found success in a variety of settings, including most recently language modeling and reasoning. To demystify this success, we investigate a gradient-based TTT algorithm for in-context learning, where we train a transformer model on the in-context demonstrations provided in the test prompt. Specifically, we provide a comprehensive theoretical characterization of linear transformers when the update rule is a single gradient step. Our theory (i) delineates the role of alignment between pretraining distribution and target task, (ii) demystifies how TTT can alleviate distribution shift, and (iii) quantifies the sample complexity of TTT including how it can significantly reduce the eventual sample size required for in-context learning. As our empirical contribution, we study the benefits of TTT for TabPFN, a tabular foundation model. In line with our theory, we demonstrate that TTT significantly reduces the required sample size for tabular classification (3 to 5 times fewer) unlocking substantial inference efficiency with a negligible training cost.},
  author       = {Gozeten, Halil Alperen and Ildiz, Muhammed Emrullah and Zhang, Xuechen and Soltanolkotabi, Mahdi and Mondelli, Marco and Oymak, Samet},
  booktitle    = {Proceedings of the 42nd International Conference on Machine Learning},
  issn         = {2640-3498},
  location     = {Vancouver, Canada},
  pages        = {20266--20295},
  publisher    = {ML Research Press},
  title        = {{Test-time training provably improves transformers as in-context learners}},
  volume       = {267},
  year         = {2025},
}

@inproceedings{21326,
  abstract     = {Neural Collapse is a phenomenon where the last-layer representations of a well-trained neural network converge to a highly structured geometry. In this paper, we focus on its first (and most basic) property, known as NC1: the within-class variability vanishes. While prior theoretical studies establish the occurrence of NC1 via the data-agnostic unconstrained features model, our work adopts a data-specific perspective, analyzing NC1 in a three-layer neural network, with the first two layers operating in the mean-field regime and followed by a linear layer. In particular, we establish a fundamental connection between NC1 and the loss landscape: we prove that points with small empirical loss and gradient norm (thus, close to being stationary) approximately satisfy NC1, and the closeness to NC1 is controlled by the residual loss and gradient norm. We then show that (i) gradient flow on the mean squared error converges to NC1 solutions with small empirical loss, and (ii) for well-separated data distributions, both NC1 and vanishing test loss are achieved simultaneously. This aligns with the empirical observation that NC1 emerges during training while models attain near-zero test error. Overall, our results demonstrate that NC1 arises from gradient training due to the properties of the loss landscape, and they show the co-occurrence of NC1 and small test error for certain data distributions.},
  author       = {Wu, Diyuan and Mondelli, Marco},
  booktitle    = {Proceedings of the 42nd International Conference on Machine Learning},
  issn         = {2640-3498},
  location     = {Vancouver, Canada},
  pages        = {67499--67536},
  publisher    = {ML Research Press},
  title        = {{Neural collapse beyond the unconstrained features model: Landscape, dynamics, and generalization in the mean-field regime}},
  volume       = {267},
  year         = {2025},
}

