@article{13182,
  abstract     = {We characterize critical points of 1-dimensional maps paired in persistent homology
geometrically and this way get elementary proofs of theorems about the symmetry
of persistence diagrams and the variation of such maps. In particular, we identify
branching points and endpoints of networks as the sole source of asymmetry and
relate the cycle basis in persistent homology with a version of the stable marriage
problem. Our analysis provides the foundations of fast algorithms for maintaining a
collection of sorted lists together with its persistence diagram.},
  author       = {Biswas, Ranita and Cultrera Di Montesano, Sebastiano and Edelsbrunner, Herbert and Saghafian, Morteza},
  issn         = {2367-1734},
  journal      = {Journal of Applied and Computational Topology},
  pages        = {1101--1119},
  publisher    = {Springer Nature},
  title        = {{Geometric characterization of the persistence of 1D maps}},
  doi          = {10.1007/s41468-023-00126-9},
  volume       = {8},
  year         = {2024},
}

@inproceedings{15093,
  abstract     = {We present a dynamic data structure for maintaining the persistent homology of a time series of real numbers. The data structure supports local operations, including the insertion and deletion of an item and the cutting and concatenating of lists, each in time O(log n + k), in which n counts the critical items and k the changes in the augmented persistence diagram. To achieve this, we design a tailor-made tree structure with an unconventional representation, referred to as banana tree, which may be useful in its own right.},
  author       = {Cultrera di Montesano, Sebastiano and Edelsbrunner, Herbert and Henzinger, Monika H and Ost, Lara},
  booktitle    = {Proceedings of the 2024 Annual ACM-SIAM Symposium on Discrete Algorithms (SODA)},
  editor       = {Woodruff, David P.},
  location     = {Alexandria, VA, USA},
  pages        = {243 -- 295},
  publisher    = {Society for Industrial and Applied Mathematics},
  title        = {{Dynamically maintaining the persistent homology of time series}},
  doi          = {10.1137/1.9781611977912.11},
  year         = {2024},
}

@unpublished{15091,
  abstract     = {Motivated by applications in the medical sciences, we study finite chromatic
sets in Euclidean space from a topological perspective. Based on the persistent
homology for images, kernels and cokernels, we design provably stable
homological quantifiers that describe the geometric micro- and macro-structure
of how the color classes mingle. These can be efficiently computed using
chromatic variants of Delaunay and alpha complexes, and code that does these
computations is provided.},
  author       = {Cultrera di Montesano, Sebastiano and Draganov, Ondrej and Edelsbrunner, Herbert and Saghafian, Morteza},
  booktitle    = {arXiv},
  title        = {{Chromatic alpha complexes}},
  doi          = {10.48550/arXiv.2212.03128},
  year         = {2024},
}

@phdthesis{18766,
  abstract     = {Poxviruses are large pleomorphic double-stranded DNA viruses that include well known members such as variola virus, the causative agent of smallpox, Mpox virus, as well as Vaccinia virus (VACV), which serves as a vaccination strain for formerly mentioned viruses. VACV is a valuable model for studying large pleomorphic DNA viruses in general and poxviruses specifically, as many features, such as core morphology and structural proteins, are well conserved within this family. Despite decades of research, our understanding of the structural components and proteins that comprise the poxvirus core in mature virions remains limited. Although major core proteins were identified via indirect experimental evidence, the core's complexity, with its large size, structure and number of involved proteins, has hindered efforts to achieve high-resolution insights and to define the roles of the individual proteins. The specific protein composition of the core's individual layers, including the palisade layer and the inner core wall, has remained unclear. In this study, we have merged multiple approaches, including single particle cryo electron microscopy of purified virus cores, cryo-electron tomography and subtomogram averaging of mature virions and molecular modeling to elucidate the structural determinants of the VACV core. Due to the lack of experimentally derived structures, either in situ or reconstituted in vitro, we used Alphafold to predict models of the putative major core protein candidates, A10, 23k, A3, A4, and L4. Our results show that the VACV core is composed of several layers with varying local symmetries, forming more intricate interactions than observed previously. This allowed us to identify several molecular building blocks forming the viral core lattice. In particular, we identified trimers of protein A10 as a major core structure that forms the palisade layer of the viral core. Additionally, we revealed that six petals of a flower shaped core pore within the core wall are composed of A10 trimers. Furthermore, we obtained a cryo-EM density for the inner core wall that could potentially accommodate an A3 dimer. Integrating descriptions of protein interactions from previous studies enabled us to provide a detailed structural model of the poxvirus core wall, and our findings indicate that the interactions within A10 trimers are likely consistent across orthopox- and parapoxviruses. This combined application of cryo-SPA and cryo-ET can help overcome obstacles in studying complex virus structures in the future, including their key assembly proteins, interactions, and the formation into a core lattice. Our work provides important fundamental new insights into poxvirus core architecture, also considering the recent re-emergence of poxviruses.},
  author       = {Datler, Julia},
  isbn         = {978-3-99078-049-7},
  issn         = {2663-337X},
  keywords     = {cryo-EM, cryo-ET, cryo-SPA, Structural Virology, Poxvirus, Vaccinia Virus, Structural Biology},
  pages        = {106},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Elucidating the structural determinants of the poxvirus core using multi-modal cryo-EM}},
  doi          = {10.15479/at:ista:18766},
  year         = {2024},
}

@article{14979,
  abstract     = {Poxviruses are among the largest double-stranded DNA viruses, with members such as variola virus, monkeypox virus and the vaccination strain vaccinia virus (VACV). Knowledge about the structural proteins that form the viral core has remained sparse. While major core proteins have been annotated via indirect experimental evidence, their structures have remained elusive and they could not be assigned to individual core features. Hence, which proteins constitute which layers of the core, such as the palisade layer and the inner core wall, has remained enigmatic. Here we show, using a multi-modal cryo-electron microscopy (cryo-EM) approach in combination with AlphaFold molecular modeling, that trimers formed by the cleavage product of VACV protein A10 are the key component of the palisade layer. This allows us to place previously obtained descriptions of protein interactions within the core wall into perspective and to provide a detailed model of poxvirus core architecture. Importantly, we show that interactions within A10 trimers are likely generalizable over members of orthopox- and parapoxviruses.},
  author       = {Datler, Julia and Hansen, Jesse and Thader, Andreas and Schlögl, Alois and Bauer, Lukas W and Hodirnau, Victor-Valentin and Schur, Florian KM},
  issn         = {1545-9985},
  journal      = {Nature Structural & Molecular Biology},
  keywords     = {Molecular Biology, Structural Biology},
  pages        = {1114--1123},
  publisher    = {Springer Nature},
  title        = {{Multi-modal cryo-EM reveals trimers of protein A10 to form the palisade layer in poxvirus cores}},
  doi          = {10.1038/s41594-023-01201-6},
  volume       = {31},
  year         = {2024},
}

@phdthesis{15020,
  abstract     = {This thesis consists of four distinct pieces of work within theoretical biology, with two themes in common: the concept of optimization in biological systems, and the use of information-theoretic tools to quantify biological stochasticity and statistical uncertainty.
Chapter 2 develops a statistical framework for studying biological systems which we believe to be optimized for a particular utility function, such as retinal neurons conveying information about visual stimuli. We formalize such beliefs as maximum-entropy Bayesian priors, constrained by the expected utility. We explore how such priors aid inference of system parameters with limited data and enable optimality hypothesis testing: is the utility higher than by chance?
Chapter 3 examines the ultimate biological optimization process: evolution by natural selection. As some individuals survive and reproduce more successfully than others, populations evolve towards fitter genotypes and phenotypes. We formalize this as accumulation of genetic information, and use population genetics theory to study how much such information can be accumulated per generation and maintained in the face of random mutation and genetic drift. We identify the population size and fitness variance as the key quantities that control information accumulation and maintenance.
Chapter 4 reuses the concept of genetic information from Chapter 3, but from a different perspective: we ask how much genetic information organisms actually need, in particular in the context of gene regulation. For example, how much information is needed to bind transcription factors at correct locations within the genome? Population genetics provides us with a refined answer: with an increasing population size, populations achieve higher fitness by maintaining more genetic information. Moreover, regulatory parameters experience selection pressure to optimize the fitness-information trade-off, i.e. minimize the information needed for a given fitness. This provides an evolutionary derivation of the optimization priors introduced in Chapter 2.
Chapter 5 proves an upper bound on mutual information between a signal and a communication channel output (such as neural activity). Mutual information is an important utility measure for biological systems, but its practical use can be difficult due to the large dimensionality of many biological channels. Sometimes, a lower bound on mutual information is computed by replacing the high-dimensional channel outputs with decodes (signal estimates). Our result provides a corresponding upper bound, provided that the decodes are the maximum posterior estimates of the signal.},
  author       = {Hledik, Michal},
  issn         = {2663-337X},
  keywords     = {Theoretical biology, Optimality, Evolution, Information},
  pages        = {158},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Genetic information and biological optimization}},
  doi          = {10.15479/at:ista:15020},
  year         = {2024},
}

@phdthesis{18568,
  abstract     = {Locomotion is ubiquitous in the animal kingdom because an animal's survival depends on its ability to navigate its environment to find food, avoid predators and locate potential mates. These behaviours require control mechanisms that can extract information from the environment, particularly visual cues. Selective evolutionary pressures have thus refined such visuomotor transformations in a species-specific manner to meet the specific ecological and ethological challenges of each organism. However, a common challenge across organisms as visual information processing
becomes increasingly detailed is the mechanisms required to synthesise disparate pieces of information into a coherent percept or unified picture of the world. In this thesis, I investigate how disparate visual information is combined in the brain of Drosophila melanogaster to effectively guide locomotion.
For this, I first designed and built a behavioural setup to record locomotion and present visual stimuli to freely-walking fruit flies in a closed-loop manner. This setup allowed the investigation of innate visually-guided behaviours, including the optomotor reflex and courtship.
Second, taking advantage of my system I investigated the optomotor response, a reflexive visual stabilisation behaviour in which flies turn in the direction of global motion to minimise retinal slip. This behaviour is thought to be mediated by Lobula plate tangential cells (LPTCs); a complex network of optic-flow-sensitive neurons essential for self-motion estimation. Using a novel genetic mutant, I demonstrate that electrical coupling between two LPTC subtypes, contralateral HS and H2 neurons, regulates the balance between smooth optomotor turning and saccadic anti-optomotor responses. These findings underscore the critical role of binocular motion cue integration in guiding course control. Finally, I developed a novel behavioural paradigm in which a sexually aroused male fruit fly is presented with an optomotor distractor. This setup creates competition between two visual behaviours, courtship tracking and the  optomotor response, enabling me to explore how the visual system resolves this conflict. In this setting, males
engaged in courtship selectively suppress their optomotor response based on the female's location. Furthermore, when this experiment is replicated with an “artificial female”, optogenetically aroused males alternate between tracking and optomotor responses. The probability and dynamics of this switching are determined by the relative strengths of the two competing stimuli. In summary, the results presented in this thesis explore two mechanisms – integration and competition - through which visual information is combined in the brain of the fruit fly to drive locomotion.},
  author       = {Satapathy, Roshan K},
  isbn         = {978-3-99078-047-3},
  issn         = {2663-337X},
  pages        = {114},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Mechanisms of visual integration and competition in innate behaviours in Drosophila melanogaster}},
  doi          = {10.15479/at:ista:18568},
  year         = {2024},
}

@article{18444,
  abstract     = {Animals rely on compensatory actions to maintain stability and navigate their environment efficiently. These actions depend on global visual motion cues known as optic-flow. While the optomotor response has been the traditional focus for studying optic-flow compensation in insects, its simplicity has been insufficient to determine the role of the intricate optic-flow processing network involved in visual course control. Here, we reveal a series of course control behaviours in Drosophila and link them to specific neural circuits. We show that bilateral electrical coupling of optic-flow-sensitive neurons in the fly’s lobula plate are required for a proper course control. This electrical interaction works alongside chemical synapses within the HS-H2 network to control the dynamics and direction of turning behaviours. Our findings reveal how insects use bilateral motion cues for navigation, assigning a new functional significance to the HS-H2 network and suggesting a previously unknown role for gap junctions in non-linear operations.},
  author       = {Pokusaeva, Victoria and Satapathy, Roshan K and Symonova, Olga and Jösch, Maximilian A},
  issn         = {2041-1723},
  journal      = {Nature Communications},
  publisher    = {Springer Nature},
  title        = {{Bilateral interactions of optic-flow sensitive neurons coordinate course control in flies}},
  doi          = {10.1038/s41467-024-53173-w},
  volume       = {15},
  year         = {2024},
}

@phdthesis{17336,
  abstract     = {This thesis deals with the study of stochastic processes and their ergodicity properties. The
variety of problems encountered calls for a set of different approaches, ranging from classical to
modern ones: a special place is held by probabilistic methods based on couplings, by functional
inequalities, and by the theory of gradient flows in the space of measures.

The material is organized as follows. Chapter 1 contains the introduction to this thesis, starting
with a general presentation of some of the relevant topics. Section 1.1 is dedicated to the
theory of gradient flows in metric spaces, and introduces the first contribution of this thesis
[DSMP24], which is presented in detail in Chapter 2. Section 1.2 moves to the topic of
curvature of Markov chains, concluding with a brief description of our second contribution
[Ped23], which is included in Chapter 3. Section 1.3 discusses applications of stochastic
processes to the theory of sampling, in particular the recent framework of score-based diffusion
models, and our contribution [PMM24], which is contained in Chapter 4. Section 1.4 discusses
some related problems, concerning the regularization properties of the heat flow. It serves
as a motivation for the work [BP24], which we report in Chapter 5. Finally, Section 1.5
discusses the last contribution of this thesis, which can be found in Chapter 6. It deals with
the convergence to equilibrium of a particular stochastic model from quantitative genetics:
this is established via some functional inequalities, which we prove with probabilistic arguments
based on couplings.
},
  author       = {Pedrotti, Francesco},
  issn         = {2663-337X},
  pages        = {183},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Functional inequalities and convergence of stochastic processes}},
  doi          = {10.15479/at:ista:17336},
  year         = {2024},
}

@article{17143,
  abstract     = {This paper deals with local criteria for the convergence to a global minimiser for gradient flow trajectories and their discretisations. To obtain quantitative estimates on the speed of convergence, we consider variations on the classical Kurdyka–Łojasiewicz inequality for a large class of parameter functions. Our assumptions are given in terms of the initial data, without any reference to an equilibrium point. The main results are convergence statements for gradient flow curves and proximal point sequences to a global minimiser, together with sharp quantitative estimates on the speed of convergence. These convergence results apply in the general setting of lower semicontinuous functionals on complete metric spaces, generalising recent results for smooth functionals on Rn. While the non-smooth setting covers very general spaces, it is also useful for (non)-smooth functionals on Rn.
.},
  author       = {Dello Schiavo, Lorenzo and Maas, Jan and Pedrotti, Francesco},
  issn         = {1088-6850},
  journal      = {Transactions of the American Mathematical Society},
  number       = {6},
  pages        = {3779--3804},
  publisher    = {American Mathematical Society},
  title        = {{Local conditions for global convergence of gradient flows and proximal point sequences in metric spaces}},
  doi          = {10.1090/tran/9156},
  volume       = {377},
  year         = {2024},
}

@unpublished{17350,
  abstract     = {Score-based generative models (SGMs) are powerful tools to sample from
complex data distributions. Their underlying idea is to (i) run a forward
process for time $T_1$ by adding noise to the data, (ii) estimate its score
function, and (iii) use such estimate to run a reverse process. As the reverse
process is initialized with the stationary distribution of the forward one, the
existing analysis paradigm requires $T_1\to\infty$. This is however
problematic: from a theoretical viewpoint, for a given precision of the score
approximation, the convergence guarantee fails as $T_1$ diverges; from a
practical viewpoint, a large $T_1$ increases computational costs and leads to
error propagation. This paper addresses the issue by considering a version of
the popular predictor-corrector scheme: after running the forward process, we
first estimate the final distribution via an inexact Langevin dynamics and then
revert the process. Our key technical contribution is to provide convergence
guarantees which require to run the forward process only for a fixed finite
time $T_1$. Our bounds exhibit a mild logarithmic dependence on the input
dimension and the subgaussian norm of the target distribution, have minimal
assumptions on the data, and require only to control the $L^2$ loss on the
score approximation, which is the quantity minimized in practice.},
  author       = {Pedrotti, Francesco and Maas, Jan and Mondelli, Marco},
  booktitle    = {arXiv},
  title        = {{Improved convergence of score-based diffusion models via prediction-correction}},
  doi          = {10.48550/arXiv.2305.14164},
  year         = {2024},
}

@unpublished{17352,
  abstract     = {We prove upper bounds on the $L^\infty$-Wasserstein distance from optimal
transport between strongly log-concave probability densities and log-Lipschitz
perturbations. In the simplest setting, such a bound amounts to a
transport-information inequality involving the $L^\infty$-Wasserstein metric
and the relative $L^\infty$-Fisher information. We show that this inequality
can be sharpened significantly in situations where the involved densities are
anisotropic. Our proof is based on probabilistic techniques using Langevin
dynamics. As an application of these results, we obtain sharp exponential rates
of convergence in Fisher's infinitesimal model from quantitative genetics,
generalising recent results by Calvez, Poyato, and Santambrogio in dimension 1
to arbitrary dimensions.},
  author       = {Khudiakova, Kseniia and Maas, Jan and Pedrotti, Francesco},
  booktitle    = {arXiv},
  title        = {{L∞-optimal transport of anisotropic log-concave measures and exponential convergence in Fisher's infinitesimal model}},
  doi          = {10.48550/arXiv.2402.04151},
  year         = {2024},
}

@unpublished{17353,
  abstract     = {In this paper we derive estimates for the Hessian of the logarithm
(log-Hessian) for solutions to the heat equation. For initial data in the form
of log-Lipschitz perturbation of strongly log-concave measures, the log-Hessian
admits an explicit, uniform (in space) lower bound. This yields a new estimate
for the Lipschitz constant of a transport map pushing forward the standard
Gaussian to a measure in this class. Further connections are discussed with
score-based diffusion models and improved Gaussian logarithmic Sobolev
inequalities. Finally, we show that assuming only fast decay of the tails of
the initial datum does not suffice to guarantee uniform log-Hessian upper
bounds.},
  author       = {Brigati, Giovanni and Pedrotti, Francesco},
  booktitle    = {arXiv},
  title        = {{Heat flow, log-concavity, and Lipschitz transport maps}},
  doi          = {10.48550/arXiv.2404.15205},
  year         = {2024},
}

@phdthesis{18088,
  abstract     = {Instant messaging applications like Whatsapp, Signal or Telegram have become ubiquitous in today's society.
Many of them provide not only end-to-end encryption, but also security guarantees even when the key material gets compromised.
These are achieved through frequent key update performed by users.
In particular, the compromise of a group key should preserve confidentiality of previously exchanged messages (forward secrecy), and a subsequent key update will ensure security for future ones (post-compromise security).
Though great protocols for one-on-one communication have been known for some time, the design of ones that scale efficiently for larger groups while achieving akin security guarantees is a hard problem.
A great deal of research has been aimed at this topic, much of it under the umbrella of the Messaging Layer Security (MLS) working group at the IETF. 
Started in 2018, this joint effort by academics and industry culminated in 2023 with the publication of the first standard for secure group messaging [IETF, RFC9420].

At the core of secure group messaging is a cryptographic primitive termed Continuous Group Key Agreement, or CGKA [Alwen et al. 2021], that essentially allows a changing group of users to agree on a common key with the added functionality security against compromises is achieved by users asynchronously issuing a key update. In this thesis we contribute to the understanding of CGKA across different angles.
First, we present a new technique to effect dynamic operations in groups, i.e., add or remove members, that can be more efficient that the one employed by MLS in certain settings.
Considering the setting of users belonging to multiple overlapping groups, we then show lowerbounds on the communication cost of constructions that leverage said overlap, at the same time showing protocols that are asymptotically optimal and efficient for practical settings, respectively. Along the way, we show that the communication cost of key updates in MLS is average-cost optimal.
An important feature in CGKA protocols, particularly for big groups, is the possibility of executing several group operations concurrently. While later versions of MLS support this, they do at the cost of worsening the communication efficiency of future group operations.
In this thesis we introduce two new protocols that permit concurrency without any negative effect on efficiency. Our protocols circumvent previously existing lower bounds by satisfying a new notion of post-compromise security that only asks for security to be re-established after a certain number of key updates have taken place. While this can be slower than MLS in terms of rounds of communication, we show that it leads to more efficient overall communication. 
Additionally, we introduce a new technique that allows group members to decrease the information they need to store and download, which makes one of our protocols enjoy much lower download cost than any other existing CGKA constructions. },
  author       = {Pascual Perez, Guillermo},
  issn         = {2663-337X},
  pages        = {239},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{On the efficiency and security of secure group messaging}},
  doi          = {10.15479/at:ista:18088},
  year         = {2024},
}

@phdthesis{17490,
  abstract     = {Deep learning is essential in numerous applications nowadays, with many recent advancements made possible by training very large models. Despite their broad applicability, training neural networks is often time-intensive, and it is usually impractical to manage large models and datasets on a single machine. To address these issues, distributed deep learning training has become increasingly important. However, distributed training requires synchronization among nodes, and the mini-batch stochastic gradient descent algorithm places a significant load on network connections. A possible solution to tackle the synchronization bottleneck is to reduce a message size by lossy compression.

In this thesis, we investigate systems and algorithmic approaches to communication compression during training. From the systems perspective, we demonstrate that a common approach of expensive hardware overprovisioning can be replaced through a thorough system design. We introduce a framework that introduces efficient software support for compressed communication in machine learning applications, applicable to both multi-GPU single-node training and larger-scale multi-node training. Our framework integrates with popular ML frameworks, providing up to 3x speedups for multi-GPU nodes based on commodity hardware and order-of-magnitude improvements in the multi-node setting, with negligible impact on accuracy.

Also, we consider an application of our framework to different communication schemes, such as Fully Sharded Data Parallel. We provide strong convergence guarantees for the compression in such a setup. Empirical validation shows that our method preserves model accuracy for GPT-family models with up to 1.3 billion parameters, while completely removing the communication bottlenecks of non-compressed alternatives, providing up to 2.2x speedups end-to-end.

From the algorithmic side, we propose a general framework that dynamically adjusts the degree of compression across a model's layers during training. This approach enhances overall compression and results in significant speedups without compromising accuracy. Our algorithm utilizes an adaptive algorithm that automatically selects the optimal compression parameters for model layers, ensuring the best compression ratio while adhering to an error constraint. Our method is effective across all existing families of compression methods. It achieves up to 2.5x faster training and up to a 5x improvement in compression compared to efficient implementations of current approaches. Additionally, LGreCo can complement existing adaptive algorithms.
},
  author       = {Markov, Ilia},
  issn         = {2663-337X},
  pages        = {102},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Communication-efficient distributed training of deep neural networks : An algorithms and systems perspective}},
  doi          = {10.15479/at:ista:17490},
  year         = {2024},
}

@inproceedings{18086,
  abstract     = {Abstract. Continuous group key agreement (CGKA) allows a group of
users to maintain a continuously updated shared key in an asynchronous
setting where parties only come online sporadically and their messages
are relayed by an untrusted server. CGKA captures the basic primitive
underlying group messaging schemes.
Current solutions including TreeKEM (“Messaging Layer Security”
(MLS) IETF RFC 9420) cannot handle concurrent requests while retaining low communication complexity. The exception being CoCoA, which
is concurrent while having extremely low communication complexity (in
groups of size n and for m concurrent updates the communication per
user is log(n), i.e., independent of m). The main downside of CoCoA
is that in groups of size n, users might have to do up to log(n) update
requests to the server to ensure their (potentially corrupted) key material has been refreshed.
In this work we present a “fast healing” concurrent CGKA protocol,
named DeCAF, where users will heal after at most log(t) requests, with
t being the number of corrupted users. While also suitable for the standard central-server setting, our protocol is particularly interesting for
realizing decentralized group messaging, where protocol messages (add,
remove, update) are being posted on some append-only data structure
rather than sent to a server. In this setting, concurrency is crucial once
the rate of requests exceeds, say, the rate at which new blocks are added
to a blockchain.
In the central-server setting, CoCoA (the only alternative with concurrency, sub-linear communication and basic post-compromise security)
enjoys much lower download communication. However, in the decentralized setting – where there is no server which can craft specific messages
for different users to reduce their download communication – our protocol
significantly outperforms CoCoA. DeCAF heals in fewer epochs (log(t)
vs. log(n)) while incurring a similar per epoch per user communication
cost.},
  author       = {Alwen, Joel F and Auerbach, Benedikt and Cueto Noval, Miguel and Klein, Karen and Pascual Perez, Guillermo and Pietrzak, Krzysztof Z},
  booktitle    = {Security and Cryptography for Networks: 14th International Conference},
  editor       = {Galdi, Clemente and Phan, Duong Hieu},
  isbn         = {9783031710728},
  issn         = {1611-3349},
  location     = {Amalfi, Italy},
  pages        = {294–313},
  publisher    = {Springer Nature},
  title        = {{DeCAF: Decentralizable CGKA with fast healing}},
  doi          = {10.1007/978-3-031-71073-5_14},
  volume       = {14974},
  year         = {2024},
}

@inproceedings{17456,
  abstract     = {Data-parallel distributed training of deep neural networks (DNN) has gained very widespread adoption, but can still experience communication bottlenecks. To address this issue, entire families of compression mechanisms have been developed, including quantization, sparsification, and low-rank approximation, some of which are seeing significant practical adoption. Despite this progress, almost all known compression schemes apply compression uniformly across DNN layers, although layers are heterogeneous in terms of parameter count and their impact on model accuracy.In this work, we provide a general framework for adapting the degree of compression across the model's layers dynamically during training, improving the overall compression, while leading to substantial speedups, without sacrificing accuracy. Our framework, called L-GreCo, is based on an adaptive algorithm, which automatically picks the optimal compression parameters for model layers guaranteeing the best compression ratio while satisfying an error constraint. Extensive experiments over image classification and language modeling tasks shows that L-GreCo is effective across all existing families of compression methods, and achieves up to 2.5
×
 training speedup and up to 5
×
 compression improvement over efficient implementations of existing approaches, while recovering full accuracy. Moreover, L-GreCo is complementary to existing adaptive algorithms, improving their compression ratio by 50\% and practical throughput by 66\%. An anonymized implementation is available at https://github.com/LGrCo/L-GreCo.},
  author       = {Markov, Ilia and Alimohammadi, Kaveh and Frantar, Elias and Alistarh, Dan-Adrian},
  booktitle    = {Proceedings of Machine Learning and Systems },
  editor       = {Gibbons, P. and Pekhimenko, G. and De Sa, C.},
  location     = {Athens, Greece},
  publisher    = {Association for Computing Machinery},
  title        = {{L-GreCo: Layerwise-adaptive gradient compression for efficient data-parallel deep learning}},
  volume       = {6},
  year         = {2024},
}

@article{14542,
  abstract     = {It is a remarkable property of BCS theory that the ratio of the energy gap at zero temperature Ξ
 and the critical temperature Tc is (approximately) given by a universal constant, independent of the microscopic details of the fermionic interaction. This universality has rigorously been proven quite recently in three spatial dimensions and three different limiting regimes: weak coupling, low density and high density. The goal of this short note is to extend the universal behavior to lower dimensions d=1,2 and give an exemplary proof in the weak coupling limit.},
  author       = {Henheik, Sven Joscha and Lauritsen, Asbjørn Bækgaard and Roos, Barbara},
  issn         = {1793-6659},
  journal      = {Reviews in Mathematical Physics},
  number       = {9},
  publisher    = {World Scientific Publishing},
  title        = {{Universality in low-dimensional BCS theory}},
  doi          = {10.1142/s0129055x2360005x},
  volume       = {36},
  year         = {2024},
}

@article{18107,
  abstract     = {We consider a dilute fully spin-polarized Fermi gas at positive temperature in dimensions  d∈{1,2,3} . We show that the pressure of the interacting gas is bounded from below by that of the free gas plus, to leading order, an explicit term of order  adρ2+2/d, where a is the p-wave scattering length of the repulsive interaction and  ρ  is the particle density. The results are valid for a wide range of repulsive interactions, including that of a hard core, and uniform in temperatures at most of the order of the Fermi temperature. A central ingredient in the proof is a rigorous implementation of the fermionic cluster expansion of Gaudin, Gillespie and Ripka (Nucl. Phys. A, 176.2 (1971), pp. 237–260).},
  author       = {Lauritsen, Asbjørn Bækgaard and Seiringer, Robert},
  issn         = {2050-5094},
  journal      = {Forum of Mathematics, Sigma},
  publisher    = {Cambridge University Press},
  title        = {{Pressure of a dilute spin-polarized Fermi gas: Lower bound}},
  doi          = {10.1017/fms.2024.56},
  volume       = {12},
  year         = {2024},
}

@phdthesis{17164,
  abstract     = {This thesis is structured into two parts. In the first part, we consider the random
variable X := Tr(f1(W)A1 . . . fk(W)Ak) where W is an N × N Hermitian Wigner matrix, k ∈ N, and we choose (possibly N-dependent) regular functions f1, . . . , fk as well as
bounded deterministic matrices A1, . . . , Ak. In this context, we prove a functional central
limit theorem on macroscopic and mesoscopic scales, showing that the fluctuations of X
around its expectation are Gaussian and that the limiting covariance structure is given
by a deterministic recursion. We further give explicit error bounds in terms of the scaling
of f1, . . . , fk and the number of traceless matrices among A1, . . . , Ak, thus extending
the results of Cipolloni, Erdős and Schröder [40] to products of arbitrary length k ≥ 2.
Analyzing the underlying combinatorics leads to a non-recursive formula for the variance
of X as well as the covariance of X and Y := Tr(fk+1(W)Ak+1 . . . fk+ℓ(W)Ak+ℓ) of similar
build. When restricted to polynomials, these formulas reproduce recent results of Male,
Mingo, Peché, and Speicher [107], showing that the underlying combinatorics of noncrossing partitions and annular non-crossing permutations continue to stay valid beyond
the setting of second-order free probability theory. As an application, we consider the
fluctuation of Tr(eitW A1e
−itW A2)/N around its thermal value Tr(A1) Tr(A2)/N2 when t
is large and give an explicit formula for the variance.
The second part of the thesis collects three smaller projects focusing on different random
matrix models. In the first project, we show that a class of weakly perturbed Hamiltonians
of the form Hλ = H0 + λW, where W is a Wigner matrix, exhibits prethermalization.
That is, the time evolution generated by Hλ relaxes to its ultimate thermal state via an
intermediate prethermal state with a lifetime of order λ
−2
. As the main result, we obtain
a general relaxation formula, expressing the perturbed dynamics via the unperturbed
dynamics and the ultimate thermal state. The proof relies on a two-resolvent global law
for the deformed Wigner matrix Hλ.
The second project focuses on correlated random matrices, more precisely on a correlated N × N Hermitian random matrix with a polynomially decaying metric correlation
structure. A trivial a priori bound shows that the operator norm of this model is stochastically dominated by √
N. However, by calculating the trace of the moments of the matrix
and using the summable decay of the cumulants, the norm estimate can be improved to a
bound of order one.
In the third project, we consider a multiplicative perturbation of the form UA(t) where U
is a unitary random matrix and A = diag(t, 1, ..., 1). This so-called UA model was
first introduced by Fyodorov [73] for its applications in scattering theory. We give a
general description of the eigenvalue trajectories obtained by varying the parameter t and
introduce a flow of deterministic domains that separates the outlier resulting from the
rank-one perturbation from the typical eigenvalues for all sub-critical timescales. The
results are obtained under generic assumptions on U that hold for various unitary random
matrices, including the circular unitary ensemble (CUE) in the original formulation of
the model.},
  author       = {Reker, Jana},
  issn         = {2663-337X},
  keywords     = {Random Matrices, Spectrum, Central Limit Theorem, Resolvent, Free Probability},
  pages        = {206},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Central limit theorems for random matrices: From resolvents to free probability}},
  doi          = {10.15479/at:ista:17164},
  year         = {2024},
}

