@article{18266,
  abstract     = {Matrix games are the most basic model in game theory, and yet robustness with respect to small perturbations of the matrix entries is not fully understood. In this paper, we introduce value positivity and uniform value positivity, two properties that refine the notion of optimality in the context of polynomially perturbed matrix games. The first concept captures how the value depends on the perturbation parameter, and the second consists of the existence of a fixed strategy that guarantees the value of the unperturbed matrix game for every sufficiently small positive parameter. We provide polynomial-time algorithms to check whether a polynomially perturbed matrix game satisfies these properties. We further provide the functional form for a parameterized optimal strategy and the value function. Finally, we translate our results to linear programming and stochastic games, where value positivity is related to the existence of robust solutions.},
  author       = {Chatterjee, Krishnendu and Oliu-Barton, Miquel and Saona Urmeneta, Raimundo J},
  issn         = {1526-5471},
  journal      = {Mathematics of Operations Research},
  number       = {4},
  pages        = {2433--3282},
  publisher    = {Institute for Operations Research and the Management Sciences},
  title        = {{Value-positivity for matrix games}},
  doi          = {10.1287/moor.2022.0332},
  volume       = {50},
  year         = {2024},
}

@article{18554,
  abstract     = {We prove the Eigenstate Thermalization Hypothesis for general Wigner-type matrices in the bulk of the self-consistent spectrum, with optimal control on the fluctuations for observables of arbitrary rank. As the main technical ingredient, we prove rank-uniform optimal local laws for one and two resolvents of a Wigner-type matrix with regular observables. Our results hold under very general conditions on the variance profile, even allowing many vanishing entries, demonstrating that Eigenstate Thermalization occurs robustly across a diverse class of random matrix ensembles, for which the underlying quantum system has a non-trivial spatial structure.},
  author       = {Erdös, László and Riabov, Volodymyr},
  issn         = {1432-0916},
  journal      = {Communications in Mathematical Physics},
  number       = {12},
  publisher    = {Springer Nature},
  title        = {{Eigenstate Thermalization Hypothesis for Wigner-type matrices}},
  doi          = {10.1007/s00220-024-05143-y},
  volume       = {405},
  year         = {2024},
}

@article{18307,
  abstract     = {Vaccination is the most effective tool to control infectious diseases. However, the evolution of vaccine resistance, exemplified by vaccine resistance in SARS-CoV-2, remains a concern. Here, we model complex vaccination strategies against a pathogen with multiple epitopes—molecules targeted by the vaccine. We found that a vaccine targeting one epitope was ineffective in preventing vaccine escape. Vaccine resistance in highly infectious pathogens was prevented by the full-epitope vaccine, that is, one targeting all available epitopes, but only when the rate of pathogen evolution was low. Strikingly, a bet-hedging strategy of random administration of vaccines targeting different epitopes was the most effective in preventing vaccine resistance in pathogens with the low rate of infection and high rate of evolution. Thus, complex vaccination strategies, when biologically feasible, may be preferable to the currently used single-vaccine approaches for long-term control of disease outbreaks, especially when applied to livestock with near 100% vaccination rates.},
  author       = {Rella, Simon and Kulikova, Yuliya A. and Minnegalieva, Aygul and Kondrashov, Fyodor},
  issn         = {1558-5646},
  journal      = {Evolution: International journal of organic evolution},
  number       = {10},
  pages        = {1722--1738},
  publisher    = {Oxford University Press},
  title        = {{Complex vaccination strategies prevent the emergence of vaccine resistance}},
  doi          = {10.1093/evolut/qpae106},
  volume       = {78},
  year         = {2024},
}

@unpublished{20701,
  abstract     = {A Proof of Exponentiation (PoE) allows a prover to efficiently convince a verifier that $y = x^e$ in some group of unknown order. PoEs
are the basis for practical constructions of Verifiable Delay Functions (VDFs), which, in turn, are important for various higher-level
protocols in distributed computing. In applications such as distributed consensus, many PoEs are generated regularly, motivating
protocols for secure aggregation of batches of statements into a
few statements to improve the efficiency for both parties. Rotem
(TCC 2021) recently presented two such generic batch PoEs.
In this work, we introduce two batch PoEs that outperform
both proposals of Rotem and we evaluate their practicality. First,
we show that the two batch PoEs of Rotem can be combined to
improve the overall efficiency by at least a factor of two. Second, we
revisit the work of Bellare, Garay, and Rabin (EUROCRYPT 1998)
on batch verification of digital signatures and show that, under the
low order assumption, their bucket test can be securely adapted to
the setting of groups of unknown order. The resulting batch PoE
quickly outperforms the state of the art in the expected number of
group multiplications with the growing number of instances, and it
decreases the cost of batching by an order of magnitude already for
hundreds of thousands of instances. Importantly, it is the first batch
PoE that significantly decreases both the proof size and complexity
of verification. Our experimental evaluations show that even a nonoptimized implementation achieves such improvements, which
would match the demands of real-life systems requiring large-scale
PoE processing.
Finally, even though our proof techniques are conceptually similar to Rotem, we give an improved analysis of the application of the
low order assumption towards secure batching of PoE instances,
resulting in a tight reduction, which is important when setting the
security parameter in practice.},
  author       = {Hoffmann, Charlotte and Hubáček, Pavel and Ivanova, Svetlana},
  booktitle    = {Cryptology ePrint Archive},
  publisher    = {International Association for Cryptologic Research},
  title        = {{Practical batch proofs of exponentiation}},
  year         = {2024},
}

@inproceedings{15168,
  abstract     = {A linearly ordered (LO) k-colouring of a hypergraph is a colouring of its vertices with colours 1, … , k such that each edge contains a unique maximal colour. Deciding whether an input hypergraph admits LO k-colouring with a fixed number of colours is NP-complete (and in the special case of graphs, LO colouring coincides with the usual graph colouring). Here, we investigate the complexity of approximating the "linearly ordered chromatic number" of a hypergraph. We prove that the following promise problem is NP-complete: Given a 3-uniform hypergraph, distinguish between the case that it is LO 3-colourable, and the case that it is not even LO 4-colourable. We prove this result by a combination of algebraic, topological, and combinatorial methods, building on and extending a topological approach for studying approximate graph colouring introduced by Krokhin, Opršal, Wrochna, and Živný (2023).},
  author       = {Filakovský, Marek and Nakajima, Tamio Vesa and Opršal, Jakub and Tasinato, Gianluca and Wagner, Uli},
  booktitle    = {41st International Symposium on Theoretical Aspects of Computer Science},
  isbn         = {9783959773119},
  issn         = {1868-8969},
  location     = {Clermont-Ferrand, France},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Hardness of linearly ordered 4-colouring of 3-colourable 3-uniform hypergraphs}},
  doi          = {10.4230/LIPIcs.STACS.2024.34},
  volume       = {289},
  year         = {2024},
}

@article{18656,
  abstract     = {We consider the time evolution of the out-of-time-ordered correlator (OTOC) of two general observables $A$ and $B$ in a mean field chaotic quantum system described by a random Wigner matrix as its Hamiltonian.
  author       = {Cipolloni, Giorgio and Erdös, László and Henheik, Sven Joscha},
  issn         = {1095-0753},
  journal      = {Advances in Theoretical and Mathematical Physics},
  number       = {6},
  pages        = {2025--2083},
  publisher    = {International Press},
  title        = {{Out-of-time-ordered correlators for Wigner matrices}},
  doi          = {10.4310/ATMP.241031013250},
  volume       = {28},
  year         = {2024},
}

@article{17049,
  abstract     = {We consider large non-Hermitian NxN matrices with an additive independent, identically distributed (i.i.d.) noise for each matrix element. We show that already a small noise of variance 1/N completely thermalises the bulk singular vectors, in particular they satisfy the strong form of Quantum Unique Ergodicity (QUE) with an optimal speed of convergence. In physics terms, we thus extend the Eigenstate Thermalisation Hypothesis, formulated originally by Deutsch [34] and proven for Wigner matrices in [23], to arbitrary non-Hermitian matrices with an i.i.d. noise. As a consequence we obtain an optimal lower bound on the diagonal overlaps of the corresponding non-Hermitian eigenvectors. This quantity, also known as the (square of the) eigenvalue condition number measuring the sensitivity of the eigenvalue to small perturbations, has notoriously escaped rigorous treatment beyond the explicitly computable Ginibre ensemble apart from the very recent upper bounds given in [7] and [45]. As a key tool, we develop a new systematic decomposition of general observables in random matrix theory that governs the size of products of resolvents with deterministic matrices in between.},
  author       = {Cipolloni, Giorgio and Erdös, László and Henheik, Sven Joscha and Schröder, Dominik J},
  issn         = {1096-0783},
  journal      = {Journal of Functional Analysis},
  number       = {4},
  publisher    = {Elsevier},
  title        = {{Optimal lower bound on eigenvector overlaps for non-Hermitian random matrices}},
  doi          = {10.1016/j.jfa.2024.110495},
  volume       = {287},
  year         = {2024},
}

@unpublished{19545,
  abstract     = {We prove the Eigenstate Thermalisation Hypothesis for Wigner matrices
uniformly in the entire spectrum, in particular near the spectral edges, with a
bound on the fluctuation that is optimal for any observable. This complements
earlier works of Cipolloni et al. (Comm. Math. Phys. 388, 2021; Forum Math.,
Sigma 10, 2022) and Benigni et al. (Comm. Math. Phys. 391, 2022; arXiv:
2303.11142) that were restricted either to the bulk of the spectrum or to
special observables. As a main ingredient, we prove a new multi-resolvent local
law that optimally accounts for the edge scaling.},
  author       = {Cipolloni, Giorgio and Erdös, László and Henheik, Sven Joscha},
  booktitle    = {arXiv},
  title        = {{Eigenstate thermalisation at the edge for Wigner matrices}},
  doi          = {10.48550/arXiv.2309.05488},
  year         = {2024},
}

@unpublished{19551,
  abstract     = {We introduce a notion of a \emph{local gap} for interacting many-body quantum lattice systems and prove the validity of response theory and Kubo's formula for localized perturbations in such settings.
On a high level, our result shows that the usual spectral gap condition, concerning the system as a whole, is not a necessary condition for understanding local properties of the system.
More precisely, we say that an equilibrium state ρ0 of a Hamiltonian H0 is locally gapped in Λgap⊂Λ, whenever the Liouvillian −i[H0,⋅] is almost invertible on local observables supported in Λgap when tested in ρ0.
To put this into context, we provide other alternative notions of a local gap and discuss their relations.
The validity of response theory is based on the construction of \emph{non-equilibrium almost stationary states} (NEASSs).
By controlling locality properties of the NEASS construction, we show that response theory holds to any order, whenever the perturbation \(\epsilon V\) acts in a region which is further than |logϵ| away from the non-gapped region Λ∖Λgap.},
  author       = {Henheik, Sven Joscha and Wessel, Tom},
  booktitle    = {arXiv},
  title        = {{Response theory for locally gapped systems}},
  doi          = {10.48550/arXiv.2410.10809},
  year         = {2024},
}

@unpublished{19550,
  abstract     = {We introduce a multi-band BCS free energy functional and prove that for a
multi-band superconductor the effect of inter-band coupling can only increase
the critical temperature, irrespective of its attractive or repulsive nature
and its strength. Further, for weak coupling and weaker inter-band coupling, we
prove that the dependence of the increase in critical temperature on the
inter-band coupling is (1) linear, if there are two or more equally strongly
superconducting bands, or (2) quadratic, if there is only one dominating band.},
  author       = {Henheik, Sven Joscha and Langmann, Edwin and Lauritsen, Asbjørn Bækgaard},
  booktitle    = {arXiv},
  title        = {{Multi-band superconductors have enhanced critical temperatures}},
  doi          = {10.48550/arXiv.2409.17297},
  year         = {2024},
}

@unpublished{19547,
  abstract     = {For correlated real symmetric or complex Hermitian random matrices, we prove
that the local eigenvalue statistics at any cusp singularity are universal.
Since the density of states typically exhibits only square root edge or cubic
root cusp singularities, our result completes the proof of the
Wigner-Dyson-Mehta universality conjecture in all spectral regimes for a very
general class of random matrices. Previously only the bulk and the edge
universality were established in this generality [arXiv:1804.07744], while cusp
universality was proven only for Wigner-type matrices with independent entries
[arXiv:1809.03971, arXiv:1811.04055]. As our main technical input, we prove an
optimal local law at the cusp using the Zigzag strategy, a recursive tandem of
the characteristic flow method and a Green function comparison argument.
Moreover, our proof of the optimal local law holds uniformly in the spectrum,
thus also re-establishing universality of the local eigenvalue statistics in
the previously studied bulk [arXiv:1705.10661] and edge [arXiv:1804.07744]
regimes.},
  author       = {Erdös, László and Henheik, Sven Joscha and Riabov, Volodymyr},
  booktitle    = {arXiv},
  title        = {{Cusp universality for correlated random matrices}},
  doi          = {10.48550/arXiv.2410.06813},
  year         = {2024},
}

@phdthesis{18588,
  abstract     = {This thesis is an experimental work about two distinct research projects that evolved from a single project: non-equilibrium dynamics of an acoustically vibrated particle and microfabrication of particles with nano-scale 3D printing. The first project explores non equilibrium dynamics of a particle driven by ultrasonic vibrations. We design an experimental system consisting of an electromechanical vibration scheme to drive the particle’s vibrations and an imaging scheme to track its trajectories. We study the trajectories to determine how the particle’s dynamics evolve under the driven conditions, considering out of equilibrium systems in the context of equilibrium statistical mechanics. Using a Langevin framework and the Boltzmann factor, we characterize the particle’s dynamics as complex; the particle motion
is not purely diffusive. We extract physical parameters like spring constant, effective temperature, damping coefficient and resonance frequency.

In the second project, we explore and develop techniques in the design and microfabrication of particles across scales. Microfabrication involves building structures at the micron or submicron scale. These designed miniaturized patterns, objects, or devices are useful in biophysics, pharmacology, medical biology, and nanotechnology. We specifically apply two-photon polymerization, a form of 3D nano printing. We print millimetric particles, characterizing different designs to evaluate and showcase the resolution, aspect ratio integrity and print quality of the printing process. We also design and fabricate a microsensor to deflect under applicable force of order 0.1 pN. We present fundamental concepts needed to design the microsensor, showcasing 3D printing at considerably smaller scales down to the µm or below.},
  author       = {Mweka, Cecelia N},
  issn         = {2791-4585},
  pages        = {61},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Non equilibrium dynamics of driven individual particles and 3D printing across scales}},
  doi          = {10.15479/at:ista:18588},
  year         = {2024},
}

@phdthesis{17225,
  abstract     = {This thesis describes the development of an atom interferometer designed to exploit the
advantages of utilizing quantum entanglement for enhanced precision measurements beyond
the standard quantum limit. While the project remains ongoing, significant progress has been
made.
A key contribution of this work is the development of Quantrol, an experimental control
system leveraging the ARTIQ framework. This software enables precise timing and control
without requiring prior knowledge of ARTIQ’s implementation details or coding experience.
The interface offers user friendly visual comprehension of the experimental sequence and
extended capabilities, allowing researchers to scan variables with a simple click of a mouse.
The main proposed project is to implement atom interferometric sequence with squeezed input
states inside of a dipole trap generated by a high finesse cavity. The presence of the dipole
trap allows one dimensional atomic cloud split while maintaining relatively strong confinement
in other directions.
We are currently able to trap and cool 87Rb atoms to few micro kelvin temperatures, load
them into the dipole trap and state prepare them to be used for squeezing and interferometric
sequence.},
  author       = {Li, Vyacheslav},
  issn         = {2663-337X},
  pages        = {79},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Towards a quantum entanglement enhanced atom interferometer}},
  doi          = {10.15479/at:ista:17225},
  year         = {2024},
}

@phdthesis{18443,
  abstract     = {In [KW06] Kapustin and Witten conjectured that there is a mirror symmetry relation between
the hyperkähler structures on certain Higgs bundle moduli spaces. As a consequence, they
conjecture an equivalence between categories of BBB and BAA-branes. At the classical
level, this mirror symmetry is given by T-duality between semi-flat hyperkähler structures on
algebraic integrable systems.
In this thesis, we investigate the T-duality relation between hyperkähler structures and the
corresponding branes on affine torus bundles. We use the techniques of generalized geometry
to show that semi-flat hyperkähler structures are T-dual on algebraic integrable systems.
We also describe T-duality for generalized branes. Motivated by Fourier-Mukai transform
we upgrade the T-duality between generalized branes to T-duality of submanifolds endowed
with U(1)-bundles and connections. This T-duality in the appropriate context specializes to
T-duality between BBB and BAA-branes.
},
  author       = {Sisak, Maria A},
  issn         = {2663-337X},
  keywords     = {hyperkaehler geometry, branes, mirror symmetry, T-duality},
  pages        = {178},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{T-dual branes on hyperkähler manifolds}},
  doi          = {10.15479/at:ista:18443},
  year         = {2024},
}

@phdthesis{17485,
  abstract     = {Large language models (LLMs) have made tremendous progress in the past few years, from being able to generate coherent text to matching or surpassing humans in a wide variety of creative, knowledge or reasoning tasks. Much of this can be attributed to massively increased scale, both in the size of the model as well as the amount of training data, from 100s of millions to 100s of billions, or even trillions. This trend is expected to continue, which, although exciting, also raises major practical concerns. Already today's 100+ billion parameter LLMs require top-of-the-line hardware just to run. Hence, it is clear that sustaining these developments will require significant efficiency advances.

Historically, one of the most practical ways of improving model efficiency has been compression, especially in the form of sparsity or quantization. While this has been studied extensively in the past, existing accurate methods are all designed for models around 100 million parameters; scaling them up to ones literally 1000x larger is highly challenging. In this thesis, we introduce a new unified sparsification and quantization approach OBC, which through additional algorithmic enhancements leads to GPTQ and SparseGPT, the first techniques fast and accurate enough to compress 100+ billion parameter models to 4- or even 3-bit precision and 50% weight-sparsity, respectively. Additionally, we show how weight-only quantization does not just bring space savings but also up to 4.5x faster generation speed, via custom GPU kernels.

In fact, we show for the first time that it is possible to develop an FP16 times INT4 mixed-precision matrix multiplication kernel, called Marlin, which comes close to simultaneously maximizing both memory and compute utilization, making weight-only quantization highly practical even for multi-user serving. Further, we demonstrate that GPTQ can be scaled to widely overparametrized trillion-parameter models, where extreme sub-1-bit compression rates can be achieved without any inference slow-down, by co-designing a bespoke entropy coding scheme together with an efficient kernel.

Finally, we also study compression from the perspective of someone with access to massive amounts of compute resources for training large models completely from scratch. Here the key questions evolve around the joint scaling behavior between compression, model size, and amount of training data used. Based on extensive experimental results for both vision and text models, we introduce the first scaling law which accurately captures the relationship between weight-sparsity, number of non-zero weights and data. This further allows us to characterize the optimal sparsity, which we find to increase the longer a fixed cost model is being trained.

Overall, this thesis presents contributions to three different angles of large model efficiency: affordable but accurate algorithms, highly efficient systems implementations, and fundamental scaling laws for compressed training.},
  author       = {Frantar, Elias},
  issn         = {2663-337X},
  pages        = {129},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Compressing large neural networks : Algorithms, systems and scaling laws}},
  doi          = {10.15479/at:ista:17485},
  year         = {2024},
}

@inproceedings{18061,
  abstract     = {Mixture-of-Experts (MoE) architectures offer a general solution to the high inference costs of large language models (LLMs) via sparse routing, bringing faster and more accurate models, at the cost of massive parameter counts. For example, the SwitchTransformer-c2048 model has 1.6 trillion parameters, requiring 3.2TB of accelerator memory to run efficiently, which makes practical deployment challenging and expensive. In this paper, we present a solution to this memory problem, in form of a new compression and execution framework called QMoE. Specifically, QMoE consists of a scalable algorithm which accurately compresses trillion-parameter MoEs to less than 1 bit per parameter, in a custom format co-designed with bespoke GPU decoding kernels to facilitate efficient end-to-end compressed inference, with minor runtime overheads relative to uncompressed execution. Concretely, QMoE can compress the 1.6 trillion parameter SwitchTransformer-c2048 model to less than 160GB (20x compression, 0.8 bits per parameter) at only minor accuracy loss, in less than a day on a single GPU. This enables, for the first time, the execution of a trillion-parameter model on affordable commodity hardware, like a single server with 4x NVIDIA A6000 or 8x NVIDIA 3090 GPUs, at less than 5% runtime overhead relative to ideal uncompressed inference. The anonymized code is available at: github.com/mlsys24-qmoe/qmoe.},
  author       = {Frantar, Elias and Alistarh, Dan-Adrian},
  booktitle    = {Proceedings of Machine Learning and Systems},
  editor       = {Gibbons, P. and Pekhimenko, G. and De Sa, C.},
  location     = {Santa Clara, CA, USA},
  title        = {{QMoE: Sub-1-bit compression of trillion parameter models}},
  volume       = {6},
  year         = {2024},
}

@inproceedings{18062,
  abstract     = {We explore the impact of parameter sparsity on the scaling behavior of Transformers trained on massive datasets (i.e., "foundation models"), in both vision and language domains. In this setting, we identify the first scaling law describing the relationship between weight sparsity, number of non-zero parameters, and amount of training data, which we validate empirically across model and data scales; on ViT/JFT-4B and T5/C4. These results allow us to characterize the "optimal sparsity", the sparsity level which yields the best performance for a given effective model size and training budget. For a fixed number of non-zero parameters, we identify that the optimal sparsity increases with the amount of data used for training. We also extend our study to different sparsity structures (such as the hardware-friendly n:m pattern) and strategies (such as starting from a pretrained dense model). Our findings shed light on the power and limitations of weight sparsity across various parameter and computational settings, offering both theoretical understanding and practical implications for leveraging sparsity towards computational efficiency improvements. We provide pruning and scaling law fitting code at: github.com/google-research/jaxpruner/tree/main/jaxpruner/projects/bigsparse.},
  author       = {Frantar, Elias and Ruiz, Carlos Riquelme and Houlsby, Neil and Alistarh, Dan-Adrian and Evci, Utku},
  booktitle    = {The Twelfth International Conference on Learning Representations},
  location     = {Vienna, Austria},
  title        = {{Scaling laws for sparsely-connected foundation models}},
  year         = {2024},
}

@phdthesis{17208,
  abstract     = {Can current quantum computers provide a speedup over their classical counterparts for some kinds of problems? In this thesis, with a focus on ground state search/preparation, we address some of the challenges that both quantum annealing and variational quantum algorithms suffer from, hindering any possible practical speedup in comparison to the best classical counterparts. 

In the first part of the thesis, we study the performance of quantum annealing for solving a particular combinatorial optimization problem called 3-XOR satisfability (3-XORSAT). The classical problem is mapped into a ground state search of a 3-local classical Hamiltonian $H_C$. We consider how modifying the initial problem, by adding more interaction terms to the corresponding Hamiltonian, leads to the emergence of a first-order phase transition during the annealing process. This phenomenon causes the total annealing duration, $T$, required to prepare the ground state of $H_C$ with a high probability to increase exponentially with the size of the problem. Our findings indicate that with the growing complexity of problem instances, the likelihood of encountering first-order phase transitions also increases, making quantum annealing an impractical solution for these types of combinatorial optimization problems.

In the second part, we focus on the problem of barren plateaus in generic variational quantum algorithms. Barren plateaus correspond to flat regions in the parameter space where the gradient of the cost function is zero in expectation, and with the variance decaying exponentially with the system size, thus obstructing an efficient parameter optimization.  We propose an algorithm to circumvent Barren Plateaus by monitoring the entanglement entropy of k-local reduced density matrices, alongside a method for estimating entanglement entropy via classical shadow tomography. We illustrate the approach with the paradigmatic example of the variational quantum eigensolver, and show that our algorithm effectively avoids barren plateaus in the initialization as well as during the optimization stage. 

Lastly, in the last two Chapters of this thesis, we focus on the quantum approximate optimization algorithm (QAOA), originally introduced as an algorithm for solving generic combinatorial optimization problems in near-term quantum devices. Specifically, we focus on how to develop rigorous initialization strategies with guarantee improvement. Our motivation for this study lies in that for random initialization, the optimization typically leads to local minima with poor performance. Our main result corresponds to the analytical construction of index-1 saddle points or transition states, stationary points with a single direction of descent, as a tool for systematically exploring the QAOA optimization landscape. This leads us to propose a novel greedy parameter initialization strategy that guarantees for the energy to decrease with an increasing number of circuit layers. Furthermore, with precise estimates for the negative Hessian eigenvalue and its eigenvector, we establish a lower bound for energy improvement following a QAOA iteration.},
  author       = {Medina Ramos, Raimel A},
  issn         = {2663-337X},
  keywords     = {Quantum computing, Variational Quantum Algorithms, Optimization},
  pages        = {133},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Exploring the optimization landscape of variational quantum algorithms}},
  doi          = {10.15479/at:ista:17208},
  year         = {2024},
}

@unpublished{17222,
  abstract     = {The quantum approximate optimization algorithm (QAOA) uses a quantum computer
to implement a variational method with $2p$ layers of alternating unitary
operators, optimized by a classical computer to minimize a cost function. While
rigorous performance guarantees exist for the QAOA at small depths $p$, the
behavior at large depths remains less clear, though simulations suggest
exponentially fast convergence for certain problems. In this work, we gain
insights into the deep QAOA using an analytic expansion of the cost function
around transition states. Transition states are constructed in a recursive
manner: from the local minima of the QAOA with $p$ layers we obtain transition
states of the QAOA with $p+1$ layers, which are stationary points characterized
by a unique direction of negative curvature. We construct an analytic estimate
of the negative curvature and the corresponding direction in parameter space at
each transition state. The expansion of the QAOA cost function along the
negative direction to the quartic order gives a lower bound of the QAOA cost
function improvement. We provide physical intuition behind the analytic
expressions for the local curvature and quartic expansion coefficient. Our
numerical study confirms the accuracy of our approximations and reveals that
the obtained bound and the true value of the QAOA cost function gain have a
characteristic exponential decrease with the number of layers $p$, with the
bound decreasing more rapidly. Our study establishes an analytical method for
recursively studying the QAOA that is applicable in the regime of high circuit
depth.},
  author       = {Medina Ramos, Raimel A and Serbyn, Maksym},
  booktitle    = {arXiv},
  title        = {{A recursive lower bound on the energy improvement of the quantum approximate optimization algorithm}},
  doi          = {10.48550/arXiv.2405.10125},
  year         = {2024},
}

@phdthesis{18132,
  abstract     = {In this thesis, we are dealing with both arithmetic and geometric problems coming from the
study of rational points with a particular focus on function fields over finite fields:
(1) Using the circle method we produce upper bounds for the number of rational points of
bounded height on diagonal cubic surfaces and fourfolds over Fq(t). This is based on
joint work with Leonhard Hochfilzer.
(2) We study rational points on smooth complete intersections X defined by cubic and
quadratic hypersurfaces over Fq(t). We refine the Farey dissection of the “unit square”
developed by Vishe [202] and use the circle method with a Kloosterman refinement to
establish an asymptotic formula for the number of rational points of bounded height on
X when dim(X) ≥ 23. Under the same hypotheses, we also verify weak approximation.
(3) In joint work with Hochfilzer, we obtain upper bounds for the number of rational points of
bounded height on del Pezzo surfaces of low degree over any global field. Our approach
is to take hyperplane sections, which reduces the problem to uniform estimates for the
number of rational points on curves.
(4) We develop a version of the circle method capable of counting Fq-points on jet schemes
of moduli spaces of rational curves on hypersurfaces. Combining this with a spreading
out argument and a result of Mustaţă [150], this allows us to show that these moduli
spaces only have canonical singularities under suitable assumptions on the degree and the
dimension.
In addition, we give an overview of guiding questions and conjectures in the field of rational
points and explain the basic mechanism underlying the circle method.
},
  author       = {Glas, Jakob},
  issn         = {2663-337X},
  pages        = {195},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Counting rational points over function fields}},
  doi          = {10.15479/at:ista:18132},
  year         = {2024},
}

