@inproceedings{21140,
  abstract     = {We consider several problems related to packing forests in graphs. The first one is to find k edge-disjoint forests in a directed graph G of maximal size such that the indegree of each vertex in these forests is at most k. We describe a min-max characterization for this problem and show that it can be solved in almost linear time for fixed k, extending the algorithm of [Gabow, 1995]. Specifically, the complexity is O(kδm log n), where n, m are the number of vertices and edges in G respectively, and δ = max{1, k − k_G}, where k_G is the edge connectivity of the graph. Using our solution to this problem, we improve complexities for two existing applications: (1) k-forest problem: find k forests in an undirected graph G maximizing the number of edges in their union. We show how to solve this problem in O(k^3 min{kn, m} log^2 n + k · MAXFLOW(m, m) log n) time, breaking the O_k(n^{3/2}) complexity barrier of previously known approaches. (2) Directed edge-connectivity augmentation problem: find a smallest set of directed edges whose addition to the given directed graph makes it strongly k-connected. We improve the deterministic complexity for this problem from O(kδ(m + δn) log n) [Gabow, STOC 1994] to O(kδm log n). A similar approach with the same complexity also works for the undirected version of the problem.},
  author       = {Arkhipov, Pavel and Kolmogorov, Vladimir},
  booktitle    = {Proceedings of the 2026 Annual ACM-SIAM Symposium on Discrete Algorithms},
  location     = {Vancouver, Canada},
  pages        = {4023--4042},
  publisher    = {Society for Industrial and Applied Mathematics},
  title        = {{Faster algorithms for packing forests in graphs and related problems}},
  doi          = {10.1137/1.9781611978971.148},
  year         = {2026},
}

@article{18855,
  abstract     = {A central problem in computational statistics is to convert a procedure for sampling combinatorial objects into a procedure for counting those objects, and vice versa. We consider sampling problems which come from Gibbs distributions, which are families of probability distributions over a discrete space Ω with probability mass function of the form μ^Ω_β(ω) ∝ e^{β H(ω)} for β in an interval [β_min, β_max] and H(ω) ∈ {0} ∪ [1, n]. Two important parameters are the partition function, which is the normalization factor Z(β) = ∑_{ω ∈ Ω} e^{β H(ω)}, and the vector of pre-image counts c_x=|H^-1(x)|.
We develop black-box sampling algorithms to estimate the counts using roughly Õ(n²/ε²) samples for integer-valued distributions and Õ(q/ε²) samples for general distributions, where q = log(Z(β_max)/Z(β_min)) (ignoring some second-order terms and parameters). We show this is optimal up to logarithmic factors. We illustrate with improved algorithms for counting connected subgraphs, independent sets, and perfect matchings. As a key subroutine, we estimate all values of the partition function using Õ(n²/ε²) samples for integer-valued distributions and Õ(q/ε²) samples for general distributions. This improves over a prior algorithm of Huber (2015) which computes a single point estimate Z(β_max) and which uses a slightly larger amount of samples. We show matching lower bounds, demonstrating this complexity is optimal as a function of n and q up to logarithmic terms.},
  author       = {Harris, David G. and Kolmogorov, Vladimir},
  issn         = {1549-6333},
  journal      = {ACM Transactions on Algorithms},
  number       = {1},
  publisher    = {Association for Computing Machinery},
  title        = {{Parameter estimation for Gibbs distributions}},
  doi          = {10.1145/3685676},
  volume       = {21},
  year         = {2025},
}

@article{10045,
  abstract     = {Given a fixed finite metric space (V,μ), the {\em minimum 0-extension problem}, denoted as 0-Ext[μ], is equivalent to the following optimization problem: minimize a function of the form min_{x∈V^n} ∑_i f_i(x_i) + ∑_{ij} c_{ij} μ(x_i,x_j) where c_{ij}, c_{vi} are given nonnegative costs and f_i : V → R are functions given by f_i(x_i) = ∑_{v∈V} c_{vi} μ(x_i,v). The computational complexity of 0-Ext[μ] has been recently established by Karzanov and by Hirai: if metric μ is {\em orientable modular} then 0-Ext[μ] can be solved in polynomial time, otherwise 0-Ext[μ] is NP-hard. To prove the tractability part, Hirai developed a theory of discrete convex functions on orientable modular graphs generalizing several known classes of functions in discrete convex analysis, such as L♮-convex functions. We consider a more general version of the problem in which unary functions f_i(x_i) can additionally have terms of the form c_{uv;i} μ(x_i,{u,v}) for {u,v}∈F, where set F ⊆ \binom{V}{2} is fixed. We extend the complexity classification above by providing an explicit condition on (μ,F) for the problem to be tractable. In order to prove the tractability part, we generalize Hirai's theory and define a larger class of discrete convex functions. It covers, in particular, another well-known class of functions, namely submodular functions on an integer lattice. Finally, we improve the complexity of Hirai's algorithm for solving 0-Ext on orientable modular graphs.
},
  author       = {Dvorak, Martin and Kolmogorov, Vladimir},
  issn         = {1436-4646},
  journal      = {Mathematical Programming},
  keywords     = {minimum 0-extension problem, metric labeling problem, discrete metric spaces, metric extensions, computational complexity, valued constraint satisfaction problems, discrete convex analysis, L-convex functions},
  pages        = {279--322},
  publisher    = {Springer Nature},
  title        = {{Generalized minimum 0-extension problem and discrete convexity}},
  doi          = {10.1007/s10107-024-02064-5},
  volume       = {209},
  year         = {2025},
}

@article{21007,
  abstract     = {Currently, the best known tradeoff between approximation ratio and complexity for the Sparsest Cut problem is achieved by the algorithm in [Sherman, FOCS 2009]: it computes O(√(log n)/ε)-approximation using O(n^ε log^{O(1)} n) maxflows for any ε∈[Θ(1/log n),Θ(1)]. It works by solving the SDP relaxation of [Arora-Rao-Vazirani, STOC 2004] using the Multiplicative Weights Update algorithm (MW) of [Arora-Kale, JACM 2016]. To implement one MW step, Sherman approximately solves a multicommodity flow problem using another application of MW. Nested MW steps are solved via a certain "chaining" algorithm that combines results of multiple calls to the maxflow algorithm. We present an alternative approach that avoids solving the multicommodity flow problem and instead computes "violating paths". This simplifies Sherman's algorithm by removing a need for a nested application of MW, and also allows parallelization: we show how to compute O(√(log n)/ε)-approximation via O(log^{O(1)} n) maxflows using O(n^ε) processors. We also revisit Sherman's chaining algorithm, and present a simpler version together with a new analysis.},
  author       = {Kolmogorov, Vladimir},
  issn         = {1549-6333},
  journal      = {ACM Transactions on Algorithms},
  number       = {4},
  pages        = {1--22},
  publisher    = {Association for Computing Machinery},
  title        = {{A simpler and parallelizable O(√log n)-approximation algorithm for SPARSEST CUT}},
  doi          = {10.1145/3748723},
  volume       = {21},
  year         = {2025},
}

@article{21143,
  abstract     = {The Lovász Local Lemma (LLL) is a powerful tool in probabilistic
combinatorics which can be used to establish the existence of objects with certain
properties. The breakthrough paper by Moser & Tardos (STOC’09 and JACM 2010)
and follow-up work revealed that the LLL has intimate connections with a class of
stochastic local search algorithms for finding such desirable objects.
Besides conditions for convergence, many other natural questions can be asked
about algorithms; for instance, “are they parallelizable?”, “how many solutions can
they output?”, “what is the expected ‘weight’ of a solution?”. These questions and
more have been answered for a class of LLL-inspired algorithms called commutative. In
this paper we introduce a new, very natural and more general notion of commutativity
(essentially matrix commutativity) which allows us to show a number of new refined
properties of LLL-inspired local search algorithms with significantly simpler proofs.},
  author       = {Harris, David G. and Iliopoulos, Fotios and Kolmogorov, Vladimir},
  issn         = {1557-2862},
  journal      = {Theory of Computing},
  number       = {5},
  pages        = {1--34},
  publisher    = {University of Chicago Press},
  title        = {{A new notion of commutativity for the algorithmic Lovász Local Lemma}},
  doi          = {10.4086/toc.2025.v021a005},
  volume       = {21},
  year         = {2025},
}

@article{21144,
  abstract     = {This paper deals with the algorithmic aspects of solving feasibility problems of semidefinite programming (SDP), aka linear matrix inequalities (LMIs). Since in some SDP instances all feasible solutions have irrational entries, numerical solvers that work with rational numbers can only find an approximate solution. We study the following question: Is it possible to certify feasibility of a given SDP using an approximate solution that is sufficiently close to some exact solution? Existing approaches make the assumption that there exist rational feasible solutions (and use techniques such as rounding and lattice reduction algorithms). We propose an alternative approach that does not need this assumption. More specifically, we show how to construct a system of polynomial equations whose set of real solutions is guaranteed to have an isolated correct solution (assuming that the target exact solution is maximum-rank). This allows, in particular, for us to use algorithms from real algebraic geometry for solving systems of polynomial equations, yielding a hybrid (or symbolic-numerical) method for SDPs. We experimentally compare it with a pure symbolic method in [D. Henrion, S. Naldi, and M. Safey El Din, SIAM J. Optim., 26 (2016), pp. 2512–2539]; the hybrid method was able to certify feasibility of many SDP instances on which the aforementioned paper failed. Our approach may have further applications, such as refining an approximate solution using methods of numerical algebraic geometry for systems of polynomial equations.},
  author       = {Kolmogorov, Vladimir and Naldi, Simone and Zapata, Jeferson},
  issn         = {1095-7189},
  journal      = {SIAM Journal on Optimization},
  number       = {3},
  pages        = {1630--1654},
  publisher    = {Society for Industrial and Applied Mathematics},
  title        = {{Certifying solutions of degenerate semidefinite programs}},
  doi          = {10.1137/24m1664691},
  volume       = {35},
  year         = {2025},
}

@unpublished{21398,
  abstract     = {Seymour's decomposition theorem is a hallmark result in matroid theory presenting a structural characterization of the class of regular matroids. Formalization of matroid theory faces many challenges, most importantly that only a limited number of notions and results have been implemented so far. In this work, we formalize the proof of the forward (composition) direction of Seymour's theorem for regular matroids. To this end, we develop a library in Lean 4 that implements definitions and results about totally unimodular matrices, vector matroids, their standard representations, regular matroids, and 1-, 2-, and 3-sums of matrices and binary matroids given by their standard representations. Using this framework, we formally state Seymour's decomposition theorem and implement a formally verified proof of the composition direction in the setting where the matroids have finite rank and may have infinite ground sets.},
  author       = {Dvorak, Martin and Figueroa-Reid, Tristan and Hamadani, Rida and Hwang, Byung-Hak and Karunus, Evgenia and Kolmogorov, Vladimir and Meiburg, Alexander and Nelson, Alexander and Nelson, Peter and Sandey, Mark and Sergeev, Ivan},
  booktitle    = {arXiv},
  pages        = {21},
  title        = {{Composition direction of Seymour's theorem for regular matroids -- Formally verified}},
  doi          = {10.48550/arXiv.2509.20539},
  year         = {2025},
}

@inproceedings{17236,
  abstract     = {Currently, the best known tradeoff between approximation ratio and complexity for the Sparsest Cut problem is achieved by the algorithm in [Sherman, FOCS 2009]: it computes O(√(log n)/ε)-approximation using O(n^ε log^{O(1)} n) maxflows for any ε∈[Θ(1/log n),Θ(1)]. It works by solving the SDP relaxation of [Arora-Rao-Vazirani, STOC 2004] using the Multiplicative Weights Update algorithm (MW) of [Arora-Kale, JACM 2016]. To implement one MW step, Sherman approximately solves a multicommodity flow problem using another application of MW. Nested MW steps are solved via a certain "chaining" algorithm that combines results of multiple calls to the maxflow algorithm.
We present an alternative approach that avoids solving the multicommodity flow problem and instead computes "violating paths". This simplifies Sherman's algorithm by removing a need for a nested application of MW, and also allows parallelization: we show how to compute O(√(log n)/ε)-approximation via O(log^{O(1)} n) maxflows using O(n^ε) processors.
We also revisit Sherman's chaining algorithm, and present a simpler version together with a new analysis.},
  author       = {Kolmogorov, Vladimir},
  booktitle    = {Proceedings of the 36th ACM Symposium on Parallelism in Algorithms and Architectures},
  isbn         = {9798400704161},
  issn         = {1548-6109},
  location     = {Nantes, France},
  pages        = {403--414},
  publisher    = {Association for Computing Machinery},
  title        = {{A simpler and parallelizable O(√log n)-approximation algorithm for sparsest cut}},
  doi          = {10.1145/3626183.3659969},
  year         = {2024},
}

@unpublished{20071,
  abstract     = {Farkas established that a system of linear inequalities has a solution if and only if we cannot obtain a contradiction by taking a linear combination of the inequalities. We state and formally prove several Farkas-like theorems over linearly ordered fields in Lean 4. Furthermore, we extend duality theory to the case when some coefficients are allowed to take "infinite values".},
  author       = {Dvorak, Martin and Kolmogorov, Vladimir},
  booktitle    = {arXiv},
  keywords     = {Farkas lemma, linear programming, extended reals, calculus of inductive constructions},
  title        = {{Duality theory in linear optimization and its extensions -- formally verified}},
  doi          = {10.48550/arXiv.2409.08119},
  year         = {2024},
}

@inproceedings{14084,
  abstract     = {A central problem in computational statistics is to convert a procedure for sampling combinatorial objects into a procedure for counting those objects, and vice versa. We will consider sampling problems which come from Gibbs distributions, which are families of probability distributions over a discrete space Ω with probability mass function of the form μ^Ω_β(ω) ∝ e^{β H(ω)} for β in an interval [β_min, β_max] and H(ω) ∈ {0} ∪ [1, n].
The partition function is the normalization factor Z(β) = ∑_{ω ∈ Ω} e^{β H(ω)}, and the log partition ratio is defined as q = log(Z(β_max)/Z(β_min)).
We develop a number of algorithms to estimate the counts c_x using roughly Õ(q/ε²) samples for general Gibbs distributions and Õ(n²/ε²) samples for integer-valued distributions (ignoring some second-order terms and parameters). We show this is optimal up to logarithmic factors. We illustrate with improved algorithms for counting connected subgraphs and perfect matchings in a graph.},
  author       = {Harris, David G. and Kolmogorov, Vladimir},
  booktitle    = {50th International Colloquium on Automata, Languages, and Programming},
  isbn         = {9783959772785},
  issn         = {1868-8969},
  location     = {Paderborn, Germany},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Parameter estimation for Gibbs distributions}},
  doi          = {10.4230/LIPIcs.ICALP.2023.72},
  volume       = {261},
  year         = {2023},
}

@inproceedings{14448,
  abstract     = {We consider the problem of solving LP relaxations of MAP-MRF inference problems, and in particular the method proposed recently in [16], [35]. As a key computational subroutine, it uses a variant of the Frank-Wolfe (FW) method to minimize a smooth convex function over a combinatorial polytope. We propose an efficient implementation of this subroutine based on in-face Frank-Wolfe directions, introduced in [4] in a different context. More generally, we define an abstract data structure for a combinatorial subproblem that enables in-face FW directions, and describe its specialization for tree-structured MAP-MRF inference subproblems. Experimental results indicate that the resulting method is the current state-of-art LP solver for some classes of problems. Our code is available at pub.ist.ac.at/~vnk/papers/IN-FACE-FW.html.},
  author       = {Kolmogorov, Vladimir},
  booktitle    = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  isbn         = {9798350301298},
  issn         = {1063-6919},
  location     = {Vancouver, Canada},
  pages        = {11980--11989},
  publisher    = {IEEE},
  title        = {{Solving relaxations of MAP-MRF problems: Combinatorial in-face Frank-Wolfe directions}},
  doi          = {10.1109/CVPR52729.2023.01153},
  volume       = {2023},
  year         = {2023},
}

@article{10737,
  abstract     = {We consider two models for the sequence labeling (tagging) problem. The first one is a Pattern-Based Conditional Random Field (PB), in which the energy of a string (chain labeling) x=x1⁢…⁢xn∈Dn is a sum of terms over intervals [i,j] where each term is non-zero only if the substring xi⁢…⁢xj equals a prespecified word w∈Λ. The second model is a Weighted Context-Free Grammar (WCFG) frequently used for natural language processing. PB and WCFG encode local and non-local interactions respectively, and thus can be viewed as complementary. We propose a Grammatical Pattern-Based CRF model (GPB) that combines the two in a natural way. We argue that it has certain advantages over existing approaches such as the Hybrid model of Benedí and Sanchez that combines N-grams and WCFGs. The focus of this paper is to analyze the complexity of inference tasks in a GPB such as computing MAP. We present a polynomial-time algorithm for general GPBs and a faster version for a special case that we call Interaction Grammars.},
  author       = {Takhanov, Rustem and Kolmogorov, Vladimir},
  issn         = {1571-4128},
  journal      = {Intelligent Data Analysis},
  number       = {1},
  pages        = {257--272},
  publisher    = {IOS Press},
  title        = {{Combining pattern-based CRFs and weighted context-free grammars}},
  doi          = {10.3233/IDA-205623},
  volume       = {26},
  year         = {2022},
}

@inproceedings{10552,
  abstract     = {We study a class of convex-concave saddle-point problems of the form min_x max_y ⟨Kx,y⟩ + f_P(x) − h∗(y) where K is a linear operator, f_P is the sum of a convex function f with a Lipschitz-continuous gradient and the indicator function of a bounded convex polytope P, and h∗ is a convex (possibly nonsmooth) function. Such problem arises, for example, as a Lagrangian relaxation of various discrete optimization problems. Our main assumptions are the existence of an efficient linear minimization oracle (lmo) for f_P and an efficient proximal map for h∗ which motivate the solution via a blend of proximal primal-dual algorithms and Frank-Wolfe algorithms. In case h∗ is the indicator function of a linear constraint and function f is quadratic, we show a O(1/n²) convergence rate on the dual objective, requiring O(n log n) calls of lmo. If the problem comes from the constrained optimization problem min_{x∈R^d} {f_P(x) | Ax − b = 0} then we additionally get bound O(1/n²) both on the primal gap and on the infeasibility gap. In the most general case, we show a O(1/n) convergence rate of the primal-dual gap again requiring O(n log n) calls of lmo. To the best of our knowledge, this improves on the known convergence rates for the considered class of saddle-point problems. We show applications to labeling problems frequently appearing in machine learning and computer vision.},
  author       = {Kolmogorov, Vladimir and Pock, Thomas},
  booktitle    = {38th International Conference on Machine Learning},
  location     = {Virtual},
  title        = {{One-sided Frank-Wolfe algorithms for saddle problems}},
  year         = {2021},
}

@inproceedings{10072,
  abstract     = {The Lovász Local Lemma (LLL) is a powerful tool in probabilistic combinatorics which can be used to establish the existence of objects that satisfy certain properties. The breakthrough paper of Moser and Tardos and follow-up works revealed that the LLL has intimate connections with a class of stochastic local search algorithms for finding such desirable objects. In particular, it can be seen as a sufficient condition for this type of algorithms to converge fast. Besides conditions for existence of and fast convergence to desirable objects, one may naturally ask further questions regarding properties of these algorithms. For instance, "are they parallelizable?", "how many solutions can they output?", "what is the expected "weight" of a solution?", etc. These questions and more have been answered for a class of LLL-inspired algorithms called commutative. In this paper we introduce a new, very natural and more general notion of commutativity (essentially matrix commutativity) which allows us to show a number of new refined properties of LLL-inspired local search algorithms with significantly simpler proofs.},
  author       = {Harris, David G. and Iliopoulos, Fotis and Kolmogorov, Vladimir},
  booktitle    = {Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques},
  isbn         = {978-3-95977-207-5},
  issn         = {1868-8969},
  location     = {Virtual},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{A new notion of commutativity for the algorithmic Lovász Local Lemma}},
  doi          = {10.4230/LIPIcs.APPROX/RANDOM.2021.31},
  volume       = {207},
  year         = {2021},
}

@inproceedings{6725,
  abstract     = {A Valued Constraint Satisfaction Problem (VCSP) provides a common framework that can express a wide range of discrete optimization problems. A VCSP instance is given by a finite set of variables, a finite domain of labels, and an objective function to be minimized. This function is represented as a sum of terms where each term depends on a subset of the variables. To obtain different classes of optimization problems, one can restrict all terms to come from a fixed set Γ of cost functions, called a language. 
Recent breakthrough results have established a complete complexity classification of such classes with respect to language Γ: if all cost functions in Γ satisfy a certain algebraic condition then all Γ-instances can be solved in polynomial time, otherwise the problem is NP-hard. Unfortunately, testing this condition for a given language Γ is known to be NP-hard. We thus study exponential algorithms for this meta-problem. We show that the tractability condition of a finite-valued language Γ can be tested in O(3^{|D|/3} ⋅ poly(size(Γ))) time, where D is the domain of Γ and poly(⋅) is some fixed polynomial. We also obtain a matching lower bound under the Strong Exponential Time Hypothesis (SETH). More precisely, we prove that for any constant δ<1 there is no O(3^{δ|D|/3}) algorithm, assuming that SETH holds.},
  author       = {Kolmogorov, Vladimir},
  booktitle    = {46th International Colloquium on Automata, Languages and Programming},
  isbn         = {978-3-95977-109-2},
  issn         = {1868-8969},
  location     = {Patras, Greece},
  pages        = {77:1--77:12},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Testing the complexity of a valued CSP language}},
  doi          = {10.4230/LIPICS.ICALP.2019.77},
  volume       = {132},
  year         = {2019},
}

@article{7412,
  abstract     = {We develop a framework for the rigorous analysis of focused stochastic local search algorithms. These algorithms search a state space by repeatedly selecting some constraint that is violated in the current state and moving to a random nearby state that addresses the violation, while (we hope) not introducing many new violations. An important class of focused local search algorithms with provable performance guarantees has recently arisen from algorithmizations of the Lovász local lemma (LLL), a nonconstructive tool for proving the existence of satisfying states by introducing a background measure on the state space. While powerful, the state transitions of algorithms in this class must be, in a precise sense, perfectly compatible with the background measure. In many applications this is a very restrictive requirement, and one needs to step outside the class. Here we introduce the notion of measure distortion and develop a framework for analyzing arbitrary focused stochastic local search algorithms, recovering LLL algorithmizations as the special case of no distortion. Our framework takes as input an arbitrary algorithm of such type and an arbitrary probability measure and shows how to use the measure as a yardstick of algorithmic progress, even for algorithms designed independently of the measure.},
  author       = {Achlioptas, Dimitris and Iliopoulos, Fotis and Kolmogorov, Vladimir},
  issn         = {1095-7111},
  journal      = {SIAM Journal on Computing},
  number       = {5},
  pages        = {1583--1602},
  publisher    = {Society for Industrial and Applied Mathematics},
  title        = {{A local lemma for focused stochastic algorithms}},
  doi          = {10.1137/16m109332x},
  volume       = {48},
  year         = {2019},
}

@inproceedings{7468,
  abstract     = {We present a new proximal bundle method for Maximum-A-Posteriori (MAP) inference in structured energy minimization problems. The method optimizes a Lagrangean relaxation of the original energy minimization problem using a multi plane block-coordinate Frank-Wolfe method that takes advantage of the specific structure of the Lagrangean decomposition. We show empirically that our method outperforms state-of-the-art Lagrangean decomposition based algorithms on some challenging Markov Random Field, multi-label discrete tomography and graph matching problems.},
  author       = {Swoboda, Paul and Kolmogorov, Vladimir},
  booktitle    = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  isbn         = {9781728132938},
  issn         = {1063-6919},
  location     = {Long Beach, CA, United States},
  publisher    = {IEEE},
  title        = {{MAP inference via block-coordinate Frank-Wolfe algorithm}},
  doi          = {10.1109/CVPR.2019.01140},
  volume       = {2019-June},
  year         = {2019},
}

@inproceedings{7639,
  abstract     = {Deep neural networks (DNNs) have become increasingly important due to their excellent empirical performance on a wide range of problems. However, regularization is generally achieved by indirect means, largely due to the complex set of functions defined by a network and the difficulty in measuring function complexity. There exists no method in the literature for additive regularization based on a norm of the function, as is classically considered in statistical learning theory. In this work, we study the tractability of function norms for deep neural networks with ReLU activations. We provide, to the best of our knowledge, the first proof in the literature of the NP-hardness of computing function norms of DNNs of 3 or more layers. We also highlight a fundamental difference between shallow and deep networks. In the light on these results, we propose a new regularization strategy based on approximate function norms, and show its efficiency on a segmentation task with a DNN.},
  author       = {Rannen-Triki, Amal and Berman, Maxim and Kolmogorov, Vladimir and Blaschko, Matthew B.},
  booktitle    = {Proceedings of the 2019 International Conference on Computer Vision Workshop},
  isbn         = {9781728150239},
  location     = {Seoul, South Korea},
  publisher    = {IEEE},
  title        = {{Function norms for neural networks}},
  doi          = {10.1109/ICCVW.2019.00097},
  year         = {2019},
}

@inproceedings{273,
  abstract     = {The accuracy of information retrieval systems is often measured using complex loss functions such as the average precision (AP) or the normalized discounted cumulative gain (NDCG). Given a set of positive and negative samples, the parameters of a retrieval system can be estimated by minimizing these loss functions. However, the non-differentiability and non-decomposability of these loss functions does not allow for simple gradient based optimization algorithms. This issue is generally circumvented by either optimizing a structured hinge-loss upper bound to the loss function or by using asymptotic methods like the direct-loss minimization framework. Yet, the high computational complexity of loss-augmented inference, which is necessary for both the frameworks, prohibits its use in large training data sets. To alleviate this deficiency, we present a novel quicksort flavored algorithm for a large class of non-decomposable loss functions. We provide a complete characterization of the loss functions that are amenable to our algorithm, and show that it includes both AP and NDCG based loss functions. Furthermore, we prove that no comparison based algorithm can improve upon the computational complexity of our approach asymptotically. We demonstrate the effectiveness of our approach in the context of optimizing the structured hinge loss upper bound of AP and NDCG loss for learning models for a variety of vision tasks. We show that our approach provides significantly better results than simpler decomposable loss functions, while requiring a comparable training time.},
  author       = {Mohapatra, Pritish and Rolinek, Michal and Jawahar, C V and Kolmogorov, Vladimir and Kumar, M Pawan},
  booktitle    = {2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  isbn         = {9781538664209},
  location     = {Salt Lake City, UT, USA},
  pages        = {3693--3701},
  publisher    = {IEEE},
  title        = {{Efficient optimization for rank-based loss functions}},
  doi          = {10.1109/cvpr.2018.00389},
  year         = {2018},
}

@article{18,
  abstract     = {An N-superconcentrator is a directed, acyclic graph with N input nodes and N output nodes such that every subset of the inputs and every subset of the outputs of same cardinality can be connected by node-disjoint paths. It is known that linear-size and bounded-degree superconcentrators exist. We prove the existence of such superconcentrators with asymptotic density 25.3 (where the density is the number of edges divided by N). The previously best known densities were 28 [12] and 27.4136 [17].},
  author       = {Kolmogorov, Vladimir and Rolinek, Michal},
  issn         = {0381-7032},
  journal      = {Ars Combinatoria},
  number       = {10},
  pages        = {269--304},
  publisher    = {Charles Babbage Research Centre},
  title        = {{Superconcentrators of density 25.3}},
  volume       = {141},
  year         = {2018},
}

