[{"issue":"6","date_created":"2019-07-31T09:39:42Z","arxiv":1,"article_type":"original","publication_status":"published","isi":1,"citation":{"ista":"Javanmard A, Mondelli M, Montanari A. 2020. Analysis of a two-layer neural network via displacement convexity. Annals of Statistics. 48(6), 3619–3642.","ieee":"A. Javanmard, M. Mondelli, and A. Montanari, “Analysis of a two-layer neural network via displacement convexity,” <i>Annals of Statistics</i>, vol. 48, no. 6. Institute of Mathematical Statistics, pp. 3619–3642, 2020.","chicago":"Javanmard, Adel, Marco Mondelli, and Andrea Montanari. “Analysis of a Two-Layer Neural Network via Displacement Convexity.” <i>Annals of Statistics</i>. Institute of Mathematical Statistics, 2020. <a href=\"https://doi.org/10.1214/20-AOS1945\">https://doi.org/10.1214/20-AOS1945</a>.","mla":"Javanmard, Adel, et al. “Analysis of a Two-Layer Neural Network via Displacement Convexity.” <i>Annals of Statistics</i>, vol. 48, no. 6, Institute of Mathematical Statistics, 2020, pp. 3619–42, doi:<a href=\"https://doi.org/10.1214/20-AOS1945\">10.1214/20-AOS1945</a>.","apa":"Javanmard, A., Mondelli, M., &#38; Montanari, A. (2020). Analysis of a two-layer neural network via displacement convexity. <i>Annals of Statistics</i>. Institute of Mathematical Statistics. <a href=\"https://doi.org/10.1214/20-AOS1945\">https://doi.org/10.1214/20-AOS1945</a>","short":"A. Javanmard, M. Mondelli, A. Montanari, Annals of Statistics 48 (2020) 3619–3642."},"quality_controlled":"1","publication":"Annals of Statistics","article_processing_charge":"No","page":"3619-3642","scopus_import":"1","creator":{"id":"4435EBFC-F248-11E8-B48F-1D18A9856A87","login":"apreinsp"},"publication_identifier":{"issn":[],"eissn":[]},"department":[{"_id":"MaMo","tree":[{"_id":"ResearchGroups"},{"_id":"IST"}]}],"oa":1,"oa_version":"Preprint","_id":"6748","dc":{"publisher":["Institute of Mathematical Statistics"],"creator":["Javanmard, Adel","Mondelli, Marco ; https://orcid.org/0000-0002-3242-7020","Montanari, Andrea"],"title":["Analysis of a two-layer neural network via displacement convexity"],"relation":["info:eu-repo/semantics/altIdentifier/doi/10.1214/20-AOS1945","info:eu-repo/semantics/altIdentifier/issn/1932-6157","info:eu-repo/semantics/altIdentifier/e-issn/1941-7330","info:eu-repo/semantics/altIdentifier/wos/000598369200021","info:eu-repo/semantics/altIdentifier/arxiv/1901.01375"],"source":["Javanmard A, Mondelli M, Montanari A. Analysis of a two-layer neural network via displacement convexity. <i>Annals of Statistics</i>. 2020;48(6):3619-3642. doi:<a href=\"https://doi.org/10.1214/20-AOS1945\">10.1214/20-AOS1945</a>"],"rights":["info:eu-repo/semantics/openAccess"],"identifier":["https://research-explorer.ista.ac.at/record/6748"],"date":["2020"],"type":["info:eu-repo/semantics/article","doc-type:article","text","http://purl.org/coar/resource_type/c_2df8fbb1"],"description":["Fitting a function by using linear combinations of a large number N of `simple' components is one of the most fruitful ideas in statistical learning. This idea lies at the core of a variety of methods, from two-layer neural networks to kernel regression, to boosting. In general, the resulting risk minimization problem is non-convex and is solved by gradient descent or its variants. Unfortunately, little is known about global convergence properties of these approaches.\r\nHere we consider the problem of learning a concave function f on a compact convex domain Ω⊆ℝd, using linear combinations of `bump-like' components (neurons). The parameters to be fitted are the centers of N bumps, and the resulting empirical risk minimization problem is highly non-convex. We prove that, in the limit in which the number of neurons diverges, the evolution of gradient descent converges to a Wasserstein gradient flow in the space of probability distributions over Ω. Further, when the bump width δ tends to 0, this gradient flow has a limit which is a viscous porous medium equation. Remarkably, the cost function optimized by this gradient flow exhibits a special property known as displacement convexity, which implies exponential convergence rates for N→∞, δ→0. Surprisingly, this asymptotic theory appears to capture well the behavior for moderate values of δ,N. Explaining this phenomenon, and understanding the dependence on δ,N in a quantitative manner remains an outstanding challenge."],"language":["eng"]},"uri_base":"https://research-explorer.ista.ac.at","tmp_coar":"journal_article_original","date_published":"2020-12-11T00:00:00Z","volume":48,"month":"12","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1901.01375"}],"date_updated":"2024-10-21T06:02:33Z","author":[{"last_name":"Javanmard","full_name":"Javanmard, Adel","first_name":"Adel"},{"orcid":"https://orcid.org/0000-0002-3242-7020","last_name":"Mondelli","full_name":"Mondelli, Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","first_name":"Marco"},{"full_name":"Montanari, Andrea","first_name":"Andrea","last_name":"Montanari"}],"external_id":{"isi":[],"arxiv":[]},"status":"public","type":"journal_article","language":[{}],"intvolume":"        48","abstract":[{"lang":"eng"}],"dini_type":"doc-type:article","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"11"}]