[{"citation":{"chicago":"Négiar, Geoffrey, Gideon Dresdner, Alicia Tsai, Laurent El Ghaoui, Francesco Locatello, Robert M. Freund, and Fabian Pedregosa. “Stochastic Frank-Wolfe for Constrained Finite-Sum Minimization.” In <i>Proceedings of the 37th International Conference on Machine Learning</i>, 119:7253–62, 2020.","ama":"Négiar G, Dresdner G, Tsai A, et al. Stochastic Frank-Wolfe for constrained finite-sum minimization. In: <i>Proceedings of the 37th International Conference on Machine Learning</i>. Vol 119. ; 2020:7253-7262.","ista":"Négiar G, Dresdner G, Tsai A, Ghaoui LE, Locatello F, Freund RM, Pedregosa F. 2020. Stochastic Frank-Wolfe for constrained finite-sum minimization. Proceedings of the 37th International Conference on Machine Learning. International Conference on Machine Learning, PMLR, vol. 119, 7253–7262.","apa":"Négiar, G., Dresdner, G., Tsai, A., Ghaoui, L. E., Locatello, F., Freund, R. M., &#38; Pedregosa, F. (2020). Stochastic Frank-Wolfe for constrained finite-sum minimization. In <i>Proceedings of the 37th International Conference on Machine Learning</i> (Vol. 119, pp. 7253–7262). Virtual.","mla":"Négiar, Geoffrey, et al. “Stochastic Frank-Wolfe for Constrained Finite-Sum Minimization.” <i>Proceedings of the 37th International Conference on Machine Learning</i>, vol. 119, 2020, pp. 7253–62.","short":"G. Négiar, G. Dresdner, A. Tsai, L.E. Ghaoui, F. Locatello, R.M. Freund, F. Pedregosa, in:, Proceedings of the 37th International Conference on Machine Learning, 2020, pp. 7253–7262.","ieee":"G. Négiar <i>et al.</i>, “Stochastic Frank-Wolfe for constrained finite-sum minimization,” in <i>Proceedings of the 37th International Conference on Machine Learning</i>, Virtual, 2020, vol. 119, pp. 7253–7262."},"day":"27","type":"conference","quality_controlled":"1","_id":"14187","language":[{"iso":"eng"}],"abstract":[{"lang":"eng","text":"We propose a novel Stochastic Frank-Wolfe (a.k.a. 
conditional gradient)\r\nalgorithm for constrained smooth finite-sum minimization with a generalized\r\nlinear prediction/structure. This class of problems includes empirical risk\r\nminimization with sparse, low-rank, or other structured constraints. The\r\nproposed method is simple to implement, does not require step-size tuning, and\r\nhas a constant per-iteration cost that is independent of the dataset size.\r\nFurthermore, as a byproduct of the method we obtain a stochastic estimator of\r\nthe Frank-Wolfe gap that can be used as a stopping criterion. Depending on the\r\nsetting, the proposed method matches or improves on the best computational\r\nguarantees for Stochastic Frank-Wolfe algorithms. Benchmarks on several\r\ndatasets highlight different regimes in which the proposed method exhibits a\r\nfaster empirical convergence than related methods. Finally, we provide an\r\nimplementation of all considered methods in an open-source package."}],"external_id":{"arxiv":["2002.11860"]},"main_file_link":[{"url":"https://arxiv.org/abs/2002.11860","open_access":"1"}],"date_published":"2020-07-27T00:00:00Z","year":"2020","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","intvolume":"       119","publication":"Proceedings of the 37th International Conference on Machine Learning","date_updated":"2023-09-12T08:03:40Z","page":"7253-7262","status":"public","publication_status":"published","conference":{"end_date":"2020-07-18","location":"Virtual","name":"International Conference on Machine Learning","start_date":"2020-07-13"},"oa_version":"Preprint","alternative_title":["PMLR"],"date_created":"2023-08-22T14:07:52Z","title":"Stochastic Frank-Wolfe for constrained finite-sum minimization","arxiv":1,"department":[{"_id":"FrLo"}],"month":"07","volume":119,"author":[{"full_name":"Négiar, Geoffrey","last_name":"Négiar","first_name":"Geoffrey"},{"first_name":"Gideon","full_name":"Dresdner, Gideon","last_name":"Dresdner"},{"last_name":"Tsai","full_name":"Tsai, 
Alicia","first_name":"Alicia"},{"first_name":"Laurent El","full_name":"Ghaoui, Laurent El","last_name":"Ghaoui"},{"last_name":"Locatello","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco"},{"last_name":"Freund","full_name":"Freund, Robert M.","first_name":"Robert M."},{"full_name":"Pedregosa, Fabian","last_name":"Pedregosa","first_name":"Fabian"}],"oa":1,"extern":"1","article_processing_charge":"No"},{"article_processing_charge":"No","extern":"1","oa":1,"author":[{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","full_name":"Locatello, Francesco","last_name":"Locatello"},{"last_name":"Poole","full_name":"Poole, Ben","first_name":"Ben"},{"full_name":"Rätsch, Gunnar","last_name":"Rätsch","first_name":"Gunnar"},{"first_name":"Bernhard","full_name":"Schölkopf, Bernhard","last_name":"Schölkopf"},{"first_name":"Olivier","last_name":"Bachem","full_name":"Bachem, Olivier"},{"first_name":"Michael","last_name":"Tschannen","full_name":"Tschannen, Michael"}],"volume":119,"department":[{"_id":"FrLo"}],"month":"07","arxiv":1,"date_created":"2023-08-22T14:08:14Z","alternative_title":["PMLR"],"title":"Weakly-supervised disentanglement without compromises","oa_version":"Preprint","publication_status":"published","conference":{"end_date":"2020-07-18","location":"Virtual","name":"International Conference on Machine Learning","start_date":"2020-07-13"},"page":"6348–6359","status":"public","scopus_import":"1","date_updated":"2024-10-14T12:28:02Z","publication":"Proceedings of the 37th International Conference on Machine Learning","intvolume":"       119","year":"2020","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_published":"2020-07-07T00:00:00Z","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2002.02886"}],"_id":"14188","abstract":[{"text":"Intelligent agents should be able to learn useful representations by\r\nobserving 
changes in their environment. We model such observations as pairs of\r\nnon-i.i.d. images sharing at least one of the underlying factors of variation.\r\nFirst, we theoretically show that only knowing how many factors have changed,\r\nbut not which ones, is sufficient to learn disentangled representations.\r\nSecond, we provide practical algorithms that learn disentangled representations\r\nfrom pairs of images without requiring annotation of groups, individual\r\nfactors, or the number of factors that have changed. Third, we perform a\r\nlarge-scale empirical study and show that such pairs of observations are\r\nsufficient to reliably learn disentangled representations on several benchmark\r\ndata sets. Finally, we evaluate our learned representations and find that they\r\nare simultaneously useful on a diverse suite of tasks, including generalization\r\nunder covariate shifts, fairness, and abstract reasoning. Overall, our results\r\ndemonstrate that weak supervision enables learning of useful disentangled\r\nrepresentations in realistic scenarios.","lang":"eng"}],"language":[{"iso":"eng"}],"external_id":{"arxiv":["2002.02886"]},"type":"conference","quality_controlled":"1","day":"07","citation":{"chicago":"Locatello, Francesco, Ben Poole, Gunnar Rätsch, Bernhard Schölkopf, Olivier Bachem, and Michael Tschannen. “Weakly-Supervised Disentanglement without Compromises.” In <i>Proceedings of the 37th International Conference on Machine Learning</i>, 119:6348–6359, 2020.","ista":"Locatello F, Poole B, Rätsch G, Schölkopf B, Bachem O, Tschannen M. 2020. Weakly-supervised disentanglement without compromises. Proceedings of the 37th International Conference on Machine Learning. International Conference on Machine Learning, PMLR, vol. 119, 6348–6359.","ama":"Locatello F, Poole B, Rätsch G, Schölkopf B, Bachem O, Tschannen M. Weakly-supervised disentanglement without compromises. In: <i>Proceedings of the 37th International Conference on Machine Learning</i>. Vol 119. 
; 2020:6348–6359.","apa":"Locatello, F., Poole, B., Rätsch, G., Schölkopf, B., Bachem, O., &#38; Tschannen, M. (2020). Weakly-supervised disentanglement without compromises. In <i>Proceedings of the 37th International Conference on Machine Learning</i> (Vol. 119, pp. 6348–6359). Virtual.","short":"F. Locatello, B. Poole, G. Rätsch, B. Schölkopf, O. Bachem, M. Tschannen, in:, Proceedings of the 37th International Conference on Machine Learning, 2020, pp. 6348–6359.","ieee":"F. Locatello, B. Poole, G. Rätsch, B. Schölkopf, O. Bachem, and M. Tschannen, “Weakly-supervised disentanglement without compromises,” in <i>Proceedings of the 37th International Conference on Machine Learning</i>, Virtual, 2020, vol. 119, pp. 6348–6359.","mla":"Locatello, Francesco, et al. “Weakly-Supervised Disentanglement without Compromises.” <i>Proceedings of the 37th International Conference on Machine Learning</i>, vol. 119, 2020, pp. 6348–6359."}},{"publication":"Journal of Machine Learning Research","date_updated":"2024-10-14T12:28:26Z","scopus_import":"1","status":"public","has_accepted_license":"1","day":"01","citation":{"apa":"Locatello, F., Bauer, S., Lucic, M., Rätsch, G., Gelly, S., Schölkopf, B., &#38; Bachem, O. (2020). A sober look at the unsupervised learning of disentangled representations and their evaluation. <i>Journal of Machine Learning Research</i>. MIT Press.","short":"F. Locatello, S. Bauer, M. Lucic, G. Rätsch, S. Gelly, B. Schölkopf, O. Bachem, Journal of Machine Learning Research 21 (2020).","ieee":"F. Locatello <i>et al.</i>, “A sober look at the unsupervised learning of disentangled representations and their evaluation,” <i>Journal of Machine Learning Research</i>, vol. 21. MIT Press, 2020.","mla":"Locatello, Francesco, et al. “A Sober Look at the Unsupervised Learning of Disentangled Representations and Their Evaluation.” <i>Journal of Machine Learning Research</i>, vol. 
21, 209, MIT Press, 2020.","chicago":"Locatello, Francesco, Stefan Bauer, Mario Lucic, Gunnar Rätsch, Sylvain Gelly, Bernhard Schölkopf, and Olivier Bachem. “A Sober Look at the Unsupervised Learning of Disentangled Representations and Their Evaluation.” <i>Journal of Machine Learning Research</i>. MIT Press, 2020.","ista":"Locatello F, Bauer S, Lucic M, Rätsch G, Gelly S, Schölkopf B, Bachem O. 2020. A sober look at the unsupervised learning of disentangled representations and their evaluation. Journal of Machine Learning Research. 21, 209.","ama":"Locatello F, Bauer S, Lucic M, et al. A sober look at the unsupervised learning of disentangled representations and their evaluation. <i>Journal of Machine Learning Research</i>. 2020;21."},"language":[{"iso":"eng"}],"_id":"14195","abstract":[{"lang":"eng","text":"The idea behind the unsupervised learning of disentangled representations is that real-world data is generated by a few explanatory factors of variation which can be recovered by unsupervised learning algorithms. In this paper, we provide a sober look at recent progress in the field and challenge some common assumptions. We first theoretically show that the unsupervised learning of disentangled representations is fundamentally impossible without inductive biases on both the models and the data. Then, we train over 14000\r\n models covering most prominent methods and evaluation metrics in a reproducible large-scale experimental study on eight data sets. We observe that while the different methods successfully enforce properties “encouraged” by the corresponding losses, well-disentangled models seemingly cannot be identified without supervision. Furthermore, different evaluation metrics do not always agree on what should be considered “disentangled” and exhibit systematic differences in the estimation. Finally, increased disentanglement does not seem to necessarily lead to a decreased sample complexity of learning for downstream tasks. 
Our results suggest that future work on disentanglement learning should be explicit about the role of inductive biases and (implicit) supervision, investigate concrete benefits of enforcing disentanglement of the learned representations, and consider a reproducible experimental setup covering several data sets."}],"external_id":{"arxiv":["2010.14766"]},"type":"journal_article","quality_controlled":"1","date_published":"2020-09-01T00:00:00Z","main_file_link":[{"url":"https://jmlr.csail.mit.edu/papers/v21/19-976.html","open_access":"1"}],"article_number":"209","intvolume":"        21","year":"2020","ddc":["000"],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","author":[{"first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","last_name":"Locatello"},{"full_name":"Bauer, Stefan","last_name":"Bauer","first_name":"Stefan"},{"first_name":"Mario","full_name":"Lucic, Mario","last_name":"Lucic"},{"first_name":"Gunnar","last_name":"Rätsch","full_name":"Rätsch, Gunnar"},{"first_name":"Sylvain","last_name":"Gelly","full_name":"Gelly, Sylvain"},{"first_name":"Bernhard","full_name":"Schölkopf, Bernhard","last_name":"Schölkopf"},{"first_name":"Olivier","full_name":"Bachem, Olivier","last_name":"Bachem"}],"volume":21,"article_type":"original","month":"09","department":[{"_id":"FrLo"}],"oa":1,"extern":"1","tmp":{"short":"CC BY (4.0)","image":"/images/cc_by.png","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)"},"article_processing_charge":"No","publisher":"MIT Press","publication_status":"published","title":"A sober look at the unsupervised learning of disentangled representations and their evaluation","date_created":"2023-08-22T14:10:34Z","oa_version":"Published Version","arxiv":1},{"page":"11525-11538","status":"public","date_updated":"2025-07-10T11:50:47Z","publication":"34th International 
Conference on Neural Information Processing Systems","date_published":"2020-12-20T00:00:00Z","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2006.15055","open_access":"1"}],"intvolume":"        33","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2020","citation":{"ama":"Locatello F, Weissenborn D, Unterthiner T, et al. Object-centric learning with slot attention. In: <i>34th International Conference on Neural Information Processing Systems</i>. Vol 33. Neural Information Processing Systems Foundation; 2020:11525-11538.","ista":"Locatello F, Weissenborn D, Unterthiner T, Mahendran A, Heigold G, Uszkoreit J, Dosovitskiy A, Kipf T. 2020. Object-centric learning with slot attention. 34th International Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 33, 11525–11538.","chicago":"Locatello, Francesco, Dirk Weissenborn, Thomas Unterthiner, Aravindh Mahendran, Georg Heigold, Jakob Uszkoreit, Alexey Dosovitskiy, and Thomas Kipf. “Object-Centric Learning with Slot Attention.” In <i>34th International Conference on Neural Information Processing Systems</i>, 33:11525–38. Neural Information Processing Systems Foundation, 2020.","mla":"Locatello, Francesco, et al. “Object-Centric Learning with Slot Attention.” <i>34th International Conference on Neural Information Processing Systems</i>, vol. 33, Neural Information Processing Systems Foundation, 2020, pp. 11525–38.","short":"F. Locatello, D. Weissenborn, T. Unterthiner, A. Mahendran, G. Heigold, J. Uszkoreit, A. Dosovitskiy, T. Kipf, in:, 34th International Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2020, pp. 11525–11538.","ieee":"F. Locatello <i>et al.</i>, “Object-centric learning with slot attention,” in <i>34th International Conference on Neural Information Processing Systems</i>, Virtual, 2020, vol. 33, pp. 
11525–11538.","apa":"Locatello, F., Weissenborn, D., Unterthiner, T., Mahendran, A., Heigold, G., Uszkoreit, J., … Kipf, T. (2020). Object-centric learning with slot attention. In <i>34th International Conference on Neural Information Processing Systems</i> (Vol. 33, pp. 11525–11538). Virtual: Neural Information Processing Systems Foundation."},"day":"20","external_id":{"arxiv":["2006.15055"]},"_id":"14326","language":[{"iso":"eng"}],"abstract":[{"lang":"eng","text":"Learning object-centric representations of complex scenes is a promising step towards enabling efficient abstract reasoning from low-level perceptual features. Yet, most deep learning approaches learn distributed representations that do not capture the compositional properties of natural scenes. In this paper, we present the Slot Attention module, an architectural component that interfaces with perceptual representations such as the output of a convolutional neural network and produces a set of task-dependent abstract representations which we call slots. These slots are exchangeable and can bind to any object in the input by specializing through a competitive procedure over multiple rounds of attention. 
We empirically demonstrate that Slot Attention can extract object-centric representations that enable generalization to unseen compositions when trained on unsupervised object discovery and supervised property prediction tasks.\r\n\r\n"}],"quality_controlled":"1","type":"conference","extern":"1","article_processing_charge":"No","publisher":"Neural Information Processing Systems Foundation","volume":33,"author":[{"last_name":"Locatello","full_name":"Locatello, Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683"},{"first_name":"Dirk","full_name":"Weissenborn, Dirk","last_name":"Weissenborn"},{"first_name":"Thomas","last_name":"Unterthiner","full_name":"Unterthiner, Thomas"},{"full_name":"Mahendran, Aravindh","last_name":"Mahendran","first_name":"Aravindh"},{"full_name":"Heigold, Georg","last_name":"Heigold","first_name":"Georg"},{"full_name":"Uszkoreit, Jakob","last_name":"Uszkoreit","first_name":"Jakob"},{"last_name":"Dosovitskiy","full_name":"Dosovitskiy, Alexey","first_name":"Alexey"},{"last_name":"Kipf","full_name":"Kipf, Thomas","first_name":"Thomas"}],"month":"12","department":[{"_id":"FrLo"}],"oa":1,"alternative_title":["Advances in Neural Information Processing Systems"],"title":"Object-centric learning with slot attention","date_created":"2023-09-13T12:03:46Z","oa_version":"Preprint","arxiv":1,"publication_identifier":{"eissn":["1049-5258"],"isbn":["9781713829546"]},"publication_status":"published","conference":{"start_date":"2020-12-06","name":"NeurIPS: Neural Information Processing Systems","location":"Virtual","end_date":"2020-12-12"}},{"oa":1,"department":[{"_id":"FrLo"}],"month":"12","publication":"8th International Conference on Learning Representations","date_updated":"2023-09-12T07:01:34Z","author":[{"last_name":"Locatello","full_name":"Locatello, 
Francesco","orcid":"0000-0002-4850-0683","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"},{"first_name":"Michael","last_name":"Tschannen","full_name":"Tschannen, Michael"},{"last_name":"Bauer","full_name":"Bauer, Stefan","first_name":"Stefan"},{"last_name":"Rätsch","full_name":"Rätsch, Gunnar","first_name":"Gunnar"},{"first_name":"Bernhard","last_name":"Schölkopf","full_name":"Schölkopf, Bernhard"},{"full_name":"Bachem, Olivier","last_name":"Bachem","first_name":"Olivier"}],"status":"public","article_processing_charge":"No","scopus_import":"1","extern":"1","type":"conference","conference":{"location":"Virtual","end_date":"2020-05-01","start_date":"2020-04-26","name":"ICLR: International Conference on Learning Representations"},"publication_status":"published","quality_controlled":"1","language":[{"iso":"eng"}],"_id":"14184","abstract":[{"lang":"eng","text":"Learning disentangled representations is considered a cornerstone problem in\r\nrepresentation learning. Recently, Locatello et al. (2019) demonstrated that\r\nunsupervised disentanglement learning without inductive biases is theoretically\r\nimpossible and that existing inductive biases and unsupervised methods do not\r\nallow to consistently learn disentangled representations. However, in many\r\npractical settings, one might have access to a limited amount of supervision,\r\nfor example through manual labeling of (some) factors of variation in a few\r\ntraining examples. In this paper, we investigate the impact of such supervision\r\non state-of-the-art disentanglement methods and perform a large scale study,\r\ntraining over 52000 models under well-defined and reproducible experimental\r\nconditions. We observe that a small number of labeled examples (0.01--0.5\\% of\r\nthe data set), with potentially imprecise and incomplete labels, is sufficient\r\nto perform model selection on state-of-the-art unsupervised models. 
Further, we\r\ninvestigate the benefit of incorporating supervision into the training process.\r\nOverall, we empirically validate that with little and imprecise supervision it\r\nis possible to reliably learn disentangled representations."}],"external_id":{"arxiv":["1905.01258"]},"day":"20","citation":{"apa":"Locatello, F., Tschannen, M., Bauer, S., Rätsch, G., Schölkopf, B., &#38; Bachem, O. (2019). Disentangling factors of variation using few labels. In <i>8th International Conference on Learning Representations</i>. Virtual.","short":"F. Locatello, M. Tschannen, S. Bauer, G. Rätsch, B. Schölkopf, O. Bachem, in:, 8th International Conference on Learning Representations, 2019.","ieee":"F. Locatello, M. Tschannen, S. Bauer, G. Rätsch, B. Schölkopf, and O. Bachem, “Disentangling factors of variation using few labels,” in <i>8th International Conference on Learning Representations</i>, Virtual, 2019.","mla":"Locatello, Francesco, et al. “Disentangling Factors of Variation Using Few Labels.” <i>8th International Conference on Learning Representations</i>, 2019.","chicago":"Locatello, Francesco, Michael Tschannen, Stefan Bauer, Gunnar Rätsch, Bernhard Schölkopf, and Olivier Bachem. “Disentangling Factors of Variation Using Few Labels.” In <i>8th International Conference on Learning Representations</i>, 2019.","ista":"Locatello F, Tschannen M, Bauer S, Rätsch G, Schölkopf B, Bachem O. 2019. Disentangling factors of variation using few labels. 8th International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","ama":"Locatello F, Tschannen M, Bauer S, Rätsch G, Schölkopf B, Bachem O. Disentangling factors of variation using few labels. In: <i>8th International Conference on Learning Representations</i>. 
; 2019."},"year":"2019","arxiv":1,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Preprint","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1905.01258"}],"date_published":"2019-12-20T00:00:00Z","date_created":"2023-08-22T14:06:37Z","title":"Disentangling factors of variation using few labels"},{"scopus_import":"1","status":"public","page":"217-227","date_updated":"2023-09-12T08:07:38Z","publication":"Proceedings of the 35th Conference on Uncertainty in Artificial  Intelligence","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1905.06642"}],"date_published":"2019-05-16T00:00:00Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2019","intvolume":"       115","day":"16","citation":{"ieee":"L. Gresele, P. K. Rubenstein, A. Mehrjou, F. Locatello, and B. Schölkopf, “The incomplete Rosetta Stone problem: Identifiability results for multi-view nonlinear ICA,” in <i>Proceedings of the 35th Conference on Uncertainty in Artificial  Intelligence</i>, Tel Aviv, Israel, 2019, vol. 115, pp. 217–227.","short":"L. Gresele, P.K. Rubenstein, A. Mehrjou, F. Locatello, B. Schölkopf, in:, Proceedings of the 35th Conference on Uncertainty in Artificial  Intelligence, ML Research Press, 2019, pp. 217–227.","mla":"Gresele, Luigi, et al. “The Incomplete Rosetta Stone Problem: Identifiability Results for Multi-View Nonlinear ICA.” <i>Proceedings of the 35th Conference on Uncertainty in Artificial  Intelligence</i>, vol. 115, ML Research Press, 2019, pp. 217–27.","apa":"Gresele, L., Rubenstein, P. K., Mehrjou, A., Locatello, F., &#38; Schölkopf, B. (2019). The incomplete Rosetta Stone problem: Identifiability results for multi-view nonlinear ICA. In <i>Proceedings of the 35th Conference on Uncertainty in Artificial  Intelligence</i> (Vol. 115, pp. 217–227). Tel Aviv, Israel: ML Research Press.","ista":"Gresele L, Rubenstein PK, Mehrjou A, Locatello F, Schölkopf B. 2019. 
The incomplete Rosetta Stone problem: Identifiability results for multi-view nonlinear ICA. Proceedings of the 35th Conference on Uncertainty in Artificial  Intelligence. UAI: Uncertainty in Artificial Intelligence, PMLR, vol. 115, 217–227.","ama":"Gresele L, Rubenstein PK, Mehrjou A, Locatello F, Schölkopf B. The incomplete Rosetta Stone problem: Identifiability results for multi-view nonlinear ICA. In: <i>Proceedings of the 35th Conference on Uncertainty in Artificial  Intelligence</i>. Vol 115. ML Research Press; 2019:217-227.","chicago":"Gresele, Luigi, Paul K. Rubenstein, Arash Mehrjou, Francesco Locatello, and Bernhard Schölkopf. “The Incomplete Rosetta Stone Problem: Identifiability Results for Multi-View Nonlinear ICA.” In <i>Proceedings of the 35th Conference on Uncertainty in Artificial  Intelligence</i>, 115:217–27. ML Research Press, 2019."},"quality_controlled":"1","type":"conference","external_id":{"arxiv":["1905.06642"]},"_id":"14189","language":[{"iso":"eng"}],"abstract":[{"text":"We consider the problem of recovering a common latent source with independent\r\ncomponents from multiple views. This applies to settings in which a variable is\r\nmeasured with multiple experimental modalities, and where the goal is to\r\nsynthesize the disparate measurements into a single unified representation. We\r\nconsider the case that the observed views are a nonlinear mixing of\r\ncomponent-wise corruptions of the sources. When the views are considered\r\nseparately, this reduces to nonlinear Independent Component Analysis (ICA) for\r\nwhich it is provably impossible to undo the mixing. We present novel\r\nidentifiability proofs that this is possible when the multiple views are\r\nconsidered jointly, showing that the mixing can theoretically be undone using\r\nfunction approximators such as deep neural networks. 
In contrast to known\r\nidentifiability results for nonlinear ICA, we prove that independent latent\r\nsources with arbitrary mixing can be recovered as long as multiple,\r\nsufficiently different noisy views are available.","lang":"eng"}],"extern":"1","publisher":"ML Research Press","article_processing_charge":"No","month":"05","department":[{"_id":"FrLo"}],"volume":115,"author":[{"full_name":"Gresele, Luigi","last_name":"Gresele","first_name":"Luigi"},{"full_name":"Rubenstein, Paul K.","last_name":"Rubenstein","first_name":"Paul K."},{"last_name":"Mehrjou","full_name":"Mehrjou, Arash","first_name":"Arash"},{"full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683"},{"last_name":"Schölkopf","full_name":"Schölkopf, Bernhard","first_name":"Bernhard"}],"oa":1,"oa_version":"Preprint","title":"The incomplete Rosetta Stone problem: Identifiability results for multi-view nonlinear ICA","alternative_title":["PMLR"],"date_created":"2023-08-22T14:08:35Z","arxiv":1,"conference":{"location":"Tel Aviv, Israel","end_date":"2019-07-25","start_date":"2019-07-22","name":"UAI: Uncertainty in Artificial Intelligence"},"publication_status":"published"},{"status":"public","article_processing_charge":"No","extern":"1","oa":1,"department":[{"_id":"FrLo"}],"month":"06","author":[{"full_name":"Gondal, Muhammad Waleed","last_name":"Gondal","first_name":"Muhammad Waleed"},{"first_name":"Manuel","last_name":"Wüthrich","full_name":"Wüthrich, Manuel"},{"full_name":"Miladinović, Đorđe","last_name":"Miladinović","first_name":"Đorđe"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683","last_name":"Locatello","full_name":"Locatello, Francesco"},{"last_name":"Breidt","full_name":"Breidt, Martin","first_name":"Martin"},{"first_name":"Valentin","full_name":"Volchkov, Valentin","last_name":"Volchkov"},{"first_name":"Joel","full_name":"Akpo, 
Joel","last_name":"Akpo"},{"first_name":"Olivier","last_name":"Bachem","full_name":"Bachem, Olivier"},{"full_name":"Schölkopf, Bernhard","last_name":"Schölkopf","first_name":"Bernhard"},{"full_name":"Bauer, Stefan","last_name":"Bauer","first_name":"Stefan"}],"volume":32,"date_updated":"2023-09-13T09:46:38Z","publication":"Advances in Neural Information Processing Systems","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","arxiv":1,"publication_identifier":{"isbn":["9781713807933"]},"year":"2019","intvolume":"        32","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1906.03292"}],"oa_version":"Preprint","date_created":"2023-08-22T14:09:13Z","title":"On the transfer of inductive bias from simulation to the real world: a new disentanglement dataset","date_published":"2019-06-07T00:00:00Z","publication_status":"published","conference":{"name":"NeurIPS: Neural Information Processing Systems","start_date":"2019-12-08","end_date":"2019-12-14","location":"Vancouver, Canada"},"quality_controlled":"1","type":"conference","external_id":{"arxiv":["1906.03292"]},"_id":"14190","language":[{"iso":"eng"}],"abstract":[{"lang":"eng","text":"Learning meaningful and compact representations with disentangled semantic\r\naspects is considered to be of key importance in representation learning. Since\r\nreal-world data is notoriously costly to collect, many recent state-of-the-art\r\ndisentanglement models have heavily relied on synthetic toy data-sets. In this\r\npaper, we propose a novel data-set which consists of over one million images of\r\nphysical 3D objects with seven factors of variation, such as object color,\r\nshape, size and position. In order to be able to control all the factors of\r\nvariation precisely, we built an experimental platform where the objects are\r\nbeing moved by a robotic arm. In addition, we provide two more datasets which\r\nconsist of simulations of the experimental setup. 
These datasets provide for\r\nthe first time the possibility to systematically investigate how well different\r\ndisentanglement methods perform on real data in comparison to simulation, and\r\nhow simulated data can be leveraged to build better representations of the real\r\nworld. We provide a first experimental study of these questions and our results\r\nindicate that learned models transfer poorly, but that model and hyperparameter\r\nselection is an effective means of transferring information to the real world."}],"day":"07","citation":{"chicago":"Gondal, Muhammad Waleed, Manuel Wüthrich, Đorđe Miladinović, Francesco Locatello, Martin Breidt, Valentin Volchkov, Joel Akpo, Olivier Bachem, Bernhard Schölkopf, and Stefan Bauer. “On the Transfer of Inductive Bias from Simulation to the Real World: A New Disentanglement Dataset.” In <i>Advances in Neural Information Processing Systems</i>, Vol. 32, 2019.","ista":"Gondal MW, Wüthrich M, Miladinović Đ, Locatello F, Breidt M, Volchkov V, Akpo J, Bachem O, Schölkopf B, Bauer S. 2019. On the transfer of inductive bias from simulation to the real world: a new disentanglement dataset. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 32.","ama":"Gondal MW, Wüthrich M, Miladinović Đ, et al. On the transfer of inductive bias from simulation to the real world: a new disentanglement dataset. In: <i>Advances in Neural Information Processing Systems</i>. Vol 32. ; 2019.","apa":"Gondal, M. W., Wüthrich, M., Miladinović, Đ., Locatello, F., Breidt, M., Volchkov, V., … Bauer, S. (2019). On the transfer of inductive bias from simulation to the real world: a new disentanglement dataset. In <i>Advances in Neural Information Processing Systems</i> (Vol. 32). Vancouver, Canada.","ieee":"M. W. 
Gondal <i>et al.</i>, “On the transfer of inductive bias from simulation to the real world: a new disentanglement dataset,” in <i>Advances in Neural Information Processing Systems</i>, Vancouver, Canada, 2019, vol. 32.","short":"M.W. Gondal, M. Wüthrich, Đ. Miladinović, F. Locatello, M. Breidt, V. Volchkov, J. Akpo, O. Bachem, B. Schölkopf, S. Bauer, in:, Advances in Neural Information Processing Systems, 2019.","mla":"Gondal, Muhammad Waleed, et al. “On the Transfer of Inductive Bias from Simulation to the Real World: A New Disentanglement Dataset.” <i>Advances in Neural Information Processing Systems</i>, vol. 32, 2019."}},{"conference":{"end_date":"2019-12-14","location":"Vancouver, Canada","name":"NeurIPS: Neural Information Processing Systems","start_date":"2019-12-08"},"publication_status":"published","arxiv":1,"publication_identifier":{"isbn":["9781713807933"]},"date_created":"2023-08-22T14:09:35Z","title":"Stochastic Frank-Wolfe for composite convex minimization","oa_version":"Preprint","oa":1,"author":[{"last_name":"Locatello","full_name":"Locatello, Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683"},{"last_name":"Yurtsever","full_name":"Yurtsever, Alp","first_name":"Alp"},{"first_name":"Olivier","last_name":"Fercoq","full_name":"Fercoq, Olivier"},{"first_name":"Volkan","last_name":"Cevher","full_name":"Cevher, Volkan"}],"volume":32,"month":"12","department":[{"_id":"FrLo"}],"article_processing_charge":"No","extern":"1","language":[{"iso":"eng"}],"_id":"14191","abstract":[{"lang":"eng","text":"A broad class of convex optimization problems can be formulated as a semidefinite program (SDP), minimization of a convex function over the positive-semidefinite cone subject to some affine constraints. The majority of classical SDP solvers are designed for the deterministic setting where problem data is readily available. 
In this setting, generalized conditional gradient methods (aka Frank-Wolfe-type methods) provide scalable solutions by leveraging the so-called linear minimization oracle instead of the projection onto the semidefinite cone. Most problems in machine learning and modern engineering applications, however, contain some degree of stochasticity. In this work, we propose the first conditional-gradient-type method for solving stochastic optimization problems under affine constraints. Our method guarantees O(k−1/3) convergence rate in expectation on the objective residual and O(k−5/12) on the feasibility gap."}],"external_id":{"arxiv":["1901.10348"]},"type":"conference","quality_controlled":"1","day":"29","citation":{"apa":"Locatello, F., Yurtsever, A., Fercoq, O., &#38; Cevher, V. (2019). Stochastic Frank-Wolfe for composite convex minimization. In <i>Advances in Neural Information Processing Systems</i> (Vol. 32, pp. 14291–14301). Vancouver, Canada.","mla":"Locatello, Francesco, et al. “Stochastic Frank-Wolfe for Composite Convex Minimization.” <i>Advances in Neural Information Processing Systems</i>, vol. 32, 2019, pp. 14291–14301.","short":"F. Locatello, A. Yurtsever, O. Fercoq, V. Cevher, in:, Advances in Neural Information Processing Systems, 2019, pp. 14291–14301.","ieee":"F. Locatello, A. Yurtsever, O. Fercoq, and V. Cevher, “Stochastic Frank-Wolfe for composite convex minimization,” in <i>Advances in Neural Information Processing Systems</i>, Vancouver, Canada, 2019, vol. 32, pp. 14291–14301.","chicago":"Locatello, Francesco, Alp Yurtsever, Olivier Fercoq, and Volkan Cevher. “Stochastic Frank-Wolfe for Composite Convex Minimization.” In <i>Advances in Neural Information Processing Systems</i>, 32:14291–14301, 2019.","ama":"Locatello F, Yurtsever A, Fercoq O, Cevher V. Stochastic Frank-Wolfe for composite convex minimization. In: <i>Advances in Neural Information Processing Systems</i>. Vol 32. 
; 2019:14291–14301.","ista":"Locatello F, Yurtsever A, Fercoq O, Cevher V. 2019. Stochastic Frank-Wolfe for composite convex minimization. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 32, 14291–14301."},"intvolume":"        32","year":"2019","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_published":"2019-12-29T00:00:00Z","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1901.10348"}],"publication":"Advances in Neural Information Processing Systems","date_updated":"2023-09-12T08:48:45Z","page":"14291–14301","status":"public","scopus_import":"1"},{"extern":"1","article_processing_charge":"No","status":"public","date_updated":"2024-10-14T12:28:15Z","publication":"Advances in Neural Information Processing Systems","author":[{"last_name":"Steenkiste","full_name":"Steenkiste, Sjoerd van","first_name":"Sjoerd van"},{"orcid":"0000-0002-4850-0683","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco","last_name":"Locatello"},{"first_name":"Jürgen","full_name":"Schmidhuber, Jürgen","last_name":"Schmidhuber"},{"last_name":"Bachem","full_name":"Bachem, Olivier","first_name":"Olivier"}],"volume":32,"department":[{"_id":"FrLo"}],"month":"05","oa":1,"date_published":"2019-05-29T00:00:00Z","title":"Are disentangled representations helpful for abstract visual reasoning?","date_created":"2023-08-22T14:09:53Z","oa_version":"Preprint","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.1905.12506"}],"intvolume":"        32","publication_identifier":{"isbn":["9781713807933"]},"arxiv":1,"year":"2019","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"29","citation":{"apa":"Steenkiste, S. van, Locatello, F., Schmidhuber, J., &#38; Bachem, O. (2019). Are disentangled representations helpful for abstract visual reasoning? In <i>Advances in Neural Information Processing Systems</i> (Vol. 32). 
Vancouver, Canada.","mla":"Steenkiste, Sjoerd van, et al. “Are Disentangled Representations Helpful for Abstract Visual Reasoning?” <i>Advances in Neural Information Processing Systems</i>, vol. 32, 2019.","short":"S. van Steenkiste, F. Locatello, J. Schmidhuber, O. Bachem, in:, Advances in Neural Information Processing Systems, 2019.","ieee":"S. van Steenkiste, F. Locatello, J. Schmidhuber, and O. Bachem, “Are disentangled representations helpful for abstract visual reasoning?,” in <i>Advances in Neural Information Processing Systems</i>, Vancouver, Canada, 2019, vol. 32.","chicago":"Steenkiste, Sjoerd van, Francesco Locatello, Jürgen Schmidhuber, and Olivier Bachem. “Are Disentangled Representations Helpful for Abstract Visual Reasoning?” In <i>Advances in Neural Information Processing Systems</i>, Vol. 32, 2019.","ama":"Steenkiste S van, Locatello F, Schmidhuber J, Bachem O. Are disentangled representations helpful for abstract visual reasoning? In: <i>Advances in Neural Information Processing Systems</i>. Vol 32. ; 2019.","ista":"Steenkiste S van, Locatello F, Schmidhuber J, Bachem O. 2019. Are disentangled representations helpful for abstract visual reasoning? Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 32."},"abstract":[{"text":"A disentangled representation encodes information about the salient factors\r\nof variation in the data independently. Although it is often argued that this\r\nrepresentational format is useful in learning to solve many real-world\r\ndown-stream tasks, there is little empirical evidence that supports this claim.\r\nIn this paper, we conduct a large-scale study that investigates whether\r\ndisentangled representations are more suitable for abstract reasoning tasks.\r\nUsing two new tasks similar to Raven's Progressive Matrices, we evaluate the\r\nusefulness of the representations learned by 360 state-of-the-art unsupervised\r\ndisentanglement models. 
Based on these representations, we train 3600 abstract\r\nreasoning models and observe that disentangled representations do in fact lead\r\nto better down-stream performance. In particular, they enable quicker learning\r\nusing fewer samples.","lang":"eng"}],"_id":"14193","language":[{"iso":"eng"}],"external_id":{"arxiv":["1905.12506"]},"type":"conference","quality_controlled":"1","publication_status":"published","conference":{"name":"NeurIPS: Neural Information Processing Systems","start_date":"2019-12-08","end_date":"2019-12-14","location":"Vancouver, Canada"}},{"arxiv":1,"publication_identifier":{"isbn":["9781713807933"]},"oa_version":"Preprint","title":"On the fairness of disentangled representations","date_created":"2023-08-22T14:12:28Z","conference":{"name":"NeurIPS: Neural Information Processing Systems","start_date":"2019-12-08","end_date":"2019-12-14","location":"Vancouver, Canada"},"publication_status":"published","article_processing_charge":"No","extern":"1","oa":1,"month":"12","department":[{"_id":"FrLo"}],"volume":32,"author":[{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","last_name":"Locatello","full_name":"Locatello, Francesco"},{"first_name":"Gabriele","full_name":"Abbati, Gabriele","last_name":"Abbati"},{"last_name":"Rainforth","full_name":"Rainforth, Tom","first_name":"Tom"},{"first_name":"Stefan","full_name":"Bauer, Stefan","last_name":"Bauer"},{"full_name":"Schölkopf, Bernhard","last_name":"Schölkopf","first_name":"Bernhard"},{"first_name":"Olivier","full_name":"Bachem, Olivier","last_name":"Bachem"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2019","intvolume":"        32","main_file_link":[{"url":"https://arxiv.org/abs/1905.13662","open_access":"1"}],"date_published":"2019-12-08T00:00:00Z","quality_controlled":"1","type":"conference","external_id":{"arxiv":["1905.13662"]},"_id":"14197","language":[{"iso":"eng"}],"abstract":[{"lang":"eng","text":"Recently there has been a 
significant interest in learning disentangled\r\nrepresentations, as they promise increased interpretability, generalization to\r\nunseen scenarios and faster learning on downstream tasks. In this paper, we\r\ninvestigate the usefulness of different notions of disentanglement for\r\nimproving the fairness of downstream prediction tasks based on representations.\r\nWe consider the setting where the goal is to predict a target variable based on\r\nthe learned representation of high-dimensional observations (such as images)\r\nthat depend on both the target variable and an \\emph{unobserved} sensitive\r\nvariable. We show that in this setting both the optimal and empirical\r\npredictions can be unfair, even if the target variable and the sensitive\r\nvariable are independent. Analyzing the representations of more than\r\n\\num{12600} trained state-of-the-art disentangled models, we observe that\r\nseveral disentanglement scores are consistently correlated with increased\r\nfairness, suggesting that disentanglement may be a useful property to encourage\r\nfairness when sensitive variables are not observed."}],"citation":{"apa":"Locatello, F., Abbati, G., Rainforth, T., Bauer, S., Schölkopf, B., &#38; Bachem, O. (2019). On the fairness of disentangled representations. In <i>Advances in Neural Information Processing Systems</i> (Vol. 32, pp. 14611–14624). Vancouver, Canada.","mla":"Locatello, Francesco, et al. “On the Fairness of Disentangled Representations.” <i>Advances in Neural Information Processing Systems</i>, vol. 32, 2019, pp. 14611–14624.","ieee":"F. Locatello, G. Abbati, T. Rainforth, S. Bauer, B. Schölkopf, and O. Bachem, “On the fairness of disentangled representations,” in <i>Advances in Neural Information Processing Systems</i>, Vancouver, Canada, 2019, vol. 32, pp. 14611–14624.","short":"F. Locatello, G. Abbati, T. Rainforth, S. Bauer, B. Schölkopf, O. Bachem, in:, Advances in Neural Information Processing Systems, 2019, pp. 
14611–14624.","chicago":"Locatello, Francesco, Gabriele Abbati, Tom Rainforth, Stefan Bauer, Bernhard Schölkopf, and Olivier Bachem. “On the Fairness of Disentangled Representations.” In <i>Advances in Neural Information Processing Systems</i>, 32:14611–14624, 2019.","ama":"Locatello F, Abbati G, Rainforth T, Bauer S, Schölkopf B, Bachem O. On the fairness of disentangled representations. In: <i>Advances in Neural Information Processing Systems</i>. Vol 32. ; 2019:14611–14624.","ista":"Locatello F, Abbati G, Rainforth T, Bauer S, Schölkopf B, Bachem O. 2019. On the fairness of disentangled representations. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 32, 14611–14624."},"day":"08","status":"public","page":"14611–14624","scopus_import":"1","publication":"Advances in Neural Information Processing Systems","date_updated":"2024-10-14T12:29:05Z"},{"date_created":"2023-08-22T14:13:08Z","title":"Challenging common assumptions in the unsupervised learning of disentangled representations","oa_version":"Preprint","arxiv":1,"conference":{"start_date":"2019-06-10","name":"International Conference on Machine Learning","location":"Long Beach, CA, United States","end_date":"2019-06-15"},"publication_status":"published","extern":"1","article_processing_charge":"No","publisher":"ML Research Press","author":[{"last_name":"Locatello","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco"},{"first_name":"Stefan","full_name":"Bauer, Stefan","last_name":"Bauer"},{"full_name":"Lucic, Mario","last_name":"Lucic","first_name":"Mario"},{"first_name":"Gunnar","full_name":"Rätsch, Gunnar","last_name":"Rätsch"},{"first_name":"Sylvain","last_name":"Gelly","full_name":"Gelly, Sylvain"},{"last_name":"Schölkopf","full_name":"Schölkopf, Bernhard","first_name":"Bernhard"},{"last_name":"Bachem","full_name":"Bachem, 
Olivier","first_name":"Olivier"}],"volume":97,"month":"06","department":[{"_id":"FrLo"}],"oa":1,"date_published":"2019-06-09T00:00:00Z","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1811.12359"}],"intvolume":"        97","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2019","day":"09","citation":{"apa":"Locatello, F., Bauer, S., Lucic, M., Rätsch, G., Gelly, S., Schölkopf, B., &#38; Bachem, O. (2019). Challenging common assumptions in the unsupervised learning of disentangled representations. In <i>Proceedings of the 36th International Conference on Machine Learning</i> (Vol. 97, pp. 4114–4124). Long Beach, CA, United States: ML Research Press.","ieee":"F. Locatello <i>et al.</i>, “Challenging common assumptions in the unsupervised learning of disentangled representations,” in <i>Proceedings of the 36th International Conference on Machine Learning</i>, Long Beach, CA, United States, 2019, vol. 97, pp. 4114–4124.","short":"F. Locatello, S. Bauer, M. Lucic, G. Rätsch, S. Gelly, B. Schölkopf, O. Bachem, in:, Proceedings of the 36th International Conference on Machine Learning, ML Research Press, 2019, pp. 4114–4124.","mla":"Locatello, Francesco, et al. “Challenging Common Assumptions in the Unsupervised Learning of Disentangled Representations.” <i>Proceedings of the 36th International Conference on Machine Learning</i>, vol. 97, ML Research Press, 2019, pp. 4114–24.","chicago":"Locatello, Francesco, Stefan Bauer, Mario Lucic, Gunnar Rätsch, Sylvain Gelly, Bernhard Schölkopf, and Olivier Bachem. “Challenging Common Assumptions in the Unsupervised Learning of Disentangled Representations.” In <i>Proceedings of the 36th International Conference on Machine Learning</i>, 97:4114–24. ML Research Press, 2019.","ista":"Locatello F, Bauer S, Lucic M, Rätsch G, Gelly S, Schölkopf B, Bachem O. 2019. Challenging common assumptions in the unsupervised learning of disentangled representations. 
Proceedings of the 36th International Conference on Machine Learning. International Conference on Machine Learning vol. 97, 4114–4124.","ama":"Locatello F, Bauer S, Lucic M, et al. Challenging common assumptions in the unsupervised learning of disentangled representations. In: <i>Proceedings of the 36th International Conference on Machine Learning</i>. Vol 97. ML Research Press; 2019:4114-4124."},"external_id":{"arxiv":["1811.12359"]},"abstract":[{"text":"The key idea behind the unsupervised learning of disentangled representations\r\nis that real-world data is generated by a few explanatory factors of variation\r\nwhich can be recovered by unsupervised learning algorithms. In this paper, we\r\nprovide a sober look at recent progress in the field and challenge some common\r\nassumptions. We first theoretically show that the unsupervised learning of\r\ndisentangled representations is fundamentally impossible without inductive\r\nbiases on both the models and the data. Then, we train more than 12000 models\r\ncovering most prominent methods and evaluation metrics in a reproducible\r\nlarge-scale experimental study on seven different data sets. We observe that\r\nwhile the different methods successfully enforce properties ``encouraged'' by\r\nthe corresponding losses, well-disentangled models seemingly cannot be\r\nidentified without supervision. 
Furthermore, increased disentanglement does not\r\nseem to lead to a decreased sample complexity of learning for downstream tasks.\r\nOur results suggest that future work on disentanglement learning should be\r\nexplicit about the role of inductive biases and (implicit) supervision,\r\ninvestigate concrete benefits of enforcing disentanglement of the learned\r\nrepresentations, and consider a reproducible experimental setup covering\r\nseveral data sets.","lang":"eng"}],"_id":"14200","language":[{"iso":"eng"}],"quality_controlled":"1","type":"conference","scopus_import":"1","page":"4114-4124","status":"public","publication":"Proceedings of the 36th International Conference on Machine Learning","date_updated":"2024-10-14T12:29:16Z"},{"citation":{"ista":"Fortuin V, Hüser M, Locatello F, Strathmann H, Rätsch G. 2018. SOM-VAE: Interpretable discrete representation learning on time series. International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","ama":"Fortuin V, Hüser M, Locatello F, Strathmann H, Rätsch G. SOM-VAE: Interpretable discrete representation learning on time series. In: <i>International Conference on Learning Representations</i>. ; 2018.","chicago":"Fortuin, Vincent, Matthias Hüser, Francesco Locatello, Heiko Strathmann, and Gunnar Rätsch. “SOM-VAE: Interpretable Discrete Representation Learning on Time Series.” In <i>International Conference on Learning Representations</i>, 2018.","short":"V. Fortuin, M. Hüser, F. Locatello, H. Strathmann, G. Rätsch, in:, International Conference on Learning Representations, 2018.","ieee":"V. Fortuin, M. Hüser, F. Locatello, H. Strathmann, and G. Rätsch, “SOM-VAE: Interpretable discrete representation learning on time series,” in <i>International Conference on Learning Representations</i>, New Orleans, LA, United States, 2018.","mla":"Fortuin, Vincent, et al. 
“SOM-VAE: Interpretable Discrete Representation Learning on Time Series.” <i>International Conference on Learning Representations</i>, 2018.","apa":"Fortuin, V., Hüser, M., Locatello, F., Strathmann, H., &#38; Rätsch, G. (2018). SOM-VAE: Interpretable discrete representation learning on time series. In <i>International Conference on Learning Representations</i>. New Orleans, LA, United States."},"day":"06","conference":{"start_date":"2019-05-06","name":"ICLR: International Conference on Learning Representations","location":"New Orleans, LA, United States","end_date":"2019-05-09"},"quality_controlled":"1","publication_status":"published","type":"conference","external_id":{"arxiv":["1806.02199"]},"_id":"14198","abstract":[{"lang":"eng","text":"High-dimensional time series are common in many domains. Since human\r\ncognition is not optimized to work well in high-dimensional spaces, these areas\r\ncould benefit from interpretable low-dimensional representations. However, most\r\nrepresentation learning algorithms for time series data are difficult to\r\ninterpret. This is due to non-intuitive mappings from data features to salient\r\nproperties of the representation and non-smoothness over time. To address this\r\nproblem, we propose a new representation learning framework building on ideas\r\nfrom interpretable discrete dimensionality reduction and deep generative\r\nmodeling. This framework allows us to learn discrete representations of time\r\nseries, which give rise to smooth and interpretable embeddings with superior\r\nclustering performance. We introduce a new way to overcome the\r\nnon-differentiability in discrete representation learning and present a\r\ngradient-based version of the traditional self-organizing map algorithm that is\r\nmore performant than the original. Furthermore, to allow for a probabilistic\r\ninterpretation of our method, we integrate a Markov model in the representation\r\nspace. 
This model uncovers the temporal transition structure, improves\r\nclustering performance even further and provides additional explanatory\r\ninsights as well as a natural representation of uncertainty. We evaluate our\r\nmodel in terms of clustering performance and interpretability on static\r\n(Fashion-)MNIST data, a time series of linearly interpolated (Fashion-)MNIST\r\nimages, a chaotic Lorenz attractor system with two macro states, as well as on\r\na challenging real world medical time series application on the eICU data set.\r\nOur learned representations compare favorably with competitor methods and\r\nfacilitate downstream tasks on the real world data."}],"language":[{"iso":"eng"}],"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1806.02199"}],"oa_version":"Preprint","title":"SOM-VAE: Interpretable discrete representation learning on time series","date_created":"2023-08-22T14:12:48Z","date_published":"2018-06-06T00:00:00Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2018","arxiv":1,"month":"06","department":[{"_id":"FrLo"}],"author":[{"first_name":"Vincent","last_name":"Fortuin","full_name":"Fortuin, Vincent"},{"first_name":"Matthias","full_name":"Hüser, Matthias","last_name":"Hüser"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","last_name":"Locatello","full_name":"Locatello, Francesco"},{"first_name":"Heiko","last_name":"Strathmann","full_name":"Strathmann, Heiko"},{"full_name":"Rätsch, Gunnar","last_name":"Rätsch","first_name":"Gunnar"}],"publication":"International Conference on Learning Representations","date_updated":"2023-09-13T06:35:12Z","oa":1,"extern":"1","status":"public","article_processing_charge":"No"},{"date_updated":"2024-10-14T12:29:27Z","publication":"Proceedings of the 21st International Conference on Artificial Intelligence and Statistics","scopus_import":"1","page":"464-472","status":"public","citation":{"short":"F. Locatello, R. Khanna, J. Ghosh, G. 
Rätsch, in:, Proceedings of the 21st International Conference on Artificial Intelligence and Statistics, ML Research Press, 2018, pp. 464–472.","ieee":"F. Locatello, R. Khanna, J. Ghosh, and G. Rätsch, “Boosting variational inference: An optimization perspective,” in <i>Proceedings of the 21st International Conference on Artificial Intelligence and Statistics</i>, Playa Blanca, Lanzarote, 2018, vol. 84, pp. 464–472.","mla":"Locatello, Francesco, et al. “Boosting Variational Inference: An Optimization Perspective.” <i>Proceedings of the 21st International Conference on Artificial Intelligence and Statistics</i>, vol. 84, ML Research Press, 2018, pp. 464–72.","apa":"Locatello, F., Khanna, R., Ghosh, J., &#38; Rätsch, G. (2018). Boosting variational inference: An optimization perspective. In <i>Proceedings of the 21st International Conference on Artificial Intelligence and Statistics</i> (Vol. 84, pp. 464–472). Playa Blanca, Lanzarote: ML Research Press.","ista":"Locatello F, Khanna R, Ghosh J, Rätsch G. 2018. Boosting variational inference: An optimization perspective. Proceedings of the 21st International Conference on Artificial Intelligence and Statistics. AISTATS: Conference on Artificial Intelligence and Statistics, PMLR, vol. 84, 464–472.","ama":"Locatello F, Khanna R, Ghosh J, Rätsch G. Boosting variational inference: An optimization perspective. In: <i>Proceedings of the 21st International Conference on Artificial Intelligence and Statistics</i>. Vol 84. ML Research Press; 2018:464-472.","chicago":"Locatello, Francesco, Rajiv Khanna, Joydeep Ghosh, and Gunnar Rätsch. “Boosting Variational Inference: An Optimization Perspective.” In <i>Proceedings of the 21st International Conference on Artificial Intelligence and Statistics</i>, 84:464–72. 
ML Research Press, 2018."},"day":"15","external_id":{"arxiv":["1708.01733"]},"_id":"14201","language":[{"iso":"eng"}],"abstract":[{"text":"Variational inference is a popular technique to approximate a possibly\r\nintractable Bayesian posterior with a more tractable one. Recently, boosting\r\nvariational inference has been proposed as a new paradigm to approximate the\r\nposterior by a mixture of densities by greedily adding components to the\r\nmixture. However, as is the case with many other variational inference\r\nalgorithms, its theoretical properties have not been studied. In the present\r\nwork, we study the convergence properties of this approach from a modern\r\noptimization viewpoint by establishing connections to the classic Frank-Wolfe\r\nalgorithm. Our analyses yields novel theoretical insights regarding the\r\nsufficient conditions for convergence, explicit rates, and algorithmic\r\nsimplifications. Since a lot of focus in previous works for variational\r\ninference has been on tractability, our work is especially important as a much\r\nneeded attempt to bridge the gap between probabilistic models and their\r\ncorresponding theoretical properties.","lang":"eng"}],"quality_controlled":"1","type":"conference","date_published":"2018-04-15T00:00:00Z","main_file_link":[{"url":"https://arxiv.org/abs/1708.01733","open_access":"1"}],"intvolume":"        84","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2018","author":[{"full_name":"Locatello, Francesco","last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683"},{"first_name":"Rajiv","full_name":"Khanna, Rajiv","last_name":"Khanna"},{"last_name":"Ghosh","full_name":"Ghosh, Joydeep","first_name":"Joydeep"},{"last_name":"Rätsch","full_name":"Rätsch, Gunnar","first_name":"Gunnar"}],"volume":84,"department":[{"_id":"FrLo"}],"month":"04","oa":1,"extern":"1","article_processing_charge":"No","publisher":"ML Research 
Press","conference":{"location":"Playa Blanca, Lanzarote","end_date":"2018-04-11","start_date":"2018-04-09","name":"AISTATS: Conference on Artificial Intelligence and Statistics"},"publication_status":"published","date_created":"2023-08-22T14:15:20Z","alternative_title":["PMLR"],"title":"Boosting variational inference: An optimization perspective","oa_version":"Preprint","arxiv":1},{"oa":1,"author":[{"last_name":"Locatello","full_name":"Locatello, Francesco","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683"},{"first_name":"Gideon","full_name":"Dresdner, Gideon","last_name":"Dresdner"},{"first_name":"Rajiv","full_name":"Khanna, Rajiv","last_name":"Khanna"},{"first_name":"Isabel","full_name":"Valera, Isabel","last_name":"Valera"},{"last_name":"Rätsch","full_name":"Rätsch, Gunnar","first_name":"Gunnar"}],"volume":31,"department":[{"_id":"FrLo"}],"month":"06","article_processing_charge":"No","publisher":"Neural Information Processing Systems Foundation","extern":"1","conference":{"start_date":"2018-12-03","name":"NeurIPS: Neural Information Processing Systems","location":"Montreal, Canada","end_date":"2018-12-08"},"publication_status":"published","publication_identifier":{"eissn":["1049-5258"],"isbn":["9781510884472"]},"arxiv":1,"date_created":"2023-08-22T14:15:40Z","title":"Boosting black box variational inference","oa_version":"Preprint","date_updated":"2023-09-13T07:38:24Z","publication":"Advances in Neural Information Processing Systems","status":"public","scopus_import":"1","external_id":{"arxiv":["1806.02185"]},"abstract":[{"lang":"eng","text":"Approximating a probability density in a tractable manner is a central task\r\nin Bayesian statistics. 
Variational Inference (VI) is a popular technique that\r\nachieves tractability by choosing a relatively simple variational family.\r\nBorrowing ideas from the classic boosting framework, recent approaches attempt\r\nto \\emph{boost} VI by replacing the selection of a single density with a\r\ngreedily constructed mixture of densities. In order to guarantee convergence,\r\nprevious works impose stringent assumptions that require significant effort for\r\npractitioners. Specifically, they require a custom implementation of the greedy\r\nstep (called the LMO) for every probabilistic model with respect to an\r\nunnatural variational family of truncated distributions. Our work fixes these\r\nissues with novel theoretical and algorithmic insights. On the theoretical\r\nside, we show that boosting VI satisfies a relaxed smoothness assumption which\r\nis sufficient for the convergence of the functional Frank-Wolfe (FW) algorithm.\r\nFurthermore, we rephrase the LMO problem and propose to maximize the Residual\r\nELBO (RELBO) which replaces the standard ELBO optimization in VI. These\r\ntheoretical enhancements allow for black box implementation of the boosting\r\nsubroutine. Finally, we present a stopping criterion drawn from the duality gap\r\nin the classic FW analyses and exhaustive experiments to illustrate the\r\nusefulness of our theoretical and algorithmic contributions."}],"_id":"14202","language":[{"iso":"eng"}],"quality_controlled":"1","type":"conference","citation":{"ista":"Locatello F, Dresdner G, Khanna R, Valera I, Rätsch G. 2018. Boosting black box variational inference. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 31.","ama":"Locatello F, Dresdner G, Khanna R, Valera I, Rätsch G. Boosting black box variational inference. In: <i>Advances in Neural Information Processing Systems</i>. Vol 31. 
Neural Information Processing Systems Foundation; 2018.","chicago":"Locatello, Francesco, Gideon Dresdner, Rajiv Khanna, Isabel Valera, and Gunnar Rätsch. “Boosting Black Box Variational Inference.” In <i>Advances in Neural Information Processing Systems</i>, Vol. 31. Neural Information Processing Systems Foundation, 2018.","ieee":"F. Locatello, G. Dresdner, R. Khanna, I. Valera, and G. Rätsch, “Boosting black box variational inference,” in <i>Advances in Neural Information Processing Systems</i>, Montreal, Canada, 2018, vol. 31.","short":"F. Locatello, G. Dresdner, R. Khanna, I. Valera, G. Rätsch, in:, Advances in Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2018.","mla":"Locatello, Francesco, et al. “Boosting Black Box Variational Inference.” <i>Advances in Neural Information Processing Systems</i>, vol. 31, Neural Information Processing Systems Foundation, 2018.","apa":"Locatello, F., Dresdner, G., Khanna, R., Valera, I., &#38; Rätsch, G. (2018). Boosting black box variational inference. In <i>Advances in Neural Information Processing Systems</i> (Vol. 31). 
Montreal, Canada: Neural Information Processing Systems Foundation."},"day":"06","intvolume":"        31","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2018","date_published":"2018-06-06T00:00:00Z","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1806.02185"}]},{"arxiv":1,"date_created":"2023-08-22T14:16:01Z","title":"A conditional gradient framework for composite convex minimization with applications to semidefinite programming","alternative_title":["PMLR"],"oa_version":"Preprint","conference":{"name":"ICML: International Conference on Machine Learning","start_date":"2018-07-10","end_date":"2018-07-15","location":"Stockholm, Sweden"},"publication_status":"published","article_processing_charge":"No","publisher":"ML Research Press","extern":"1","oa":1,"author":[{"full_name":"Yurtsever, Alp","last_name":"Yurtsever","first_name":"Alp"},{"full_name":"Fercoq, Olivier","last_name":"Fercoq","first_name":"Olivier"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","full_name":"Locatello, Francesco","last_name":"Locatello"},{"first_name":"Volkan","last_name":"Cevher","full_name":"Cevher, Volkan"}],"volume":80,"month":"07","department":[{"_id":"FrLo"}],"intvolume":"        80","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2018","date_published":"2018-07-15T00:00:00Z","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1804.08544"}],"external_id":{"arxiv":["1804.08544"]},"_id":"14203","language":[{"iso":"eng"}],"abstract":[{"text":"We propose a conditional gradient framework for a composite convex minimization template with broad applications. Our approach combines smoothing and homotopy techniques under the CGM framework, and provably achieves the optimal O(1/k−−√) convergence rate. We demonstrate that the same rate holds if the linear subproblems are solved approximately with additive or multiplicative error. 
In contrast with the relevant work, we are able to characterize the convergence when the non-smooth term is an indicator function. Specific applications of our framework include the non-smooth minimization, semidefinite programming, and minimization with linear inclusion constraints over a compact domain. Numerical evidence demonstrates the benefits of our framework.","lang":"eng"}],"quality_controlled":"1","type":"conference","day":"15","citation":{"ieee":"A. Yurtsever, O. Fercoq, F. Locatello, and V. Cevher, “A conditional gradient framework for composite convex minimization with applications to semidefinite programming,” in <i>Proceedings of the 35th International Conference on Machine Learning</i>, Stockholm, Sweden, 2018, vol. 80, pp. 5727–5736.","short":"A. Yurtsever, O. Fercoq, F. Locatello, V. Cevher, in:, Proceedings of the 35th International Conference on Machine Learning, ML Research Press, 2018, pp. 5727–5736.","mla":"Yurtsever, Alp, et al. “A Conditional Gradient Framework for Composite Convex Minimization with Applications to Semidefinite Programming.” <i>Proceedings of the 35th International Conference on Machine Learning</i>, vol. 80, ML Research Press, 2018, pp. 5727–36.","apa":"Yurtsever, A., Fercoq, O., Locatello, F., &#38; Cevher, V. (2018). A conditional gradient framework for composite convex minimization with applications to semidefinite programming. In <i>Proceedings of the 35th International Conference on Machine Learning</i> (Vol. 80, pp. 5727–5736). Stockholm, Sweden: ML Research Press.","ista":"Yurtsever A, Fercoq O, Locatello F, Cevher V. 2018. A conditional gradient framework for composite convex minimization with applications to semidefinite programming. Proceedings of the 35th International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 80, 5727–5736.","ama":"Yurtsever A, Fercoq O, Locatello F, Cevher V. 
A conditional gradient framework for composite convex minimization with applications to semidefinite programming. In: <i>Proceedings of the 35th International Conference on Machine Learning</i>. Vol 80. ML Research Press; 2018:5727-5736.","chicago":"Yurtsever, Alp, Olivier Fercoq, Francesco Locatello, and Volkan Cevher. “A Conditional Gradient Framework for Composite Convex Minimization with Applications to Semidefinite Programming.” In <i>Proceedings of the 35th International Conference on Machine Learning</i>, 80:5727–36. ML Research Press, 2018."},"page":"5727-5736","status":"public","publication":"Proceedings of the 35th International Conference on Machine Learning","date_updated":"2023-09-13T08:13:39Z"},{"external_id":{"arxiv":["1803.09539"]},"_id":"14204","abstract":[{"lang":"eng","text":"Two popular examples of first-order optimization methods over linear spaces are coordinate descent and matching pursuit algorithms, with their randomized variants. While the former targets the optimization by moving along coordinates, the latter considers a generalized notion of directions. Exploiting the connection between the two algorithms, we present a unified analysis of both, providing affine invariant sublinear O(1/t) rates on smooth objectives and linear convergence on strongly convex objectives. As a byproduct of our affine invariant analysis of matching pursuit, our rates for steepest coordinate descent are the tightest known. Furthermore, we show the first accelerated convergence rate O(1/t^2) for matching pursuit and steepest coordinate descent on convex objectives."}],"language":[{"iso":"eng"}],"quality_controlled":"1","type":"conference","citation":{"apa":"Locatello, F., Raj, A., Karimireddy, S. P., Rätsch, G., Schölkopf, B., Stich, S. U., &#38; Jaggi, M. (2018). On matching pursuit and coordinate descent. In <i>Proceedings of the 35th International Conference on Machine Learning</i> (Vol. 80, pp. 3198–3207). ML Research Press.","ieee":"F. 
Locatello <i>et al.</i>, “On matching pursuit and coordinate descent,” in <i>Proceedings of the 35th International Conference on Machine Learning</i>, 2018, vol. 80, pp. 3198–3207.","short":"F. Locatello, A. Raj, S.P. Karimireddy, G. Rätsch, B. Schölkopf, S.U. Stich, M. Jaggi, in:, Proceedings of the 35th International Conference on Machine Learning, ML Research Press, 2018, pp. 3198–3207.","mla":"Locatello, Francesco, et al. “On Matching Pursuit and Coordinate Descent.” <i>Proceedings of the 35th International Conference on Machine Learning</i>, vol. 80, ML Research Press, 2018, pp. 3198–207.","chicago":"Locatello, Francesco, Anant Raj, Sai Praneeth Karimireddy, Gunnar Rätsch, Bernhard Schölkopf, Sebastian U. Stich, and Martin Jaggi. “On Matching Pursuit and Coordinate Descent.” In <i>Proceedings of the 35th International Conference on Machine Learning</i>, 80:3198–3207. ML Research Press, 2018.","ista":"Locatello F, Raj A, Karimireddy SP, Rätsch G, Schölkopf B, Stich SU, Jaggi M. 2018. On matching pursuit and coordinate descent. Proceedings of the 35th International Conference on Machine Learning. , PMLR, vol. 80, 3198–3207.","ama":"Locatello F, Raj A, Karimireddy SP, et al. On matching pursuit and coordinate descent. In: <i>Proceedings of the 35th International Conference on Machine Learning</i>. Vol 80. 
ML Research Press; 2018:3198-3207."},"day":"01","intvolume":"        80","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2018","date_published":"2018-07-01T00:00:00Z","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1803.09539"}],"date_updated":"2024-10-14T12:29:40Z","publication":"Proceedings of the 35th International Conference on Machine Learning","status":"public","page":"3198-3207","scopus_import":"1","publication_status":"published","arxiv":1,"title":"On matching pursuit and coordinate descent","date_created":"2023-08-22T14:16:25Z","alternative_title":["PMLR"],"oa_version":"Preprint","oa":1,"author":[{"full_name":"Locatello, Francesco","last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683"},{"last_name":"Raj","full_name":"Raj, Anant","first_name":"Anant"},{"first_name":"Sai Praneeth","full_name":"Karimireddy, Sai Praneeth","last_name":"Karimireddy"},{"first_name":"Gunnar","full_name":"Rätsch, Gunnar","last_name":"Rätsch"},{"first_name":"Bernhard","full_name":"Schölkopf, Bernhard","last_name":"Schölkopf"},{"first_name":"Sebastian U.","last_name":"Stich","full_name":"Stich, Sebastian U."},{"first_name":"Martin","last_name":"Jaggi","full_name":"Jaggi, Martin"}],"volume":80,"department":[{"_id":"FrLo"}],"month":"07","article_processing_charge":"No","publisher":"ML Research Press","extern":"1"},{"department":[{"_id":"FrLo"}],"month":"05","publication":"6th International Conference on Learning Representations","date_updated":"2024-10-14T12:30:32Z","author":[{"last_name":"Locatello","full_name":"Locatello, Francesco","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683"},{"last_name":"Vincent","full_name":"Vincent, Damien","first_name":"Damien"},{"first_name":"Ilya","full_name":"Tolstikhin, Ilya","last_name":"Tolstikhin"},{"last_name":"Ratsch","full_name":"Ratsch, 
Gunnar","first_name":"Gunnar"},{"last_name":"Gelly","full_name":"Gelly, Sylvain","first_name":"Sylvain"},{"first_name":"Bernhard","last_name":"Scholkopf","full_name":"Scholkopf, Bernhard"}],"oa":1,"scopus_import":"1","extern":"1","status":"public","article_processing_charge":"No","citation":{"ama":"Locatello F, Vincent D, Tolstikhin I, Ratsch G, Gelly S, Scholkopf B. Clustering meets implicit generative models. In: <i>6th International Conference on Learning Representations</i>. ; 2018.","ista":"Locatello F, Vincent D, Tolstikhin I, Ratsch G, Gelly S, Scholkopf B. 2018. Clustering meets implicit generative models. 6th International Conference on Learning Representations. International Conference on Learning Representations.","chicago":"Locatello, Francesco, Damien Vincent, Ilya Tolstikhin, Gunnar Ratsch, Sylvain Gelly, and Bernhard Scholkopf. “Clustering Meets Implicit Generative Models.” In <i>6th International Conference on Learning Representations</i>, 2018.","mla":"Locatello, Francesco, et al. “Clustering Meets Implicit Generative Models.” <i>6th International Conference on Learning Representations</i>, 2018.","ieee":"F. Locatello, D. Vincent, I. Tolstikhin, G. Ratsch, S. Gelly, and B. Scholkopf, “Clustering meets implicit generative models,” in <i>6th International Conference on Learning Representations</i>, Vancouver, Canada, 2018.","short":"F. Locatello, D. Vincent, I. Tolstikhin, G. Ratsch, S. Gelly, B. Scholkopf, in:, 6th International Conference on Learning Representations, 2018.","apa":"Locatello, F., Vincent, D., Tolstikhin, I., Ratsch, G., Gelly, S., &#38; Scholkopf, B. (2018). Clustering meets implicit generative models. In <i>6th International Conference on Learning Representations</i>. 
Vancouver, Canada."},"day":"01","type":"conference","publication_status":"published","conference":{"name":"International Conference on Learning Representations","start_date":"2018-04-30","end_date":"2018-05-03","location":"Vancouver, Canada"},"quality_controlled":"1","_id":"14224","language":[{"iso":"eng"}],"abstract":[{"lang":"eng","text":"Clustering is a cornerstone of unsupervised learning which can be thought as disentangling multiple generative mechanisms underlying the data. In this paper we introduce an algorithmic framework to train mixtures of implicit generative models which we particularize for variational autoencoders. Relying on an additional set of discriminators, we propose a competitive procedure in which the models only need to approximate the portion of the data distribution from which they can produce realistic samples. As a byproduct, each model is simpler to train, and a clustering interpretation arises naturally from the partitioning of the training points among the models. 
We empirically show that our approach splits the training distribution in a reasonable way and increases the quality of the generated samples."}],"external_id":{"arxiv":["1804.11130"]},"oa_version":"Preprint","main_file_link":[{"url":"https://arxiv.org/abs/1804.11130","open_access":"1"}],"date_published":"2018-05-01T00:00:00Z","date_created":"2023-08-22T14:25:34Z","title":"Clustering meets implicit generative models","year":"2018","arxiv":1,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2018","arxiv":1,"article_number":"1804.11130","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.1804.11130","open_access":"1"}],"oa_version":"Preprint","title":"Competitive training of mixtures of independent deep generative models","date_created":"2023-09-13T12:20:49Z","date_published":"2018-04-30T00:00:00Z","publication_status":"submitted","type":"preprint","external_id":{"arxiv":["1804.11130"]},"language":[{"iso":"eng"}],"_id":"14327","abstract":[{"text":"A common assumption in causal modeling posits that the data is generated by a\r\nset of independent mechanisms, and algorithms should aim to recover this\r\nstructure. Standard unsupervised learning, however, is often concerned with\r\ntraining a single model to capture the overall distribution or aspects thereof.\r\nInspired by clustering approaches, we consider mixtures of implicit generative\r\nmodels that ``disentangle'' the independent generative mechanisms underlying\r\nthe data. Relying on an additional set of discriminators, we propose a\r\ncompetitive training procedure in which the models only need to capture the\r\nportion of the data distribution from which they can produce realistic samples.\r\nAs a by-product, each model is simpler and faster to train. 
We empirically show\r\nthat our approach splits the training distribution in a sensible way and\r\nincreases the quality of the generated samples.","lang":"eng"}],"citation":{"chicago":"Locatello, Francesco, Damien Vincent, Ilya Tolstikhin, Gunnar Rätsch, Sylvain Gelly, and Bernhard Schölkopf. “Competitive Training of Mixtures of Independent Deep Generative Models.” <i>ArXiv</i>, n.d. <a href=\"https://doi.org/10.48550/arXiv.1804.11130\">https://doi.org/10.48550/arXiv.1804.11130</a>.","ista":"Locatello F, Vincent D, Tolstikhin I, Rätsch G, Gelly S, Schölkopf B. Competitive training of mixtures of independent deep generative models. arXiv, 1804.11130.","ama":"Locatello F, Vincent D, Tolstikhin I, Rätsch G, Gelly S, Schölkopf B. Competitive training of mixtures of independent deep generative models. <i>arXiv</i>. doi:<a href=\"https://doi.org/10.48550/arXiv.1804.11130\">10.48550/arXiv.1804.11130</a>","apa":"Locatello, F., Vincent, D., Tolstikhin, I., Rätsch, G., Gelly, S., &#38; Schölkopf, B. (n.d.). Competitive training of mixtures of independent deep generative models. <i>arXiv</i>. <a href=\"https://doi.org/10.48550/arXiv.1804.11130\">https://doi.org/10.48550/arXiv.1804.11130</a>","ieee":"F. Locatello, D. Vincent, I. Tolstikhin, G. Rätsch, S. Gelly, and B. Schölkopf, “Competitive training of mixtures of independent deep generative models,” <i>arXiv</i>. .","short":"F. Locatello, D. Vincent, I. Tolstikhin, G. Rätsch, S. Gelly, B. Schölkopf, ArXiv (n.d.).","mla":"Locatello, Francesco, et al. 
“Competitive Training of Mixtures of Independent Deep Generative Models.” <i>ArXiv</i>, 1804.11130, doi:<a href=\"https://doi.org/10.48550/arXiv.1804.11130\">10.48550/arXiv.1804.11130</a>."},"day":"30","status":"public","article_processing_charge":"No","extern":"1","doi":"10.48550/arXiv.1804.11130","oa":1,"month":"04","department":[{"_id":"FrLo"}],"author":[{"full_name":"Locatello, Francesco","last_name":"Locatello","orcid":"0000-0002-4850-0683","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"},{"full_name":"Vincent, Damien","last_name":"Vincent","first_name":"Damien"},{"last_name":"Tolstikhin","full_name":"Tolstikhin, Ilya","first_name":"Ilya"},{"last_name":"Rätsch","full_name":"Rätsch, Gunnar","first_name":"Gunnar"},{"full_name":"Gelly, Sylvain","last_name":"Gelly","first_name":"Sylvain"},{"first_name":"Bernhard","last_name":"Schölkopf","full_name":"Schölkopf, Bernhard"}],"publication":"arXiv","date_updated":"2024-10-14T12:31:09Z"},{"page":"860-868","status":"public","publication":"Proceedings of the 20th International Conference on Artificial Intelligence and Statistics","date_updated":"2023-09-13T09:49:10Z","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.1702.06457","open_access":"1"}],"date_published":"2017-02-21T00:00:00Z","year":"2017","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","intvolume":"        54","citation":{"ama":"Locatello F, Khanna R, Tschannen M, Jaggi M. A unified optimization view on generalized matching pursuit and Frank-Wolfe. In: <i>Proceedings of the 20th International Conference on Artificial Intelligence and Statistics</i>. Vol 54. ML Research Press; 2017:860-868.","ista":"Locatello F, Khanna R, Tschannen M, Jaggi M. 2017. A unified optimization view on generalized matching pursuit and Frank-Wolfe. Proceedings of the 20th International Conference on Artificial Intelligence and Statistics. AISTATS: Conference on Artificial Intelligence and Statistics vol. 
54, 860–868.","chicago":"Locatello, Francesco, Rajiv Khanna, Michael Tschannen, and Martin Jaggi. “A Unified Optimization View on Generalized Matching Pursuit and Frank-Wolfe.” In <i>Proceedings of the 20th International Conference on Artificial Intelligence and Statistics</i>, 54:860–68. ML Research Press, 2017.","mla":"Locatello, Francesco, et al. “A Unified Optimization View on Generalized Matching Pursuit and Frank-Wolfe.” <i>Proceedings of the 20th International Conference on Artificial Intelligence and Statistics</i>, vol. 54, ML Research Press, 2017, pp. 860–68.","short":"F. Locatello, R. Khanna, M. Tschannen, M. Jaggi, in:, Proceedings of the 20th International Conference on Artificial Intelligence and Statistics, ML Research Press, 2017, pp. 860–868.","ieee":"F. Locatello, R. Khanna, M. Tschannen, and M. Jaggi, “A unified optimization view on generalized matching pursuit and Frank-Wolfe,” in <i>Proceedings of the 20th International Conference on Artificial Intelligence and Statistics</i>, Fort Lauderdale, FL, United States, 2017, vol. 54, pp. 860–868.","apa":"Locatello, F., Khanna, R., Tschannen, M., &#38; Jaggi, M. (2017). A unified optimization view on generalized matching pursuit and Frank-Wolfe. In <i>Proceedings of the 20th International Conference on Artificial Intelligence and Statistics</i> (Vol. 54, pp. 860–868). Fort Lauderdale, FL, United States: ML Research Press."},"day":"21","type":"conference","quality_controlled":"1","language":[{"iso":"eng"}],"_id":"14205","abstract":[{"text":"Two of the most fundamental prototypes of greedy optimization are the matching pursuit and Frank-Wolfe algorithms. In this paper, we take a unified view on both classes of methods, leading to the first explicit convergence rates of matching pursuit methods in an optimization sense, for general sets of atoms. 
We derive sublinear (1/t) convergence for both classes on general smooth objectives, and linear convergence on strongly convex objectives, as well as a clear correspondence of algorithm variants. Our presented algorithms and rates are affine invariant, and do not need any incoherence or sparsity assumptions.","lang":"eng"}],"external_id":{"arxiv":["1702.06457"]},"extern":"1","publisher":"ML Research Press","article_processing_charge":"No","month":"02","department":[{"_id":"FrLo"}],"volume":54,"author":[{"full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683"},{"full_name":"Khanna, Rajiv","last_name":"Khanna","first_name":"Rajiv"},{"last_name":"Tschannen","full_name":"Tschannen, Michael","first_name":"Michael"},{"full_name":"Jaggi, Martin","last_name":"Jaggi","first_name":"Martin"}],"oa":1,"oa_version":"Preprint","title":"A unified optimization view on generalized matching pursuit and Frank-Wolfe","date_created":"2023-08-22T14:17:19Z","arxiv":1,"publication_status":"published","conference":{"end_date":"2017-04-22","location":"Fort Lauderdale, FL, United States","name":"AISTATS: Conference on Artificial Intelligence and Statistics","start_date":"2017-04-20"}},{"oa":1,"month":"05","department":[{"_id":"FrLo"}],"publication":"Advances in Neural Information Processing Systems","date_updated":"2024-10-14T12:29:50Z","author":[{"last_name":"Locatello","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco"},{"last_name":"Tschannen","full_name":"Tschannen, Michael","first_name":"Michael"},{"last_name":"Rätsch","full_name":"Rätsch, Gunnar","first_name":"Gunnar"},{"first_name":"Martin","full_name":"Jaggi, Martin","last_name":"Jaggi"}],"status":"public","article_processing_charge":"No","extern":"1","type":"conference","conference":{"end_date":"2017-12-09","location":"Long Beach, CA, United 
States","name":"NeurIPS: Neural Information Processing Systems","start_date":"2017-12-04"},"quality_controlled":"1","publication_status":"published","_id":"14206","language":[{"iso":"eng"}],"abstract":[{"text":"Greedy optimization methods such as Matching Pursuit (MP) and Frank-Wolfe (FW) algorithms regained popularity in recent years due to their simplicity, effectiveness and theoretical guarantees. MP and FW address optimization over the linear span and the convex hull of a set of atoms, respectively. In this paper, we consider the intermediate case of optimization over the convex cone, parametrized as the conic hull of a generic atom set, leading to the first principled definitions of non-negative MP algorithms for which we give explicit convergence rates and demonstrate excellent empirical performance. In particular, we derive sublinear (O(1/t)) convergence on general smooth and convex objectives, and linear convergence (O(e^−t)) on strongly convex objectives, in both cases for general sets of atoms. Furthermore, we establish a clear correspondence of our algorithms to known algorithms from the MP and FW literature. Our novel algorithms and analyses target general atom sets and general objective functions, and hence are directly applicable to a large variety of learning settings.","lang":"eng"}],"external_id":{"arxiv":["1705.11041"]},"day":"31","citation":{"ista":"Locatello F, Tschannen M, Rätsch G, Jaggi M. 2017. Greedy algorithms for cone constrained optimization with convergence guarantees. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems.","ama":"Locatello F, Tschannen M, Rätsch G, Jaggi M. Greedy algorithms for cone constrained optimization with convergence guarantees. In: <i>Advances in Neural Information Processing Systems</i>. ; 2017.","chicago":"Locatello, Francesco, Michael Tschannen, Gunnar Rätsch, and Martin Jaggi. 
“Greedy Algorithms for Cone Constrained Optimization with Convergence Guarantees.” In <i>Advances in Neural Information Processing Systems</i>, 2017.","short":"F. Locatello, M. Tschannen, G. Rätsch, M. Jaggi, in:, Advances in Neural Information Processing Systems, 2017.","ieee":"F. Locatello, M. Tschannen, G. Rätsch, and M. Jaggi, “Greedy algorithms for cone constrained optimization with convergence guarantees,” in <i>Advances in Neural Information Processing Systems</i>, Long Beach, CA, United States, 2017.","mla":"Locatello, Francesco, et al. “Greedy Algorithms for Cone Constrained Optimization with Convergence Guarantees.” <i>Advances in Neural Information Processing Systems</i>, 2017.","apa":"Locatello, F., Tschannen, M., Rätsch, G., &#38; Jaggi, M. (2017). Greedy algorithms for cone constrained optimization with convergence guarantees. In <i>Advances in Neural Information Processing Systems</i>. Long Beach, CA, United States."},"arxiv":1,"publication_identifier":{"isbn":["9781510860964"]},"year":"2017","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Preprint","main_file_link":[{"url":"https://arxiv.org/abs/1705.11041","open_access":"1"}],"date_published":"2017-05-31T00:00:00Z","date_created":"2023-08-22T14:17:38Z","title":"Greedy algorithms for cone constrained optimization with convergence guarantees"}]
