[{"department":[{"_id":"DaAl"}],"article_processing_charge":"No","main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/1680e9fa7b4dd5d62ece800239bb53bd-Paper.pdf","open_access":"1"}],"date_created":"2022-06-19T22:01:58Z","oa_version":"Published Version","title":"Distributed principal component analysis with limited communication","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020"},{"_id":"260C2330-B435-11E9-9278-68D0E5697425","name":"ISTplus - Postdoctoral Fellowships","grant_number":"754411","call_identifier":"H2020"}],"external_id":{"arxiv":["2110.14391"]},"abstract":[{"lang":"eng","text":"We study efficient distributed algorithms for the fundamental problem of principal component analysis and leading eigenvector computation on the sphere, when the data are randomly distributed among a set of computational nodes. We propose a new quantized variant of Riemannian gradient descent to solve this problem, and prove that the algorithm converges with high probability under a set of necessary spherical-convexity properties. We give bounds on the number of bits transmitted by the algorithm under common initialization schemes, and investigate the dependency on the problem dimension in each case."}],"arxiv":1,"type":"conference","month":"12","corr_author":"1","acknowledgement":"We would like to thank the anonymous reviewers for helpful comments and suggestions. We also thank Aurelien Lucchi and Antonio Orvieto for fruitful discussions at an early stage of this work. FA is partially supported by the SNSF under research project No. 192363 and conducted part of this work while at IST Austria under the European Union’s Horizon 2020 research and innovation programme (grant agreement No. 805223 ScaleML). 
PD partly conducted this work while at IST Austria and was supported by the European Union’s Horizon 2020 programme under the Marie Skłodowska-Curie grant agreement No. 754411.","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"_id":"11452","date_published":"2021-12-01T00:00:00Z","language":[{"iso":"eng"}],"publication_status":"published","intvolume":"         4","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"01","volume":4,"oa":1,"quality_controlled":"1","year":"2021","author":[{"full_name":"Alimisis, Foivos","last_name":"Alimisis","first_name":"Foivos"},{"full_name":"Davies, Peter","first_name":"Peter","last_name":"Davies","orcid":"0000-0002-5646-9524","id":"11396234-BB50-11E9-B24C-90FCE5697425"},{"full_name":"Vandereycken, Bart","first_name":"Bart","last_name":"Vandereycken"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X","first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian"}],"date_updated":"2025-04-14T07:43:57Z","ec_funded":1,"scopus_import":"1","conference":{"name":"NeurIPS: Neural Information Processing Systems","start_date":"2021-12-06","end_date":"2021-12-14","location":"Virtual, Online"},"publisher":"Neural Information Processing Systems Foundation","citation":{"ama":"Alimisis F, Davies P, Vandereycken B, Alistarh D-A. Distributed principal component analysis with limited communication. In: <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>. Vol 4. Neural Information Processing Systems Foundation; 2021:2823-2834.","ieee":"F. Alimisis, P. Davies, B. Vandereycken, and D.-A. Alistarh, “Distributed principal component analysis with limited communication,” in <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 4, pp. 2823–2834.","apa":"Alimisis, F., Davies, P., Vandereycken, B., &#38; Alistarh, D.-A. (2021). 
Distributed principal component analysis with limited communication. In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i> (Vol. 4, pp. 2823–2834). Virtual, Online: Neural Information Processing Systems Foundation.","mla":"Alimisis, Foivos, et al. “Distributed Principal Component Analysis with Limited Communication.” <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, vol. 4, Neural Information Processing Systems Foundation, 2021, pp. 2823–34.","short":"F. Alimisis, P. Davies, B. Vandereycken, D.-A. Alistarh, in:, Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 2823–2834.","chicago":"Alimisis, Foivos, Peter Davies, Bart Vandereycken, and Dan-Adrian Alistarh. “Distributed Principal Component Analysis with Limited Communication.” In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, 4:2823–34. Neural Information Processing Systems Foundation, 2021.","ista":"Alimisis F, Davies P, Vandereycken B, Alistarh D-A. 2021. Distributed principal component analysis with limited communication. Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 4, 2823–2834."},"page":"2823-2834","status":"public","publication":"Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems"},{"language":[{"iso":"eng"}],"intvolume":"        20","publication_status":"published","_id":"11453","date_published":"2021-12-01T00:00:00Z","type":"conference","month":"12","abstract":[{"text":"Neuronal computations depend on synaptic connectivity and intrinsic electrophysiological properties. 
Synaptic connectivity determines which inputs from presynaptic neurons are integrated, while cellular properties determine how inputs are filtered over time. Unlike their biological counterparts, most computational approaches to learning in simulated neural networks are limited to changes in synaptic connectivity. However, if intrinsic parameters change, neural computations are altered drastically. Here, we include the parameters that determine the intrinsic properties,\r\ne.g., time constants and reset potential, into the learning paradigm. Using sparse feedback signals that indicate target spike times, and gradient-based parameter updates, we show that the intrinsic parameters can be learned along with the synaptic weights to produce specific input-output functions. Specifically, we use a teacher-student paradigm in which a randomly initialised leaky integrate-and-fire or resonate-and-fire neuron must recover the parameters of a teacher neuron. We show that complex temporal functions can be learned online and without backpropagation through time, relying on event-based updates only. Our results are a step towards online learning of neural computations from ungraded and unsigned sparse feedback signals with a biologically inspired learning mechanism.","lang":"eng"}],"project":[{"_id":"c084a126-5a5b-11eb-8a69-d75314a70a87","name":"What’s in a memory? Spatiotemporal dynamics in strongly coupled recurrent neuronal networks.","grant_number":"214316/Z/18/Z"}],"publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"acknowledgement":"We would like to thank Professor Dr. Henning Sprekeler for his valuable suggestions and Dr. Andrew Saxe, Milan Klöwer and Anna Wallis for their constructive feedback on the manuscript. 
Lukas Braun was supported by the Network of European Neuroscience Schools through their NENS Exchange Grant program, by the European Union through their European Community Action Scheme for the Mobility of University Students, the Woodward Scholarship awarded by Wadham College, Oxford and the Medical Research Council [MR/N013468/1]. Tim P. Vogels was supported by a Wellcome Trust Senior Research Fellowship [214316/Z/18/Z].","corr_author":"1","department":[{"_id":"TiVo"}],"title":"Online learning of neural computations from sparse temporal feedback","oa_version":"Published Version","date_created":"2022-06-19T22:01:59Z","article_processing_charge":"No","main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/88e1ce84f9feef5a08d0df0334c53468-Paper.pdf","open_access":"1"}],"publisher":"Neural Information Processing Systems Foundation","conference":{"location":"Virtual, Online","name":"NeurIPS: Neural Information Processing Systems","start_date":"2021-12-06","end_date":"2021-12-14"},"scopus_import":"1","status":"public","publication":"Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems","citation":{"mla":"Braun, Lukas, and Tim P. Vogels. “Online Learning of Neural Computations from Sparse Temporal Feedback.” <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, vol. 20, Neural Information Processing Systems Foundation, 2021, pp. 16437–50.","apa":"Braun, L., &#38; Vogels, T. P. (2021). Online learning of neural computations from sparse temporal feedback. In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i> (Vol. 20, pp. 16437–16450). Virtual, Online: Neural Information Processing Systems Foundation.","ieee":"L. Braun and T. P. 
Vogels, “Online learning of neural computations from sparse temporal feedback,” in <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 20, pp. 16437–16450.","ama":"Braun L, Vogels TP. Online learning of neural computations from sparse temporal feedback. In: <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>. Vol 20. Neural Information Processing Systems Foundation; 2021:16437-16450.","chicago":"Braun, Lukas, and Tim P Vogels. “Online Learning of Neural Computations from Sparse Temporal Feedback.” In <i>Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems</i>, 20:16437–50. Neural Information Processing Systems Foundation, 2021.","ista":"Braun L, Vogels TP. 2021. Online learning of neural computations from sparse temporal feedback. Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 20, 16437–16450.","short":"L. Braun, T.P. Vogels, in:, Advances in Neural Information Processing Systems - 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 
16437–16450."},"page":"16437-16450","author":[{"first_name":"Lukas","last_name":"Braun","full_name":"Braun, Lukas"},{"full_name":"Vogels, Tim P","first_name":"Tim P","last_name":"Vogels","orcid":"0000-0003-3295-6181","id":"CB6FF8D2-008F-11EA-8E08-2637E6697425"}],"year":"2021","date_updated":"2025-04-14T09:44:14Z","quality_controlled":"1","oa":1,"day":"01","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","volume":20},{"scopus_import":"1","ec_funded":1,"publisher":"Neural Information Processing Systems Foundation","conference":{"end_date":"2021-12-14","start_date":"2021-12-06","name":"NeurIPS: Neural Information Processing Systems","location":"Virtual, Online"},"page":"14873-14886","citation":{"ista":"Frantar E, Kurtic E, Alistarh D-A. 2021. M-FAC: Efficient matrix-free approximations of second-order information. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 34, 14873–14886.","short":"E. Frantar, E. Kurtic, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 14873–14886.","chicago":"Frantar, Elias, Eldar Kurtic, and Dan-Adrian Alistarh. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:14873–86. Neural Information Processing Systems Foundation, 2021.","mla":"Frantar, Elias, et al. “M-FAC: Efficient Matrix-Free Approximations of Second-Order Information.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Neural Information Processing Systems Foundation, 2021, pp. 14873–86.","ama":"Frantar E, Kurtic E, Alistarh D-A. M-FAC: Efficient matrix-free approximations of second-order information. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. 
Neural Information Processing Systems Foundation; 2021:14873-14886.","apa":"Frantar, E., Kurtic, E., &#38; Alistarh, D.-A. (2021). M-FAC: Efficient matrix-free approximations of second-order information. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 14873–14886). Virtual, Online: Neural Information Processing Systems Foundation.","ieee":"E. Frantar, E. Kurtic, and D.-A. Alistarh, “M-FAC: Efficient matrix-free approximations of second-order information,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 14873–14886."},"status":"public","publication":"35th Conference on Neural Information Processing Systems","year":"2021","author":[{"first_name":"Elias","last_name":"Frantar","full_name":"Frantar, Elias","id":"09a8f98d-ec99-11ea-ae11-c063a7b7fe5f"},{"id":"47beb3a5-07b5-11eb-9b87-b108ec578218","first_name":"Eldar","last_name":"Kurtic","full_name":"Kurtic, Eldar"},{"first_name":"Dan-Adrian","last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"date_updated":"2025-05-14T11:28:00Z","oa":1,"quality_controlled":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","alternative_title":["Advances in Neural Information Processing Systems"],"day":"06","volume":34,"language":[{"iso":"eng"}],"publication_status":"published","intvolume":"        34","_id":"11463","date_published":"2021-12-06T00:00:00Z","project":[{"_id":"268A44D6-B435-11E9-9278-68D0E5697425","name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223","call_identifier":"H2020"}],"external_id":{"arxiv":["2107.03356"]},"abstract":[{"lang":"eng","text":"Efficiently approximating local curvature information of the loss function is a key tool for optimization and compression of deep neural networks. 
Yet, most existing methods to approximate second-order information have high computational\r\nor storage costs, which limits their practicality. In this work, we investigate matrix-free, linear-time approaches for estimating Inverse-Hessian Vector Products (IHVPs) for the case when the Hessian can be approximated as a sum of rank-one matrices, as in the classic approximation of the Hessian by the empirical Fisher matrix. We propose two new algorithms: the first is tailored towards network compression and can compute the IHVP for dimension d, if the Hessian is given as a sum of m rank-one matrices, using O(dm2) precomputation, O(dm) cost for computing the IHVP, and query cost O(m) for any single element of the inverse Hessian. The second algorithm targets an optimization setting, where we wish to compute the product between the inverse Hessian, estimated over a sliding window of optimization steps, and a given gradient direction, as required for preconditioned SGD. We give an algorithm with cost O(dm + m2) for computing the IHVP and O(dm + m3) for adding or removing any gradient from the sliding window. These\r\ntwo algorithms yield state-of-the-art results for network pruning and optimization with lower computational overhead relative to existing second-order methods. 
Implementations are available at [9] and [17]."}],"month":"12","arxiv":1,"type":"conference","corr_author":"1","acknowledgement":"We gratefully acknowledge funding the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), as well as computational support from Amazon Web Services (AWS) EC2.","publication_identifier":{"issn":["1049-5258"],"isbn":["9781713845393"]},"department":[{"_id":"DaAl"}],"main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/7cfd5df443b4eb0d69886a583b33de4c-Paper.pdf","open_access":"1"}],"article_processing_charge":"No","date_created":"2022-06-26T22:01:35Z","oa_version":"Published Version","title":"M-FAC: Efficient matrix-free approximations of second-order information"},{"year":"2021","author":[{"last_name":"Alistarh","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X"},{"full_name":"Korhonen, Janne","last_name":"Korhonen","first_name":"Janne","id":"C5402D42-15BC-11E9-A202-CA2BE6697425"}],"date_updated":"2025-05-14T11:27:51Z","scopus_import":"1","ec_funded":1,"publisher":"Neural Information Processing Systems Foundation","conference":{"location":"Virtual, Online","start_date":"2021-12-06","end_date":"2021-12-14","name":"NeurIPS: Neural Information Processing Systems"},"publication":"35th Conference on Neural Information Processing Systems","status":"public","citation":{"ieee":"D.-A. Alistarh and J. Korhonen, “Towards tight communication lower bounds for distributed optimisation,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 7254–7266.","mla":"Alistarh, Dan-Adrian, and Janne Korhonen. “Towards Tight Communication Lower Bounds for Distributed Optimisation.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Neural Information Processing Systems Foundation, 2021, pp. 
7254–66.","apa":"Alistarh, D.-A., &#38; Korhonen, J. (2021). Towards tight communication lower bounds for distributed optimisation. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 7254–7266). Virtual, Online: Neural Information Processing Systems Foundation.","ama":"Alistarh D-A, Korhonen J. Towards tight communication lower bounds for distributed optimisation. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Neural Information Processing Systems Foundation; 2021:7254-7266.","ista":"Alistarh D-A, Korhonen J. 2021. Towards tight communication lower bounds for distributed optimisation. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 34, 7254–7266.","chicago":"Alistarh, Dan-Adrian, and Janne Korhonen. “Towards Tight Communication Lower Bounds for Distributed Optimisation.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:7254–66. Neural Information Processing Systems Foundation, 2021.","short":"D.-A. Alistarh, J. Korhonen, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 
7254–7266."},"page":"7254-7266","alternative_title":["Advances in Neural Information Processing Systems"],"day":"06","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","volume":34,"quality_controlled":"1","oa":1,"_id":"11464","date_published":"2021-12-06T00:00:00Z","language":[{"iso":"eng"}],"publication_status":"published","intvolume":"        34","department":[{"_id":"DaAl"}],"title":"Towards tight communication lower bounds for distributed optimisation","article_processing_charge":"No","main_file_link":[{"url":"https://proceedings.neurips.cc/paper/2021/file/3b92d18aa7a6176dd37d372bc2f1eb71-Paper.pdf","open_access":"1"}],"date_created":"2022-06-26T22:01:35Z","oa_version":"Published Version","abstract":[{"lang":"eng","text":"We consider a standard distributed optimisation setting where N machines, each holding a d-dimensional function\r\nfi, aim to jointly minimise the sum of the functions ∑Ni=1fi(x). This problem arises naturally in large-scale distributed optimisation, where a standard solution is to apply variants of (stochastic) gradient descent. We focus on the communication complexity of this problem: our main result provides the first fully unconditional bounds on total number of bits which need to be sent and received by the N machines to solve this problem under point-to-point communication, within a given error-tolerance. Specifically, we show that Ω(Ndlogd/Nε) total bits need to be communicated between the machines to find an additive ϵ-approximation to the minimum of ∑Ni=1fi(x). The result holds for both deterministic and randomised algorithms, and, importantly, requires no assumptions on the algorithm structure. The lower bound is tight under certain restrictions on parameter values, and is matched within constant factors for quadratic objectives by a new variant of quantised gradient descent, which we describe and analyse. 
Our results bring over tools from communication complexity to distributed optimisation, which has potential for further applications."}],"arxiv":1,"type":"conference","month":"12","project":[{"name":"Elastic Coordination for Scalable Machine Learning","grant_number":"805223","_id":"268A44D6-B435-11E9-9278-68D0E5697425","call_identifier":"H2020"}],"external_id":{"arxiv":["2010.08222"]},"publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"corr_author":"1","acknowledgement":"We thank the NeurIPS reviewers for insightful comments that helped us improve the positioning of our results, as well as for pointing out the subsampling approach for complementing the randomised lower bound. We also thank Foivos Alimisis and Peter Davies for useful discussions. This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML)."},{"publication_identifier":{"isbn":["9781713845393"]},"external_id":{"arxiv":["2106.04619"]},"abstract":[{"lang":"eng","text":"Self-supervised representation learning has shown remarkable success in a number of domains. A common practice is to perform data augmentation via hand-crafted transformations intended to leave the semantics of the data invariant. We seek to understand the empirical success of this approach from a theoretical perspective. We formulate the augmentation process as a latent variable model by postulating a partition of the latent representation into a content component, which is assumed invariant to augmentation, and a style component, which is allowed to change. Unlike prior work on disentanglement and independent component analysis, we allow for both nontrivial statistical and causal dependencies in the latent space. 
We study the identifiability of the latent representation based on pairs of views of the observations and prove sufficient conditions that allow us to identify the invariant content partition up to an invertible mapping in both generative and discriminative settings. We find numerical simulations with dependent latent variables are consistent with our theory. Lastly, we introduce Causal3DIdent, a dataset of high-dimensional, visually complex images with rich causal dependencies, which we use to study the effect of data augmentations performed in practice."}],"type":"conference","arxiv":1,"month":"06","article_processing_charge":"No","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2106.04619"}],"date_created":"2023-08-22T14:04:36Z","oa_version":"Preprint","title":"Self-supervised learning with data augmentations provably isolates content from style","department":[{"_id":"FrLo"}],"publication_status":"published","intvolume":"        34","language":[{"iso":"eng"}],"date_published":"2021-06-08T00:00:00Z","_id":"14179","oa":1,"quality_controlled":"1","volume":34,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"08","page":"16451-16467","citation":{"ista":"Kügelgen J von, Sharma Y, Gresele L, Brendel W, Schölkopf B, Besserve M, Locatello F. 2021. Self-supervised learning with data augmentations provably isolates content from style. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 16451–16467.","short":"J. von Kügelgen, Y. Sharma, L. Gresele, W. Brendel, B. Schölkopf, M. Besserve, F. Locatello, in:, Advances in Neural Information Processing Systems, 2021, pp. 16451–16467.","chicago":"Kügelgen, Julius von, Yash Sharma, Luigi Gresele, Wieland Brendel, Bernhard Schölkopf, Michel Besserve, and Francesco Locatello. “Self-Supervised Learning with Data Augmentations Provably Isolates Content from Style.” In <i>Advances in Neural Information Processing Systems</i>, 34:16451–67, 2021.","ieee":"J. 
von Kügelgen <i>et al.</i>, “Self-supervised learning with data augmentations provably isolates content from style,” in <i>Advances in Neural Information Processing Systems</i>, Virtual, 2021, vol. 34, pp. 16451–16467.","mla":"Kügelgen, Julius von, et al. “Self-Supervised Learning with Data Augmentations Provably Isolates Content from Style.” <i>Advances in Neural Information Processing Systems</i>, vol. 34, 2021, pp. 16451–67.","ama":"Kügelgen J von, Sharma Y, Gresele L, et al. Self-supervised learning with data augmentations provably isolates content from style. In: <i>Advances in Neural Information Processing Systems</i>. Vol 34. ; 2021:16451-16467.","apa":"Kügelgen, J. von, Sharma, Y., Gresele, L., Brendel, W., Schölkopf, B., Besserve, M., &#38; Locatello, F. (2021). Self-supervised learning with data augmentations provably isolates content from style. In <i>Advances in Neural Information Processing Systems</i> (Vol. 34, pp. 16451–16467). Virtual."},"status":"public","publication":"Advances in Neural Information Processing Systems","conference":{"location":"Virtual","end_date":"2021-12-10","start_date":"2021-12-07","name":"NeurIPS: Neural Information Processing Systems"},"extern":"1","date_updated":"2023-09-11T10:33:19Z","year":"2021","author":[{"full_name":"Kügelgen, Julius von","first_name":"Julius von","last_name":"Kügelgen"},{"first_name":"Yash","last_name":"Sharma","full_name":"Sharma, Yash"},{"full_name":"Gresele, Luigi","last_name":"Gresele","first_name":"Luigi"},{"last_name":"Brendel","first_name":"Wieland","full_name":"Brendel, Wieland"},{"last_name":"Schölkopf","first_name":"Bernhard","full_name":"Schölkopf, Bernhard"},{"last_name":"Besserve","first_name":"Michel","full_name":"Besserve, Michel"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco"}]},{"_id":"14180","date_published":"2021-10-12T00:00:00Z","language":[{"iso":"eng"}],"intvolume":"    
    34","publication_status":"published","department":[{"_id":"FrLo"}],"title":"Dynamic inference with neural interpreters","oa_version":"Preprint","date_created":"2023-08-22T14:04:55Z","article_processing_charge":"No","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2110.06399","open_access":"1"}],"arxiv":1,"month":"10","type":"conference","abstract":[{"lang":"eng","text":"Modern neural network architectures can leverage large amounts of data to generalize well within the training distribution. However, they are less capable of systematic generalization to data drawn from unseen but related distributions, a feat that is hypothesized to require compositional reasoning and reuse of knowledge. In this work, we present Neural Interpreters, an architecture that factorizes inference in a self-attention network as a system of modules, which we call \\emph{functions}. Inputs to the model are routed through a sequence of functions in a way that is end-to-end learned. The proposed architecture can flexibly compose computation along width and depth, and lends itself well to capacity extension after training. To demonstrate the versatility of Neural Interpreters, we evaluate it in two distinct settings: image classification and visual abstract reasoning on Raven Progressive Matrices. In the former, we show that Neural Interpreters perform on par with the vision transformer using fewer parameters, while being transferrable to a new task in a sample efficient manner. In the latter, we find that Neural Interpreters are competitive with respect to the state-of-the-art in terms of systematic generalization. 
"}],"external_id":{"arxiv":["2110.06399"]},"publication_identifier":{"isbn":["9781713845393"]},"author":[{"first_name":"Nasim","last_name":"Rahaman","full_name":"Rahaman, Nasim"},{"last_name":"Gondal","first_name":"Muhammad Waleed","full_name":"Gondal, Muhammad Waleed"},{"first_name":"Shruti","last_name":"Joshi","full_name":"Joshi, Shruti"},{"first_name":"Peter","last_name":"Gehler","full_name":"Gehler, Peter"},{"full_name":"Bengio, Yoshua","first_name":"Yoshua","last_name":"Bengio"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","first_name":"Francesco","last_name":"Locatello"},{"last_name":"Schölkopf","first_name":"Bernhard","full_name":"Schölkopf, Bernhard"}],"year":"2021","date_updated":"2024-10-14T12:27:25Z","extern":"1","conference":{"location":"Virtual","end_date":"2021-12-10","start_date":"2021-12-07","name":"NeurIPS: Neural Information Processing Systems"},"publication":"Advances in Neural Information Processing Systems","status":"public","citation":{"ieee":"N. Rahaman <i>et al.</i>, “Dynamic inference with neural interpreters,” in <i>Advances in Neural Information Processing Systems</i>, Virtual, 2021, vol. 34, pp. 10985–10998.","mla":"Rahaman, Nasim, et al. “Dynamic Inference with Neural Interpreters.” <i>Advances in Neural Information Processing Systems</i>, vol. 34, 2021, pp. 10985–98.","apa":"Rahaman, N., Gondal, M. W., Joshi, S., Gehler, P., Bengio, Y., Locatello, F., &#38; Schölkopf, B. (2021). Dynamic inference with neural interpreters. In <i>Advances in Neural Information Processing Systems</i> (Vol. 34, pp. 10985–10998). Virtual.","ama":"Rahaman N, Gondal MW, Joshi S, et al. Dynamic inference with neural interpreters. In: <i>Advances in Neural Information Processing Systems</i>. Vol 34. ; 2021:10985-10998.","short":"N. Rahaman, M.W. Gondal, S. Joshi, P. Gehler, Y. Bengio, F. Locatello, B. Schölkopf, in:, Advances in Neural Information Processing Systems, 2021, pp. 
10985–10998.","ista":"Rahaman N, Gondal MW, Joshi S, Gehler P, Bengio Y, Locatello F, Schölkopf B. 2021. Dynamic inference with neural interpreters. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 10985–10998.","chicago":"Rahaman, Nasim, Muhammad Waleed Gondal, Shruti Joshi, Peter Gehler, Yoshua Bengio, Francesco Locatello, and Bernhard Schölkopf. “Dynamic Inference with Neural Interpreters.” In <i>Advances in Neural Information Processing Systems</i>, 34:10985–98, 2021."},"page":"10985-10998","day":"12","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","volume":34,"quality_controlled":"1","oa":1},{"publication_identifier":{"isbn":["9781713845393"]},"type":"conference","month":"07","arxiv":1,"abstract":[{"text":"When machine learning systems meet real world applications, accuracy is only\r\none of several requirements. In this paper, we assay a complementary\r\nperspective originating from the increasing availability of pre-trained and\r\nregularly improving state-of-the-art models. While new improved models develop\r\nat a fast pace, downstream tasks vary more slowly or stay constant. Assume that\r\nwe have a large unlabelled data set for which we want to maintain accurate\r\npredictions. Whenever a new and presumably better ML models becomes available,\r\nwe encounter two problems: (i) given a limited budget, which data points should\r\nbe re-evaluated using the new model?; and (ii) if the new predictions differ\r\nfrom the current ones, should we update? Problem (i) is about compute cost,\r\nwhich matters for very large data sets and models. Problem (ii) is about\r\nmaintaining consistency of the predictions, which can be highly relevant for\r\ndownstream applications; our demand is to avoid negative flips, i.e., changing\r\ncorrect to incorrect predictions. In this paper, we formalize the Prediction\r\nUpdate Problem and present an efficient probabilistic approach as answer to the\r\nabove questions. 
In extensive experiments on standard classification benchmark\r\ndata sets, we show that our method outperforms alternative strategies along key\r\nmetrics for backward-compatible prediction updates.","lang":"eng"}],"external_id":{"arxiv":["2107.01057"]},"title":"Backward-compatible prediction updates: A probabilistic approach","date_created":"2023-08-22T14:05:41Z","oa_version":"Preprint","article_processing_charge":"No","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2107.01057"}],"department":[{"_id":"FrLo"}],"intvolume":"        34","publication_status":"published","language":[{"iso":"eng"}],"date_published":"2021-07-02T00:00:00Z","_id":"14182","quality_controlled":"1","oa":1,"volume":34,"day":"02","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","publication":"35th Conference on Neural Information Processing Systems","status":"public","page":"116-128","citation":{"short":"F. Träuble, J. von Kügelgen, M. Kleindessner, F. Locatello, B. Schölkopf, P. Gehler, in:, 35th Conference on Neural Information Processing Systems, 2021, pp. 116–128.","chicago":"Träuble, Frederik, Julius von Kügelgen, Matthäus Kleindessner, Francesco Locatello, Bernhard Schölkopf, and Peter Gehler. “Backward-Compatible Prediction Updates: A Probabilistic Approach.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:116–28, 2021.","ista":"Träuble F, Kügelgen J von, Kleindessner M, Locatello F, Schölkopf B, Gehler P. 2021. Backward-compatible prediction updates: A probabilistic approach. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 116–128.","apa":"Träuble, F., Kügelgen, J. von, Kleindessner, M., Locatello, F., Schölkopf, B., &#38; Gehler, P. (2021). Backward-compatible prediction updates: A probabilistic approach. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 116–128). 
Virtual.","ama":"Träuble F, Kügelgen J von, Kleindessner M, Locatello F, Schölkopf B, Gehler P. Backward-compatible prediction updates: A probabilistic approach. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. ; 2021:116-128.","ieee":"F. Träuble, J. von Kügelgen, M. Kleindessner, F. Locatello, B. Schölkopf, and P. Gehler, “Backward-compatible prediction updates: A probabilistic approach,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, 2021, vol. 34, pp. 116–128.","mla":"Träuble, Frederik, et al. “Backward-Compatible Prediction Updates: A Probabilistic Approach.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, 2021, pp. 116–28."},"conference":{"location":"Virtual","end_date":"2021-12-10","start_date":"2021-12-07","name":"NeurIPS: Neural Information Processing Systems"},"date_updated":"2023-09-11T11:31:59Z","extern":"1","author":[{"last_name":"Träuble","first_name":"Frederik","full_name":"Träuble, Frederik"},{"full_name":"Kügelgen, Julius von","first_name":"Julius von","last_name":"Kügelgen"},{"first_name":"Matthäus","last_name":"Kleindessner","full_name":"Kleindessner, Matthäus"},{"full_name":"Locatello, Francesco","first_name":"Francesco","last_name":"Locatello","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683"},{"first_name":"Bernhard","last_name":"Schölkopf","full_name":"Schölkopf, Bernhard"},{"full_name":"Gehler, Peter","first_name":"Peter","last_name":"Gehler"}],"year":"2021"},{"article_processing_charge":"No","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2106.02356"}],"date_created":"2022-01-03T10:50:02Z","oa_version":"Preprint","title":"PCA initialization for approximate message passing in rotationally invariant models","department":[{"_id":"MaMo"}],"corr_author":"1","acknowledgement":"M. Mondelli would like to thank László Erdős for helpful discussions. M. Mondelli was partially supported by the 2019 Lopez-Loreta Prize. R. 
Venkataramanan was partially supported by the Alan Turing Institute under the EPSRC grant EP/N510129/1.\r\n","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"project":[{"name":"Prix Lopez-Loretta 2019 - Marco Mondelli","_id":"059876FA-7A3F-11EA-A408-12923DDC885E"}],"external_id":{"arxiv":["2106.02356"]},"abstract":[{"text":"We study the problem of estimating a rank-$1$ signal in the presence of rotationally invariant noise-a class of perturbations more general than Gaussian noise. Principal Component Analysis (PCA) provides a natural estimator, and sharp results on its performance have been obtained in the high-dimensional regime. Recently, an Approximate Message Passing (AMP) algorithm has been proposed as an alternative estimator with the potential to improve the accuracy of PCA. However, the existing analysis of AMP requires an initialization that is both correlated with the signal and independent of the noise, which is often unrealistic in practice. In this work, we combine the two methods, and propose to initialize AMP with PCA. Our main result is a rigorous asymptotic characterization of the performance of this estimator. Both the AMP algorithm and its analysis differ from those previously derived in the Gaussian setting: at every iteration, our AMP algorithm requires a specific term to account for PCA initialization, while in the Gaussian case, PCA initialization affects only the first iteration of AMP. The proof is based on a two-phase artificial AMP that first approximates the PCA estimator and then mimics the true AMP. 
Our numerical simulations show an excellent agreement between AMP results and theoretical predictions, and suggest an interesting open direction on achieving Bayes-optimal performance.","lang":"eng"}],"type":"conference","arxiv":1,"month":"12","date_published":"2021-12-01T00:00:00Z","_id":"10593","publication_status":"published","intvolume":"        35","language":[{"iso":"eng"}],"volume":35,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"01","oa":1,"quality_controlled":"1","date_updated":"2025-04-15T07:50:11Z","year":"2021","author":[{"last_name":"Mondelli","first_name":"Marco","full_name":"Mondelli, Marco","orcid":"0000-0002-3242-7020","id":"27EB676C-8706-11E9-9510-7717E6697425"},{"last_name":"Venkataramanan","first_name":"Ramji","full_name":"Venkataramanan, Ramji"}],"citation":{"ama":"Mondelli M, Venkataramanan R. PCA initialization for approximate message passing in rotationally invariant models. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 35. Neural Information Processing Systems Foundation; 2021:29616-29629.","ieee":"M. Mondelli and R. Venkataramanan, “PCA initialization for approximate message passing in rotationally invariant models,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, 2021, vol. 35, pp. 29616–29629.","apa":"Mondelli, M., &#38; Venkataramanan, R. (2021). PCA initialization for approximate message passing in rotationally invariant models. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 35, pp. 29616–29629). Virtual: Neural Information Processing Systems Foundation.","mla":"Mondelli, Marco, and Ramji Venkataramanan. “PCA Initialization for Approximate Message Passing in Rotationally Invariant Models.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 35, Neural Information Processing Systems Foundation, 2021, pp. 29616–29.","ista":"Mondelli M, Venkataramanan R. 2021. 
PCA initialization for approximate message passing in rotationally invariant models. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 35, 29616–29629.","short":"M. Mondelli, R. Venkataramanan, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 29616–29629.","chicago":"Mondelli, Marco, and Ramji Venkataramanan. “PCA Initialization for Approximate Message Passing in Rotationally Invariant Models.” In <i>35th Conference on Neural Information Processing Systems</i>, 35:29616–29. Neural Information Processing Systems Foundation, 2021."},"page":"29616-29629","publication":"35th Conference on Neural Information Processing Systems","status":"public","scopus_import":"1","publisher":"Neural Information Processing Systems Foundation","conference":{"location":"Virtual","name":"NeurIPS: Neural Information Processing Systems","end_date":"2021-12-14","start_date":"2021-12-06"}},{"type":"conference","month":"12","arxiv":1,"abstract":[{"lang":"eng","text":"The question of how and why the phenomenon of mode connectivity occurs in training deep neural networks has gained remarkable attention in the research community. From a theoretical perspective, two possible explanations have been proposed: (i) the loss function has connected sublevel sets, and (ii) the solutions found by stochastic gradient descent are dropout stable. While these explanations provide insights into the phenomenon, their assumptions are not always satisfied in practice. In particular, the first approach requires the network to have one layer with order of N neurons (N being the number of training samples), while the second one requires the loss to be almost invariant after removing half of the neurons at each layer (up to some rescaling of the remaining ones). 
In this work, we improve both conditions by exploiting the quality of the features at every intermediate layer together with a milder over-parameterization condition. More specifically, we show that: (i) under generic assumptions on the features of intermediate layers, it suffices that the last two hidden layers have order of √N neurons, and (ii) if subsets of features at each layer are linearly separable, then no over-parameterization is needed to show the connectivity. Our experiments confirm that the proposed condition ensures the connectivity of solutions found by stochastic gradient descent, even in settings where the previous requirements do not hold."}],"external_id":{"arxiv":["2102.09671"]},"project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"acknowledgement":"MM was partially supported by the 2019 Lopez-Loreta Prize. QN and PB acknowledge support from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement no 757983).","corr_author":"1","department":[{"_id":"MaMo"}],"title":"When are solutions connected in deep networks?","date_created":"2022-01-03T10:56:20Z","oa_version":"Preprint","article_processing_charge":"No","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2102.09671"}],"language":[{"iso":"eng"}],"intvolume":"        35","publication_status":"published","_id":"10594","date_published":"2021-12-01T00:00:00Z","quality_controlled":"1","oa":1,"day":"01","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","volume":35,"conference":{"start_date":"2021-12-06","end_date":"2021-12-14","name":"35th Conference on Neural Information Processing Systems","location":"Virtual"},"publisher":"Neural Information Processing Systems Foundation","status":"public","publication":"35th Conference on Neural Information Processing 
Systems","citation":{"mla":"Nguyen, Quynh, et al. “When Are Solutions Connected in Deep Networks?” <i>35th Conference on Neural Information Processing Systems</i>, vol. 35, Neural Information Processing Systems Foundation, 2021.","ieee":"Q. Nguyen, P. Bréchet, and M. Mondelli, “When are solutions connected in deep networks?,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, 2021, vol. 35.","ama":"Nguyen Q, Bréchet P, Mondelli M. When are solutions connected in deep networks? In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 35. Neural Information Processing Systems Foundation; 2021.","apa":"Nguyen, Q., Bréchet, P., &#38; Mondelli, M. (2021). When are solutions connected in deep networks? In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 35). Virtual: Neural Information Processing Systems Foundation.","chicago":"Nguyen, Quynh, Pierre Bréchet, and Marco Mondelli. “When Are Solutions Connected in Deep Networks?” In <i>35th Conference on Neural Information Processing Systems</i>, Vol. 35. Neural Information Processing Systems Foundation, 2021.","ista":"Nguyen Q, Bréchet P, Mondelli M. 2021. When are solutions connected in deep networks? 35th Conference on Neural Information Processing Systems. 35th Conference on Neural Information Processing Systems vol. 35.","short":"Q. Nguyen, P. Bréchet, M. 
Mondelli, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021."},"author":[{"full_name":"Nguyen, Quynh","last_name":"Nguyen","first_name":"Quynh"},{"full_name":"Bréchet, Pierre","last_name":"Bréchet","first_name":"Pierre"},{"full_name":"Mondelli, Marco","last_name":"Mondelli","first_name":"Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","orcid":"0000-0002-3242-7020"}],"year":"2021","date_updated":"2025-04-15T07:50:11Z"},{"corr_author":"1","acknowledgement":"This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML), and a CNRS PEPS grant. This research was supported by the Scientific Service Units (SSU) of IST Austria through resources provided by Scientific Computing (SciComp). We would also like to thank Christoph Lampert for his feedback on an earlier version of this work, as well as for providing hardware for the Transformer-XL experiments.","publication_identifier":{"isbn":["9781713845393"],"issn":["1049-5258"]},"project":[{"call_identifier":"H2020","_id":"268A44D6-B435-11E9-9278-68D0E5697425","grant_number":"805223","name":"Elastic Coordination for Scalable Machine Learning"}],"external_id":{"arxiv":["2106.12379"]},"abstract":[{"text":"The increasing computational requirements of deep neural networks (DNNs) have led to significant interest in obtaining DNN models that are sparse, yet accurate. Recent work has investigated the even harder case of sparse training, where the DNN weights are, for as much as possible, already sparse to reduce computational costs during training. Existing sparse training methods are often empirical and can have lower accuracy relative to the dense baseline. 
In this paper, we present a general approach called Alternating Compressed/DeCompressed (AC/DC) training of DNNs, demonstrate convergence for a variant of the algorithm, and show that AC/DC outperforms existing sparse training methods in accuracy at similar computational budgets; at high sparsity levels, AC/DC even outperforms existing methods that rely on accurate pre-trained dense models. An important property of AC/DC is that it allows co-training of dense and sparse models, yielding accurate sparse–dense model pairs at the end of the training process. This is useful in practice, where compressed variants may be desirable for deployment in resource-constrained settings without re-doing the entire training flow, and also provides us with insights into the accuracy gap between dense and compressed models. The code is available at: https://github.com/IST-DASLab/ACDC.","lang":"eng"}],"type":"conference","arxiv":1,"month":"12","main_file_link":[{"open_access":"1","url":"https://proceedings.neurips.cc/paper/2021/file/48000647b315f6f00f913caa757a70b3-Paper.pdf"}],"article_processing_charge":"No","oa_version":"Published Version","date_created":"2022-06-20T12:11:53Z","title":"AC/DC: Alternating Compressed/DeCompressed training of deep neural networks","department":[{"_id":"GradSch"},{"_id":"DaAl"}],"publication_status":"published","intvolume":"        34","language":[{"iso":"eng"}],"date_published":"2021-12-06T00:00:00Z","_id":"11458","related_material":{"record":[{"relation":"dissertation_contains","id":"13074","status":"public"}]},"oa":1,"quality_controlled":"1","volume":34,"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","alternative_title":["Advances in Neural Information Processing Systems"],"day":"06","citation":{"ista":"Peste E-A, Iofinova EB, Vladu A, Alistarh D-A. 2021. AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. 35th Conference on Neural Information Processing Systems. 
NeurIPS: Neural Information Processing Systems, Advances in Neural Information Processing Systems, vol. 34, 8557–8570.","chicago":"Peste, Elena-Alexandra, Eugenia B Iofinova, Adrian Vladu, and Dan-Adrian Alistarh. “AC/DC: Alternating Compressed/DeCompressed Training of Deep Neural Networks.” In <i>35th Conference on Neural Information Processing Systems</i>, 34:8557–70. Neural Information Processing Systems Foundation, 2021.","short":"E.-A. Peste, E.B. Iofinova, A. Vladu, D.-A. Alistarh, in:, 35th Conference on Neural Information Processing Systems, Neural Information Processing Systems Foundation, 2021, pp. 8557–8570.","mla":"Peste, Elena-Alexandra, et al. “AC/DC: Alternating Compressed/DeCompressed Training of Deep Neural Networks.” <i>35th Conference on Neural Information Processing Systems</i>, vol. 34, Neural Information Processing Systems Foundation, 2021, pp. 8557–70.","ama":"Peste E-A, Iofinova EB, Vladu A, Alistarh D-A. AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. In: <i>35th Conference on Neural Information Processing Systems</i>. Vol 34. Neural Information Processing Systems Foundation; 2021:8557-8570.","ieee":"E.-A. Peste, E. B. Iofinova, A. Vladu, and D.-A. Alistarh, “AC/DC: Alternating Compressed/DeCompressed training of deep neural networks,” in <i>35th Conference on Neural Information Processing Systems</i>, Virtual, Online, 2021, vol. 34, pp. 8557–8570.","apa":"Peste, E.-A., Iofinova, E. B., Vladu, A., &#38; Alistarh, D.-A. (2021). AC/DC: Alternating Compressed/DeCompressed training of deep neural networks. In <i>35th Conference on Neural Information Processing Systems</i> (Vol. 34, pp. 8557–8570). 
Virtual, Online: Neural Information Processing Systems Foundation."},"page":"8557-8570","status":"public","publication":"35th Conference on Neural Information Processing Systems","ec_funded":1,"scopus_import":"1","publisher":"Neural Information Processing Systems Foundation","conference":{"start_date":"2021-12-06","end_date":"2021-12-14","name":"NeurIPS: Neural Information Processing Systems","location":"Virtual, Online"},"acknowledged_ssus":[{"_id":"ScienComp"}],"date_updated":"2026-04-07T13:30:19Z","year":"2021","author":[{"full_name":"Peste, Elena-Alexandra","last_name":"Peste","first_name":"Elena-Alexandra","id":"32D78294-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Iofinova","first_name":"Eugenia B","full_name":"Iofinova, Eugenia B","orcid":"0000-0002-7778-3221","id":"f9a17499-f6e0-11ea-865d-fdf9a3f77117"},{"last_name":"Vladu","first_name":"Adrian","full_name":"Vladu, Adrian"},{"full_name":"Alistarh, Dan-Adrian","first_name":"Dan-Adrian","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0003-3650-940X"}]}]
