{"project":[{"grant_number":"101158077","_id":"8e35c14b-16d5-11f0-9cad-a3fc35339161","name":"FastML: Efficient and Cost-Effective Distributed Machine Learning"}],"status":"public","department":[{"_id":"DaAl"}],"author":[{"last_name":"Nguyen","first_name":"Anh Duc","full_name":"Nguyen, Anh Duc"},{"id":"D0CF4148-C985-11E9-8066-0BDEE5697425","last_name":"Markov","first_name":"Ilia","full_name":"Markov, Ilia"},{"last_name":"Wu","full_name":"Wu, Frank Zhengqing","first_name":"Frank Zhengqing"},{"first_name":"Ali","full_name":"Ramezani-Kebrya, Ali","last_name":"Ramezani-Kebrya"},{"last_name":"Antonakopoulos","full_name":"Antonakopoulos, Kimon","first_name":"Kimon"},{"id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","full_name":"Alistarh, Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh"},{"full_name":"Cevher, Volkan","first_name":"Volkan","last_name":"Cevher"}],"date_updated":"2025-12-16T12:46:54Z","OA_place":"publisher","_id":"20821","day":"01","ddc":["000"],"publication":"42nd International Conference on Machine Learning","publisher":"ML Research Press","scopus_import":"1","file_date_updated":"2025-12-16T12:45:41Z","month":"05","year":"2025","title":"Layer-wise quantization for quantized optimistic dual averaging","OA_type":"gold","oa":1,"arxiv":1,"acknowledgement":"This work was supported by Hasler Foundation Program: Hasler Responsible AI (project number 21043). The research was also sponsored by the Army Research Office and was accomplished under Grant Number W911NF-24-1-0048. This work was further funded by the Swiss National Science Foundation (SNSF) under grant number 200021_205011. We also acknowledge project A11 of the Swiss National Supercomputing Centre (CSCS) for providing computing resources. Dan Alistarh and Ilia Markov were supported in part through the ERC Proof-of-Concept grant FastML (Grant Agreement 101158077). Ali Ramezani-Kebrya was supported by the Research Council of Norway through FRIPRO Grant under project number 356103, its Centres of Excellence scheme, Integreat - Norwegian Centre for knowledge-driven machine learning under project number 332645 - and its Centre for Research-based Innovation funding scheme (Visual Intelligence under grant no. 309439).","volume":267,"language":[{"iso":"eng"}],"date_published":"2025-05-01T00:00:00Z","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","short":"CC BY (4.0)"},"external_id":{"arxiv":["2505.14371"]},"publication_identifier":{"eissn":["2640-3498"]},"conference":{"location":"Vancouver, Canada","start_date":"2025-07-13","end_date":"2025-07-19","name":"ICML: International Conference on Machine Learning"},"date_created":"2025-12-14T23:02:06Z","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","abstract":[{"text":"Modern deep neural networks exhibit heterogeneity across numerous layers of various types such as residuals, multi-head attention, etc., due to varying structures (dimensions, activation functions, etc.), distinct representation characteristics, which impact predictions. We develop a general layer-wise quantization framework with tight variance and code-length bounds, adapting to the heterogeneities over the course of training. We then apply a new layer-wise quantization technique within distributed variational inequalities (VIs), proposing a novel Quantized Optimistic Dual Averaging (QODA) algorithm with adaptive learning rates, which achieves competitive convergence rates for monotone VIs. We empirically show that QODA achieves up to a 150% speedup over the baselines in end-to-end training time for training Wasserstein GAN on 12+ GPUs.","lang":"eng"}],"has_accepted_license":"1","alternative_title":["PMLR"],"oa_version":"Published Version","quality_controlled":"1","publication_status":"published","intvolume":" 267","citation":{"chicago":"Nguyen, Anh Duc, Ilia Markov, Frank Zhengqing Wu, Ali Ramezani-Kebrya, Kimon Antonakopoulos, Dan-Adrian Alistarh, and Volkan Cevher. “Layer-Wise Quantization for Quantized Optimistic Dual Averaging.” In 42nd International Conference on Machine Learning, 267:46026–72. ML Research Press, 2025.","ieee":"A. D. Nguyen et al., “Layer-wise quantization for quantized optimistic dual averaging,” in 42nd International Conference on Machine Learning, Vancouver, Canada, 2025, vol. 267, pp. 46026–46072.","ama":"Nguyen AD, Markov I, Wu FZ, et al. Layer-wise quantization for quantized optimistic dual averaging. In: 42nd International Conference on Machine Learning. Vol 267. ML Research Press; 2025:46026-46072.","mla":"Nguyen, Anh Duc, et al. “Layer-Wise Quantization for Quantized Optimistic Dual Averaging.” 42nd International Conference on Machine Learning, vol. 267, ML Research Press, 2025, pp. 46026–72.","apa":"Nguyen, A. D., Markov, I., Wu, F. Z., Ramezani-Kebrya, A., Antonakopoulos, K., Alistarh, D.-A., & Cevher, V. (2025). Layer-wise quantization for quantized optimistic dual averaging. In 42nd International Conference on Machine Learning (Vol. 267, pp. 46026–46072). Vancouver, Canada: ML Research Press.","ista":"Nguyen AD, Markov I, Wu FZ, Ramezani-Kebrya A, Antonakopoulos K, Alistarh D-A, Cevher V. 2025. Layer-wise quantization for quantized optimistic dual averaging. 42nd International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 267, 46026–46072.","short":"A.D. Nguyen, I. Markov, F.Z. Wu, A. Ramezani-Kebrya, K. Antonakopoulos, D.-A. Alistarh, V. Cevher, in:, 42nd International Conference on Machine Learning, ML Research Press, 2025, pp. 46026–46072."},"file":[{"relation":"main_file","access_level":"open_access","content_type":"application/pdf","date_created":"2025-12-16T12:45:41Z","date_updated":"2025-12-16T12:45:41Z","creator":"dernst","file_size":756213,"file_id":"20830","success":1,"file_name":"2025_ICML_Nguyen.pdf","checksum":"a7edf0e4304171a3e035842b3aab1704"}],"article_processing_charge":"No","page":"46026-46072","type":"conference"}