[{"publisher":"IEEE","corr_author":"1","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","author":[{"full_name":"Chatterjee, Bapi","last_name":"Chatterjee","id":"3C41A08A-F248-11E8-B48F-1D18A9856A87","first_name":"Bapi","orcid":"0000-0002-2742-4028"},{"full_name":"Kungurtsev, Vyacheslav","last_name":"Kungurtsev","first_name":"Vyacheslav"},{"last_name":"Alistarh","full_name":"Alistarh, Dan-Adrian","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X"}],"day":"26","department":[{"_id":"DaAl"}],"abstract":[{"text":"Parallel SGD in a shared-memory setting is oft-represented by the popular Hogwild! algorithm, in which lock-free updates are asynchronously performed by multiple computing processes. Unfortunately, scaling Hogwild! to distributed workers is largely unexplored. Specifically, it is unknown if any adaptation of Hogwild! to the popular decentralized multi-GPU setting offers any competitive speedup, either empirically or theoretically. In this work, we investigate the potential of decentralizing Hogwild! by incorporating simultaneously (a) asynchronous local gradient updates on the shared memory of GPUs, and (b) non-blocking asynchronous decentralized federated averaging. A naive direct implementation shows degradation in performance, arising from scheduling overheads and concurrent write conflicts on GPUs. To mitigate these drawbacks, we investigate and propose a new method, based on careful block selection rules, which update only portions of the parameter vectors. Our experiments show that the resulting decentralized training method exhibits improved throughput and competitive accuracy for standard image classification benchmarks on the CIFAR-10, CIFAR-100, and Imagenet datasets. On the theoretical side, we prove that our method guarantees sublinear ergodic convergence rates for non-convex objectives.","lang":"eng"}],"external_id":{"isi":["001304430200075"]},"scopus_import":"1","publication_identifier":{"issn":["1063-6927"],"eissn":["2575-8411"],"isbn":["9798350386059"]},"isi":1,"citation":{"ama":"Chatterjee B, Kungurtsev V, Alistarh D-A. Federated SGD with local asynchrony. In: <i>Proceedings of the 44th International Conference on Distributed Computing Systems</i>. IEEE; 2024:857-868. doi:<a href=\"https://doi.org/10.1109/ICDCS60910.2024.00084\">10.1109/ICDCS60910.2024.00084</a>","mla":"Chatterjee, Bapi, et al. “Federated SGD with Local Asynchrony.” <i>Proceedings of the 44th International Conference on Distributed Computing Systems</i>, IEEE, 2024, pp. 857–68, doi:<a href=\"https://doi.org/10.1109/ICDCS60910.2024.00084\">10.1109/ICDCS60910.2024.00084</a>.","chicago":"Chatterjee, Bapi, Vyacheslav Kungurtsev, and Dan-Adrian Alistarh. “Federated SGD with Local Asynchrony.” In <i>Proceedings of the 44th International Conference on Distributed Computing Systems</i>, 857–68. IEEE, 2024. <a href=\"https://doi.org/10.1109/ICDCS60910.2024.00084\">https://doi.org/10.1109/ICDCS60910.2024.00084</a>.","ista":"Chatterjee B, Kungurtsev V, Alistarh D-A. 2024. Federated SGD with local asynchrony. Proceedings of the 44th International Conference on Distributed Computing Systems. ICDCS: International Conference on Distributed Computing Systems, 857–868.","short":"B. Chatterjee, V. Kungurtsev, D.-A. Alistarh, in:, Proceedings of the 44th International Conference on Distributed Computing Systems, IEEE, 2024, pp. 857–868.","apa":"Chatterjee, B., Kungurtsev, V., &#38; Alistarh, D.-A. (2024). Federated SGD with local asynchrony. In <i>Proceedings of the 44th International Conference on Distributed Computing Systems</i> (pp. 857–868). Jersey City, NJ, United States: IEEE. <a href=\"https://doi.org/10.1109/ICDCS60910.2024.00084\">https://doi.org/10.1109/ICDCS60910.2024.00084</a>","ieee":"B. Chatterjee, V. Kungurtsev, and D.-A. Alistarh, “Federated SGD with local asynchrony,” in <i>Proceedings of the 44th International Conference on Distributed Computing Systems</i>, Jersey City, NJ, United States, 2024, pp. 857–868."},"publication":"Proceedings of the 44th International Conference on Distributed Computing Systems","date_created":"2024-09-15T22:01:41Z","page":"857-868","publication_status":"published","date_updated":"2025-09-08T09:23:48Z","year":"2024","_id":"18070","type":"conference","status":"public","month":"07","conference":{"end_date":"2024-07-26","start_date":"2024-07-23","name":"ICDCS: International Conference on Distributed Computing Systems","location":"Jersey City, NJ, United States"},"oa_version":"None","title":"Federated SGD with local asynchrony","article_processing_charge":"No","quality_controlled":"1","date_published":"2024-07-26T00:00:00Z","doi":"10.1109/ICDCS60910.2024.00084","language":[{"iso":"eng"}]},{"scopus_import":"1","page":"1377-1387","publication_status":"published","isi":1,"citation":{"chicago":"Tsimos, Giorgos, Anastasios Kichidis, Alberto Sonnino, and Eleftherios Kokoris Kogias. “HammerHead: Leader Reputation for Dynamic Scheduling.” In <i>Proceedings - International Conference on Distributed Computing Systems</i>, 1377–87. IEEE, 2024. <a href=\"https://doi.org/10.1109/ICDCS60910.2024.00129\">https://doi.org/10.1109/ICDCS60910.2024.00129</a>.","mla":"Tsimos, Giorgos, et al. “HammerHead: Leader Reputation for Dynamic Scheduling.” <i>Proceedings - International Conference on Distributed Computing Systems</i>, IEEE, 2024, pp. 1377–87, doi:<a href=\"https://doi.org/10.1109/ICDCS60910.2024.00129\">10.1109/ICDCS60910.2024.00129</a>.","ama":"Tsimos G, Kichidis A, Sonnino A, Kokoris Kogias E. HammerHead: Leader reputation for dynamic scheduling. In: <i>Proceedings - International Conference on Distributed Computing Systems</i>. IEEE; 2024:1377-1387. doi:<a href=\"https://doi.org/10.1109/ICDCS60910.2024.00129\">10.1109/ICDCS60910.2024.00129</a>","ista":"Tsimos G, Kichidis A, Sonnino A, Kokoris Kogias E. 2024. HammerHead: Leader reputation for dynamic scheduling. Proceedings - International Conference on Distributed Computing Systems. ICDCS: International Conference on Distributed Computing Systems, 1377–1387.","short":"G. Tsimos, A. Kichidis, A. Sonnino, E. Kokoris Kogias, in:, Proceedings - International Conference on Distributed Computing Systems, IEEE, 2024, pp. 1377–1387.","apa":"Tsimos, G., Kichidis, A., Sonnino, A., &#38; Kokoris Kogias, E. (2024). HammerHead: Leader reputation for dynamic scheduling. In <i>Proceedings - International Conference on Distributed Computing Systems</i> (pp. 1377–1387). Jersey City, NJ, United States: IEEE. <a href=\"https://doi.org/10.1109/ICDCS60910.2024.00129\">https://doi.org/10.1109/ICDCS60910.2024.00129</a>","ieee":"G. Tsimos, A. Kichidis, A. Sonnino, and E. Kokoris Kogias, “HammerHead: Leader reputation for dynamic scheduling,” in <i>Proceedings - International Conference on Distributed Computing Systems</i>, Jersey City, NJ, United States, 2024, pp. 1377–1387."},"publisher":"IEEE","day":"26","author":[{"first_name":"Giorgos","last_name":"Tsimos","full_name":"Tsimos, Giorgos"},{"last_name":"Kichidis","full_name":"Kichidis, Anastasios","first_name":"Anastasios"},{"first_name":"Alberto","last_name":"Sonnino","full_name":"Sonnino, Alberto"},{"id":"f5983044-d7ef-11ea-ac6d-fd1430a26d30","first_name":"Eleftherios","full_name":"Kokoris Kogias, Eleftherios","last_name":"Kokoris Kogias"}],"doi":"10.1109/ICDCS60910.2024.00129","quality_controlled":"1","status":"public","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2309.12713"}],"oa_version":"Preprint","conference":{"location":"Jersey City, NJ, United States","start_date":"2024-07-23","name":"ICDCS: International Conference on Distributed Computing Systems","end_date":"2024-07-26"},"month":"07","external_id":{"isi":["001304430200120"],"arxiv":["2309.12713"]},"abstract":[{"lang":"eng","text":"Recent advancements on DAG-based consensus protocols allow for blockchains with improved metrics and properties, such as throughput and censorship-resistance. Variants of the Bullshark [18] consensus protocol are adopted for practical use by the Sui blockchain, for improved latency. However, the protocol is leader-based, and is strongly affected by crashed leaders that can lead to various performance issues, for example, decreased transaction throughput. In this paper, we propose HammerHead, a DAG-based consensus protocol, that is inspired by Carousel [8] and provides Leader-Utilization. Our proposal differs from Carousel, which is built for a chained consensus protocol; in HammerHead chain quality is inherited by the DAG. HammerHead needs to preserve safety and liveness, despite validators committing leader vertices asynchronously. The key idea is to update leader schedules dynamically, based on the validators' scores during the previous schedule. We implement HammerHead and show a minor improvement in performance for cases without faults. The major improvements in comparison to Bullshark appear in faulty settings. Specifically, we show a drastic, 2x-latency improvement and up to 40% increased throughput when crash faults occur (100 validators, 33 faults)."}],"date_created":"2024-09-15T22:01:41Z","publication":"Proceedings - International Conference on Distributed Computing Systems","year":"2024","date_updated":"2025-09-08T09:42:36Z","arxiv":1,"publication_identifier":{"isbn":["9798350386059"],"issn":["1063-6927"],"eissn":["2575-8411"]},"acknowledgement":"This work is supported by Mysten Labs. We thank the Mysten Labs Engineering teams for valuable feedback broadly, and specifically to Laura Makdah for helping implementing the early reputation score system for validators and Dmitry Perelman for managing the overall implementation effort.","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","oa":1,"department":[{"_id":"ElKo"}],"date_published":"2024-07-26T00:00:00Z","title":"HammerHead: Leader reputation for dynamic scheduling","article_processing_charge":"No","language":[{"iso":"eng"}],"_id":"18071","type":"conference"},{"quality_controlled":"1","doi":"10.1109/ICDCS57875.2023.00037","status":"public","main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2306.16006","open_access":"1"}],"month":"10","oa_version":"Preprint","conference":{"location":"Hong Kong, China","end_date":"2023-07-21","start_date":"2023-07-18","name":"ICDCS: International Conference on Distributed Computing Systems"},"scopus_import":"1","citation":{"mla":"Avarikioti, Zeta, et al. “Lightning Creation Games.” <i>43rd International Conference on Distributed Computing Systems</i>, vol. 2023, IEEE, 2023, pp. 603–13, doi:<a href=\"https://doi.org/10.1109/ICDCS57875.2023.00037\">10.1109/ICDCS57875.2023.00037</a>.","ista":"Avarikioti Z, Lizurej T, Michalak T, Yeo MX. 2023. Lightning creation games. 43rd International Conference on Distributed Computing Systems. ICDCS: International Conference on Distributed Computing Systems vol. 2023, 603–613.","ama":"Avarikioti Z, Lizurej T, Michalak T, Yeo MX. Lightning creation games. In: <i>43rd International Conference on Distributed Computing Systems</i>. Vol 2023. IEEE; 2023:603-613. doi:<a href=\"https://doi.org/10.1109/ICDCS57875.2023.00037\">10.1109/ICDCS57875.2023.00037</a>","chicago":"Avarikioti, Zeta, Tomasz Lizurej, Tomasz Michalak, and Michelle X Yeo. “Lightning Creation Games.” In <i>43rd International Conference on Distributed Computing Systems</i>, 2023:603–13. IEEE, 2023. <a href=\"https://doi.org/10.1109/ICDCS57875.2023.00037\">https://doi.org/10.1109/ICDCS57875.2023.00037</a>.","short":"Z. Avarikioti, T. Lizurej, T. Michalak, M.X. Yeo, in:, 43rd International Conference on Distributed Computing Systems, IEEE, 2023, pp. 603–613.","apa":"Avarikioti, Z., Lizurej, T., Michalak, T., &#38; Yeo, M. X. (2023). Lightning creation games. In <i>43rd International Conference on Distributed Computing Systems</i> (Vol. 2023, pp. 603–613). Hong Kong, China: IEEE. <a href=\"https://doi.org/10.1109/ICDCS57875.2023.00037\">https://doi.org/10.1109/ICDCS57875.2023.00037</a>","ieee":"Z. Avarikioti, T. Lizurej, T. Michalak, and M. X. Yeo, “Lightning creation games,” in <i>43rd International Conference on Distributed Computing Systems</i>, Hong Kong, China, 2023, vol. 2023, pp. 603–613."},"isi":1,"page":"603-613","publication_status":"published","publisher":"IEEE","volume":2023,"author":[{"first_name":"Zeta","last_name":"Avarikioti","full_name":"Avarikioti, Zeta"},{"first_name":"Tomasz","last_name":"Lizurej","full_name":"Lizurej, Tomasz"},{"last_name":"Michalak","full_name":"Michalak, Tomasz","first_name":"Tomasz"},{"last_name":"Yeo","full_name":"Yeo, Michelle X","first_name":"Michelle X","id":"2D82B818-F248-11E8-B48F-1D18A9856A87","orcid":"0009-0001-3676-4809"}],"day":"11","related_material":{"record":[{"id":"14506","status":"public","relation":"dissertation_contains"}]},"title":"Lightning creation games","article_processing_charge":"No","date_published":"2023-10-11T00:00:00Z","language":[{"iso":"eng"}],"type":"conference","_id":"14490","intvolume":"      2023","abstract":[{"text":"Payment channel networks (PCNs) are a promising solution to the scalability problem of cryptocurrencies. Any two users connected by a payment channel in the network can theoretically send an unbounded number of instant, costless transactions between them. Users who are not directly connected can also transact with each other in a multi-hop fashion. In this work, we study the incentive structure behind the creation of payment channel networks, particularly from the point of view of a single user that wants to join the network. We define a utility function for a new user in terms of expected revenue, expected fees, and the cost of creating channels, and then provide constant factor approximation algorithms that optimise the utility function given a certain budget. Additionally, we take a step back from a single user to the whole network and examine the parameter spaces under which simple graph topologies form a Nash equilibrium.","lang":"eng"}],"external_id":{"arxiv":["2306.16006"],"isi":["001081242600053"]},"publication_identifier":{"eissn":["2575-8411"],"isbn":["9798350339864"]},"arxiv":1,"acknowledgement":"The work was partially supported by the Austrian Science Fund (FWF) through the project CoRaF (grant 2020388). It was also partially supported by NCN Grant 2019/35/B/ST6/04138 and ERC Grant 885666.","publication":"43rd International Conference on Distributed Computing Systems","date_created":"2023-11-05T23:00:54Z","year":"2023","date_updated":"2026-04-07T13:29:44Z","user_id":"317138e5-6ab7-11ef-aa6d-ffef3953e345","oa":1,"department":[{"_id":"KrPi"}]}]
