[{"citation":{"ieee":"I. Markov, H. Ramezanikebrya, and D.-A. Alistarh, “CGX: Adaptive system support for communication-efficient deep learning,” in <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, Quebec, QC, Canada, 2022, pp. 241–254.","short":"I. Markov, H. Ramezanikebrya, D.-A. Alistarh, in: Proceedings of the 23rd ACM/IFIP International Middleware Conference, Association for Computing Machinery, 2022, pp. 241–254.","ama":"Markov I, Ramezanikebrya H, Alistarh D-A. CGX: Adaptive system support for communication-efficient deep learning. In: <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>. Association for Computing Machinery; 2022:241-254. doi:<a href=\"https://doi.org/10.1145/3528535.3565248\">10.1145/3528535.3565248</a>","apa":"Markov, I., Ramezanikebrya, H., &#38; Alistarh, D.-A. (2022). CGX: Adaptive system support for communication-efficient deep learning. In <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i> (pp. 241–254). Quebec, QC, Canada: Association for Computing Machinery. <a href=\"https://doi.org/10.1145/3528535.3565248\">https://doi.org/10.1145/3528535.3565248</a>","mla":"Markov, Ilia, et al. “CGX: Adaptive System Support for Communication-Efficient Deep Learning.” <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, Association for Computing Machinery, 2022, pp. 241–54, doi:<a href=\"https://doi.org/10.1145/3528535.3565248\">10.1145/3528535.3565248</a>.","chicago":"Markov, Ilia, Hamidreza Ramezanikebrya, and Dan-Adrian Alistarh. “CGX: Adaptive System Support for Communication-Efficient Deep Learning.” In <i>Proceedings of the 23rd ACM/IFIP International Middleware Conference</i>, 241–54. Association for Computing Machinery, 2022. <a href=\"https://doi.org/10.1145/3528535.3565248\">https://doi.org/10.1145/3528535.3565248</a>.","ista":"Markov I, Ramezanikebrya H, Alistarh D-A. 2022. CGX: Adaptive system support for communication-efficient deep learning. Proceedings of the 23rd ACM/IFIP International Middleware Conference. Middleware: International Middleware Conference, 241–254."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa":1,"language":[{"iso":"eng"}],"department":[{"_id":"DaAl"}],"file":[{"success":1,"file_name":"2022_ACMMiddleware_Markov.pdf","access_level":"open_access","content_type":"application/pdf","relation":"main_file","checksum":"1a397746235f245da5468819247ff663","date_created":"2023-04-03T06:17:58Z","file_size":1514169,"creator":"dernst","date_updated":"2023-04-03T06:17:58Z","file_id":"12795"}],"month":"11","arxiv":1,"publication_identifier":{"isbn":["9781450393409"]},"publication_status":"published","file_date_updated":"2023-04-03T06:17:58Z","has_accepted_license":"1","abstract":[{"lang":"eng","text":"The ability to scale out training workloads has been one of the key performance enablers of deep learning. The main scaling approach is data-parallel GPU-based training, which has been boosted by hardware and software support for highly efficient point-to-point communication, and in particular via hardware bandwidth over-provisioning. Overprovisioning comes at a cost: there is an order of magnitude price difference between \"cloud-grade\" servers with such support, relative to their popular \"consumer-grade\" counterparts, although single server-grade and consumer-grade GPUs can have similar computational envelopes.\r\n\r\nIn this paper, we show that the costly hardware overprovisioning approach can be supplanted via algorithmic and system design, and propose a framework called CGX, which provides efficient software support for compressed communication in ML applications, for both multi-GPU single-node training, as well as larger-scale multi-node training. CGX is based on two technical advances: At the system level, it relies on a re-developed communication stack for ML frameworks, which provides flexible, highly-efficient support for compressed communication. At the application level, it provides seamless, parameter-free integration with popular frameworks, so that end-users do not have to modify training recipes, nor significant training code. This is complemented by a layer-wise adaptive compression technique which dynamically balances compression gains with accuracy preservation. CGX integrates with popular ML frameworks, providing up to 3X speedups for multi-GPU nodes based on commodity hardware, and order-of-magnitude improvements in the multi-node setting, with negligible impact on accuracy."}],"license":"https://creativecommons.org/licenses/by/4.0/","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","image":"/images/cc_by.png","short":"CC BY (4.0)"},"date_created":"2023-03-31T06:17:00Z","author":[{"first_name":"Ilia","id":"D0CF4148-C985-11E9-8066-0BDEE5697425","full_name":"Markov, Ilia","last_name":"Markov"},{"last_name":"Ramezanikebrya","full_name":"Ramezanikebrya, Hamidreza","first_name":"Hamidreza"},{"first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","last_name":"Alistarh","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87","full_name":"Alistarh, Dan-Adrian"}],"day":"01","oa_version":"Published Version","title":"CGX: Adaptive system support for communication-efficient deep learning","date_published":"2022-11-01T00:00:00Z","conference":{"location":"Quebec, QC, Canada","name":"Middleware: International Middleware Conference","start_date":"2022-11-07","end_date":"2022-11-11"},"acknowledgement":"The authors sincerely thank Nikoli Dryden, Tal Ben-Nun, Torsten Hoefler and Bapi Chatterjee for useful discussions throughout the development of this project.","status":"public","publication":"Proceedings of the 23rd ACM/IFIP International Middleware Conference","year":"2022","external_id":{"arxiv":["2111.08617"]},"quality_controlled":"1","page":"241-254","ddc":["000"],"date_updated":"2023-04-03T06:21:04Z","_id":"12780","type":"conference","doi":"10.1145/3528535.3565248","article_processing_charge":"Yes (via OA deal)","publisher":"Association for Computing Machinery"}]
