% Conference paper presented at ICAI 2024 (held as part of CSCE 2024) and
% published in Communications in Computer and Information Science, vol. 2252.
% The citation key is the exporter-generated identifier; it is kept unchanged
% so existing \cite commands keep resolving.
% NOTE(review): `address` holds a country rather than the publisher's city --
% confirm the city against the publisher record before relying on it.
% NOTE(review): editor given names are only available as initials here, and
% the surname "DeLaFuente" may need splitting into particles -- verify.
@inproceedings{35ddf3756cef4ba695f83eb5a915258e,
  author    = {Islam, Ashhadul and Belhaouari, Samir Brahim and Bermak, Amine},
  title     = {Pruning and Optimizing Large Language Models in an Era of {GPU} Scarcity},
  booktitle = {Artificial Intelligence and Applications, {ICAI} 2024},
  editor    = {Arabnia, H. R. and Deligiannidis, L. and Amirian, S. and Shenavarmasouleh, F. and Mohammadi, F. G. and DeLaFuente, D.},
  series    = {Communications in Computer and Information Science},
  volume    = {2252},
  pages     = {145--153},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2025},
  month     = may,
  day       = {3},
  doi       = {10.1007/978-3-031-86623-4_12},
  isbn      = {9783031866227},
  language  = {English},
  keywords  = {LLM Optimization, Network Pruning, Sparse Matrices},
  abstract  = {The increasing computational and environmental costs associated with AI models, especially large language models (LLMs), highlight the urgent need for network optimization. These models consume vast amounts of energy and resources, requiring more efficient training strategies to balance performance with ecological responsibility. Our focus is on enhancing the efficiency of deep neural networks on embedded devices through novel pruning techniques: ``evolution of weights'' and ``smart pruning.'' These methods, compared to traditional pruning approaches using benchmark datasets, involve evaluating parameter importance during training to better preserve accuracy during compression. Our approach results in faster computations and higher compression rates with minimal accuracy loss. We have successfully applied these techniques to LLMs consisting of around 10 million parameters. The LLM experiment is publicly available on Github to facilitate replication testing.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.; 26th International Conference on Artificial Intelligence and Applications, ICAI 2024, held as part of the World Congress in Computer Science, Computer Engineering and Applied Computing, CSCE 2024 ; Conference date: 22-07-2024 Through 25-07-2024},
}