From: Erik Scholz Date: Sat, 1 Mar 2025 11:57:22 +0000 (+0100) Subject: CUDA: compress mode option and default to size (llama/12029) X-Git-Tag: upstream/1.7.4+203~21 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b442dcd59874a72e2fa28fd03a016a0c05df3589;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp CUDA: compress mode option and default to size (llama/12029) cuda 12.8 added the option to specify stronger compression for binaries, so we now default to "size". --- diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 359f9851..835bf16e 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -155,6 +155,9 @@ option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON) option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF) option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT}) +set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING + "ggml: cuda link binary compression mode; requires cuda 12.8+") +set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size") option(GGML_HIP "ggml: use HIP" OFF) option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF) diff --git a/ggml/src/ggml-cuda/CMakeLists.txt b/ggml/src/ggml-cuda/CMakeLists.txt index 96bd5a0b..8623214c 100644 --- a/ggml/src/ggml-cuda/CMakeLists.txt +++ b/ggml/src/ggml-cuda/CMakeLists.txt @@ -102,6 +102,15 @@ if (CUDAToolkit_FOUND) set(CUDA_FLAGS -use_fast_math) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8") + # Options are: + # - none (not recommended) + # - speed (nvcc's default) + # - balance + # - size + list(APPEND CUDA_FLAGS -compress-mode=${GGML_CUDA_COMPRESSION_MODE}) + endif() + if (GGML_FATAL_WARNINGS) list(APPEND CUDA_FLAGS -Werror all-warnings) endif()