params.split_mode = LLAMA_SPLIT_MODE_NONE;
} else if (arg_next == "layer") {
params.split_mode = LLAMA_SPLIT_MODE_LAYER;
- }
- else if (arg_next == "row") {
+ } else if (arg_next == "row") {
#ifdef GGML_USE_SYCL
fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
exit(1);
#endif // GGML_USE_SYCL
params.split_mode = LLAMA_SPLIT_MODE_ROW;
- }
- else {
+ } else {
throw std::invalid_argument("invalid value");
}
-#ifndef GGML_USE_CUDA_SYCL_VULKAN
- fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the split mode has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL_VULKAN
+ if (!llama_supports_gpu_offload()) {
+ fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the split mode has no effect.\n");
+ }
}
));
add_opt(llama_arg(
}
for (size_t i = 0; i < llama_max_devices(); ++i) {
if (i < split_arg.size()) {
- params.tensor_split[i] = std::stof(split_arg[i]);
+ params.tensor_split[i] = std::stof(split_arg[i]);
} else {
- params.tensor_split[i] = 0.0f;
+ params.tensor_split[i] = 0.0f;
}
}
-#ifndef GGML_USE_CUDA_SYCL_VULKAN
- fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting a tensor split has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL_VULKAN
+ if (!llama_supports_gpu_offload()) {
+ fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting a tensor split has no effect.\n");
+ }
}
));
add_opt(llama_arg(
format("the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: %d)", params.main_gpu),
[](gpt_params & params, int value) {
params.main_gpu = value;
-#ifndef GGML_USE_CUDA_SYCL_VULKAN
- fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the main GPU has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL_VULKAN
+ if (!llama_supports_gpu_offload()) {
+ fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the main GPU has no effect.\n");
+ }
}
));
add_opt(llama_arg(
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
-#if (defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL))
-#define GGML_USE_CUDA_SYCL
-#endif
-
-#if (defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)) || defined(GGML_USE_VULKAN)
-#define GGML_USE_CUDA_SYCL_VULKAN
-#endif
-
#if defined(LLAMA_USE_CURL)
#ifdef __linux__
#include <linux/limits.h>