kompute : llama-bench support and ggml_cpu_has_kompute() (#5226)

author Jared Van Bortel <redacted>

Wed, 31 Jan 2024 00:04:37 +0000 (19:04 -0500)

committer GitHub <redacted>

Wed, 31 Jan 2024 00:04:37 +0000 (19:04 -0500)
author Jared Van Bortel <redacted>
Wed, 31 Jan 2024 00:04:37 +0000 (19:04 -0500)
committer GitHub <redacted>
Wed, 31 Jan 2024 00:04:37 +0000 (19:04 -0500)
diff --git a/common/common.cpp b/common/common.cpp

index 2880136761afb4d3be6b1abe5d96038474a451e2..0dd1c50cfb35a59e9b3ed08d82a8ae172e5df3bb 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1521,6 +1521,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
      fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
      fprintf(stream, "cpu_has_cublas: %s\n",      ggml_cpu_has_cublas()      ? "true" : "false");
      fprintf(stream, "cpu_has_clblast: %s\n",     ggml_cpu_has_clblast()     ? "true" : "false");
+    fprintf(stream, "cpu_has_kompute: %s\n",     ggml_cpu_has_kompute()     ? "true" : "false");
      fprintf(stream, "cpu_has_fma: %s\n",         ggml_cpu_has_fma()         ? "true" : "false");
      fprintf(stream, "cpu_has_gpublas: %s\n",     ggml_cpu_has_gpublas()     ? "true" : "false");
      fprintf(stream, "cpu_has_neon: %s\n",        ggml_cpu_has_neon()        ? "true" : "false");
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp

index f239415d375f6b7109c425f2b43714410f981069..542cc7bb8ea0de5a6a438d7856669e0ac4f89b2c 100644 (file)
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -563,6 +563,7 @@ struct test {
      static const bool cuda;
      static const bool opencl;
      static const bool vulkan;
+    static const bool kompute;
      static const bool metal;
      static const bool gpu_blas;
      static const bool blas;
@@ -647,6 +648,9 @@ struct test {
          if (vulkan) {
              return "Vulkan";
          }
+        if (kompute) {
+            return "Kompute";
+        }
          if (metal) {
              return "Metal";
          }
@@ -662,7 +666,7 @@ struct test {
      static const std::vector<std::string> & get_fields() {
          static const std::vector<std::string> fields = {
              "build_commit", "build_number",
-            "cuda", "opencl", "vulkan", "metal", "gpu_blas", "blas",
+            "cuda", "opencl", "vulkan", "kompute", "metal", "gpu_blas", "blas",
              "cpu_info", "gpu_info",
              "model_filename", "model_type", "model_size", "model_n_params",
              "n_batch", "n_threads", "type_k", "type_v",
@@ -686,8 +690,9 @@ struct test {
              field == "avg_ns" || field == "stddev_ns") {
              return INT;
          }
-        if (field == "cuda" || field == "opencl"  || field == "vulkan"|| field == "metal" || field == "gpu_blas" || field == "blas" ||
-            field == "f16_kv" || field == "no_kv_offload" || field == "mul_mat_q") {
+        if (field == "cuda" || field == "opencl"  || field == "vulkan" || field == "kompute" || field == "metal" ||
+            field == "gpu_blas" || field == "blas" || field == "f16_kv" || field == "no_kv_offload" ||
+            field == "mul_mat_q") {
              return BOOL;
          }
          if (field == "avg_ts" || field == "stddev_ts") {
@@ -714,7 +719,8 @@ struct test {
          }
          std::vector<std::string> values = {
              build_commit, std::to_string(build_number),
-            std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
+            std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(kompute), // order mirrors get_fields()
+            std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
              cpu_info, gpu_info,
              model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
              std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
@@ -743,6 +749,7 @@ const int         test::build_number = LLAMA_BUILD_NUMBER;
  const bool        test::cuda         = !!ggml_cpu_has_cublas();
  const bool        test::opencl       = !!ggml_cpu_has_clblast();
  const bool        test::vulkan       = !!ggml_cpu_has_vulkan();
+const bool        test::kompute      = !!ggml_cpu_has_kompute();
  const bool        test::metal        = !!ggml_cpu_has_metal();
  const bool        test::gpu_blas     = !!ggml_cpu_has_gpublas();
  const bool        test::blas         = !!ggml_cpu_has_blas();
diff --git a/ggml.c b/ggml.c

index a7a9ea319c5f09dd711d783838200d66fb2f10d7..b2c8baaa844398c6e9cfb441e77fb8359980d57b 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -20473,6 +20473,14 @@ int ggml_cpu_has_vulkan(void) {
  #endif
  }
  
+int ggml_cpu_has_kompute(void) {
+#if defined(GGML_USE_KOMPUTE)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
  int ggml_cpu_has_sycl(void) {
  #if defined(GGML_USE_SYCL)
      return 1;
@@ -20482,7 +20490,8 @@ int ggml_cpu_has_sycl(void) {
  }
  
  int ggml_cpu_has_gpublas(void) {
-    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_sycl();
+    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
+           ggml_cpu_has_sycl();
  }
  
  int ggml_cpu_has_sse3(void) {
diff --git a/ggml.h b/ggml.h

index bf782e6ad12793931e36f828bf5e01345774d6f2..afc87b843f2289e1d5c07e4382e27436a83a64b4 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -2266,6 +2266,7 @@ extern "C" {
      GGML_API int ggml_cpu_has_cublas     (void);
      GGML_API int ggml_cpu_has_clblast    (void);
      GGML_API int ggml_cpu_has_vulkan     (void);
+    GGML_API int ggml_cpu_has_kompute    (void);
      GGML_API int ggml_cpu_has_gpublas    (void);
      GGML_API int ggml_cpu_has_sse3       (void);
      GGML_API int ggml_cpu_has_ssse3      (void);
diff --git a/llama.cpp b/llama.cpp

index 7b9a5c0796030e0b3040d4dbaf46ee817a0452a9..a490eeab2fd7b4258cdd65c8d7f167984db91d88 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -6878,11 +6878,6 @@ static int llama_decode_internal(
          n_threads = std::min(4, n_threads);
      }
  
-    const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
-    if ((ggml_cpu_has_cublas() || ggml_cpu_has_vulkan()) && fully_offloaded) {
-        n_threads = 1;
-    }
-
  #ifdef GGML_USE_MPI
      const int64_t n_layer = hparams.n_layer;
      ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
author	Jared Van Bortel <redacted>
	Wed, 31 Jan 2024 00:04:37 +0000 (19:04 -0500)
committer	GitHub <redacted>
	Wed, 31 Jan 2024 00:04:37 +0000 (19:04 -0500)
common/common.cpp		patch \| blob \| history
examples/llama-bench/llama-bench.cpp		patch \| blob \| history
ggml.c		patch \| blob \| history
ggml.h		patch \| blob \| history
llama.cpp		patch \| blob \| history