imatrix : offload to GPU support (llama/4957)

author Georgi Gerganov <redacted>

Wed, 17 Jan 2024 16:46:30 +0000 (18:46 +0200)

committer Georgi Gerganov <redacted>

Wed, 17 Jan 2024 19:21:10 +0000 (21:21 +0200)
author Georgi Gerganov <redacted>
Wed, 17 Jan 2024 16:46:30 +0000 (18:46 +0200)
committer Georgi Gerganov <redacted>
Wed, 17 Jan 2024 19:21:10 +0000 (21:21 +0200)
diff --git a/ggml.c b/ggml.c

index d7e01b81f01792b13e251b44e2fe67042f51734d..35fd29a9ec2dc899525d8d0f1d20d5480b17f3f0 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -394,12 +394,6 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
  static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y);
  static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y);
  
-ggml_collect_imatrix_t g_imatrix_collect = NULL;
-
-void ggml_set_imatrix_collection(ggml_collect_imatrix_t imatrix_collect) {
-    g_imatrix_collect = imatrix_collect;
-}
-
  static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
      [GGML_TYPE_I8] = {
          .type_name                = "i8",
@@ -9790,10 +9784,6 @@ static void ggml_compute_forward_mul_mat(
      const int ith = params->ith;
      const int nth = params->nth;
  
-    if (ith == 1 && g_imatrix_collect) {
-        g_imatrix_collect(src0, src1);
-    }
-
      const enum ggml_type type = src0->type;
  
      const bool src1_cont = ggml_is_contiguous(src1);
@@ -10097,10 +10087,6 @@ static void ggml_compute_forward_mul_mat_id(
  
          const struct ggml_tensor * src0_cur = dst->src[cur_a + 2];
  
-        if (ith == 1 && g_imatrix_collect) {
-            g_imatrix_collect(src0_cur, src1);
-        }
-
          const void * wdata    = (src1->type == vec_dot_type) ? src1->data : params->wdata;
          const size_t row_size = ggml_row_size(vec_dot_type, ne10);
  
diff --git a/ggml.h b/ggml.h

index 837c52e68c90cefc813e66f1a9817433ca3ce990..27daf6fd1e12b6ccda8513a62e1e6a420e18930f 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -2085,12 +2085,6 @@ extern "C" {
      GGML_API void ggml_init_iq2_quantization(enum ggml_type type);
      GGML_API void ggml_deinit_iq2_quantization(enum ggml_type type);
  
-    //
-    // Importance matrix
-    //
-    typedef void(*ggml_collect_imatrix_t)(const struct ggml_tensor * src0, const struct ggml_tensor * src1);
-    GGML_API void ggml_set_imatrix_collection(ggml_collect_imatrix_t imatrix_collect);
-
      //
      // gguf
      //
author	Georgi Gerganov <redacted>
	Wed, 17 Jan 2024 16:46:30 +0000 (18:46 +0200)
committer	Georgi Gerganov <redacted>
	Wed, 17 Jan 2024 19:21:10 +0000 (21:21 +0200)