ggml : fix q4xx mat mul, increase ggml_aligned_malloc alignment (llama/10167)

author Diego Devesa <redacted>

Mon, 4 Nov 2024 16:34:08 +0000 (17:34 +0100)

committer Georgi Gerganov <redacted>

Fri, 15 Nov 2024 13:21:04 +0000 (15:21 +0200)
author Diego Devesa <redacted>
Mon, 4 Nov 2024 16:34:08 +0000 (17:34 +0100)
committer Georgi Gerganov <redacted>
Fri, 15 Nov 2024 13:21:04 +0000 (15:21 +0200)
diff --git a/ggml/src/ggml-cpu.c b/ggml/src/ggml-cpu.c

index 4b8ffb629afbbaab31cd14736f27249ce2c44ab3..09ba49b1348a1b84aa465c0205dd471536931a7a 100644 (file)
--- a/ggml/src/ggml-cpu.c
+++ b/ggml/src/ggml-cpu.c
@@ -304,6 +304,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
          .nrows                    = 1,
      },
      [GGML_TYPE_Q8_0] = {
+        .from_float_to_mat        = quantize_mat_q8_0,
          .vec_dot                  = ggml_vec_dot_q8_0_q8_0,
          .vec_dot_type             = GGML_TYPE_Q8_0,
  #if defined (__ARM_FEATURE_MATMUL_INT8)
@@ -13692,9 +13693,7 @@ void ggml_cpu_init(void) {
                      uint16_t u16;
                      ggml_fp16_t fp16;
                  } u = {i};
-                // FIXME: this table is used in conversion functions outside of compute
-                // current code depends on ggml_init initializing this table
-                float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
+                float f = GGML_FP16_TO_FP32(u.fp16);
                  ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
                  ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
              }
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c

index 7dc3340a1e7499ffda4616e20425ebe82c537ce5..1ccf78d98412c34b59698491be4116a965a33fa1 100644 (file)
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -220,8 +220,10 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi
  
  
  void * ggml_aligned_malloc(size_t size) {
+    const int alignment = 64;
+
  #if defined(_MSC_VER) || defined(__MINGW32__)
-    return _aligned_malloc(size, TENSOR_ALIGNMENT);
+    return _aligned_malloc(size, alignment);
  #else
      if (size == 0) {
          GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
@@ -229,8 +231,9 @@ void * ggml_aligned_malloc(size_t size) {
      }
      void * aligned_memory = NULL;
    #ifdef GGML_USE_CPU_HBM
-    int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
+    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
    #elif TARGET_OS_OSX
+    GGML_UNUSED(alignment);
      kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
      int result = EFAULT;
      switch (alloc_status) {
@@ -248,7 +251,7 @@ void * ggml_aligned_malloc(size_t size) {
              break;
      }
    #else
-    int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
+    int result = posix_memalign(&aligned_memory, alignment, size);
    #endif
      if (result != 0) {
          // Handle allocation failure
author	Diego Devesa <redacted>
	Mon, 4 Nov 2024 16:34:08 +0000 (17:34 +0100)
committer	Georgi Gerganov <redacted>
	Fri, 15 Nov 2024 13:21:04 +0000 (15:21 +0200)
ggml/src/ggml-cpu.c		patch | blob | history
ggml/src/ggml.c		patch | blob | history