llamafile: use member variable instead of constant for iq4nlt (llama/11780)

author Jeffrey Morgan <redacted>

Thu, 13 Feb 2025 17:05:04 +0000 (09:05 -0800)

committer Georgi Gerganov <redacted>

Thu, 27 Feb 2025 06:55:36 +0000 (08:55 +0200)
author Jeffrey Morgan <redacted>
Thu, 13 Feb 2025 17:05:04 +0000 (09:05 -0800)
committer Georgi Gerganov <redacted>
Thu, 27 Feb 2025 06:55:36 +0000 (08:55 +0200)
diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp

index c22a662876c4a88a6e7aaf25d974276190c5f4cd..e0482c59377fd22ca628bf41d3e06856f04ee801 100644 (file)
--- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
@@ -280,14 +280,6 @@ template <> inline __m256bh load(const float *p) {
  }
  #endif
  
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// CONSTANTS
-
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
-#endif
-
  ////////////////////////////////////////////////////////////////////////////////////////////////////
  // FLOATING POINT MATRIX MULTIPLICATION
  
@@ -614,6 +606,14 @@ class tinyBLAS_Q0_AVX {
                      TC *C, int64_t ldc,
                      int ith, int nth)
          : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
+        const int8_t kvalues_iq4nl[16] = {
+            -127, -104, -83, -65,
+            -49,  -35,  -22, -10,
+              1,   13,   25,  38,
+             53,   69,   89, 113
+        };
+
+        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
      }
  
      void matmul(int64_t m, int64_t n) {
@@ -1038,6 +1038,7 @@ class tinyBLAS_Q0_AVX {
      const int64_t ldc;
      const int ith;
      const int nth;
+    __m128i iq4nlt;
  };
  #endif // __AVX__
author	Jeffrey Morgan <redacted>
	Thu, 13 Feb 2025 17:05:04 +0000 (09:05 -0800)
committer	Georgi Gerganov <redacted>
	Thu, 27 Feb 2025 06:55:36 +0000 (08:55 +0200)