From: Stephan Walter
Date: Thu, 13 Apr 2023 14:59:50 +0000 (+0000)
Subject: ggml : optimize non-SIMD Q4_0 vector dot product (#703)
X-Git-Tag: gguf-v0.4.0~972
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=6232f2d7fd7a22d5eeb62182b2f21fcf01359754;p=pkg%2Fggml%2Fsources%2Fllama.cpp

ggml : optimize non-SIMD Q4_0 vector dot product (#703)
---

diff --git a/ggml.c b/ggml.c
index 281fd8ec..eb47d829 100644
--- a/ggml.c
+++ b/ggml.c
@@ -2160,18 +2160,20 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
         const uint8_t * restrict p0 = x[i].qs;
         const uint8_t * restrict p1 = y[i].qs;
 
+        int sumi = 0;
         for (int j = 0; j < QK/2; j++) {
             const uint8_t v0 = p0[j];
             const uint8_t v1 = p1[j];
 
-            const float f0 = d0*((int8_t) (v0 & 0xf) - 8);
-            const float f1 = d0*((int8_t) (v0 >> 4) - 8);
+            const int8_t i0 = (int8_t) (v0 & 0xf) - 8;
+            const int8_t i1 = (int8_t) (v0 >> 4) - 8;
 
-            const float f2 = d1*((int8_t) (v1 & 0xf) - 8);
-            const float f3 = d1*((int8_t) (v1 >> 4) - 8);
+            const int8_t i2 = (int8_t) (v1 & 0xf) - 8;
+            const int8_t i3 = (int8_t) (v1 >> 4) - 8;
 
-            sumf += f0*f2 + f1*f3;
+            sumi += i0*i2 + i1*i3;
         }
+        sumf += d0 * d1 * sumi;
     }
 #endif
 