Fix CI: ARM NEON, quantization unit tests, editorconfig (#1122)

author Stephan Walter <redacted>

Sat, 22 Apr 2023 10:54:13 +0000 (10:54 +0000)

committer GitHub <redacted>

Sat, 22 Apr 2023 10:54:13 +0000 (10:54 +0000)
author Stephan Walter <redacted>
Sat, 22 Apr 2023 10:54:13 +0000 (10:54 +0000)
committer GitHub <redacted>
Sat, 22 Apr 2023 10:54:13 +0000 (10:54 +0000)
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index 6d79a7e6fed35fe4bbc5e9761b71761e2cf22b74..decf41a9fb7920457ba210dd7ecab6d093e5ce5a 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -94,7 +94,7 @@ int main(int argc, char ** argv) {
  
  //    params.prompt = R"(// this function checks if the number n is prime
  //bool is_prime(int n) {)";
-    
+
      llama_context * ctx;
      g_ctx = &ctx;
  
diff --git a/ggml.c b/ggml.c

index 46c0292fe1ce397ae7f695af85331e4795d27612..d9a95af8d4b5c279ff3d8a195141b80c800037c4 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -2635,15 +2635,15 @@ static void ggml_vec_dot_q4_1_q8_0(const int n, float * restrict s, const void *
          sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), x0->d*y0->d);
          sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), x1->d*y1->d);
  #else
-        const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0l), vget_low_s8 (v1_0ls));
-        const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0l), vget_high_s8(v1_0ls));
-        const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0h), vget_low_s8 (v1_0hs));
-        const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0h), vget_high_s8(v1_0hs));
+        const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0lz), vget_low_s8 (v1_0l));
+        const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0lz), vget_high_s8(v1_0l));
+        const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hz), vget_low_s8 (v1_0h));
+        const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0hz), vget_high_s8(v1_0h));
  
-        const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1l), vget_low_s8 (v1_1ls));
-        const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1l), vget_high_s8(v1_1ls));
-        const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1h), vget_low_s8 (v1_1hs));
-        const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1h), vget_high_s8(v1_1hs));
+        const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1lz), vget_low_s8 (v1_1l));
+        const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1lz), vget_high_s8(v1_1l));
+        const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1hz), vget_low_s8 (v1_1h));
+        const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1hz), vget_high_s8(v1_1h));
  
          const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h));
          const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h));
diff --git a/llama.cpp b/llama.cpp

index 4e92f551585a6116dd45f1ebd3f1ab00134bbc7c..34327ecfab4fe22a3ceabf28754465c5b2dccb21 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -2256,7 +2256,6 @@ std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_te
  
  // Returns the size of the state
  size_t llama_get_state_size(struct llama_context * ctx) {
-    const size_t s_bool = sizeof(int32_t);
      // we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
      // for reference, std::mt19937(1337) serializes to 6701 bytes.
      const size_t s_rng_size = sizeof(size_t);
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp

index 5a54101523637b34091b4e2451f4022195a42059..7e091e8c49085b02f04c45003a9427f43bf8b062 100644 (file)
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -120,7 +120,7 @@ int main(int argc, char * argv[]) {
          ggml_type type = (ggml_type) i;
          quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
  
-        if (qfns.quantize_row_q) {
+        if (qfns.quantize_row_q && qfns.dequantize_row_q) {
              const float total_error = total_quantization_error(qfns, test_size, test_data.data());
              failed = !(total_error < MAX_QUANTIZATION_TOTAL_ERROR);
              num_failed += failed;
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp

index 883df05fe8bb2ef86aad52b8244809c5552f4f2a..d5514455db11d00b49530977f4d4112799f3dff7 100644 (file)
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -225,7 +225,7 @@ int main(int argc, char * argv[]) {
              continue;
          }
  
-        if (qfns.quantize_row_q) {
+        if (qfns.quantize_row_q && qfns.dequantize_row_q) {
              printf("%s\n", ggml_type_name(type));
  
              if (params.op_quantize_row_q_reference) {
author	Stephan Walter <redacted>
	Sat, 22 Apr 2023 10:54:13 +0000 (10:54 +0000)
committer	GitHub <redacted>
	Sat, 22 Apr 2023 10:54:13 +0000 (10:54 +0000)
examples/main/main.cpp		patch | blob | history
ggml.c		patch | blob | history
llama.cpp		patch | blob | history
tests/test-quantize-fns.cpp		patch | blob | history
tests/test-quantize-perf.cpp		patch | blob | history