update examples and tests

author slaren <redacted>
Thu, 14 Mar 2024 15:45:27 +0000 (16:45 +0100)
committer Georgi Gerganov <redacted>
Fri, 15 Mar 2024 12:01:14 +0000 (14:01 +0200)
diff --git a/examples/common-ggml.cpp b/examples/common-ggml.cpp

index 53811ad193c8e502c90fb142d55f96cd6ecf9476..cf2478f0a43254be9642fc5d19cf6c61e723688e 100644 (file)
--- a/examples/common-ggml.cpp
+++ b/examples/common-ggml.cpp
@@ -90,8 +90,6 @@ bool ggml_common_quantize_0(
      std::vector<ggml_fp16_t> data_f16;
      std::vector<float>       data_f32;
  
-    std::vector<int64_t> hist_all(1 << 4, 0);
-
      while (true) {
          int32_t n_dims;
          int32_t length;
@@ -176,8 +174,6 @@ bool ggml_common_quantize_0(
              work.resize(nelements); // for quantization
  
              size_t cur_size = 0;
-            std::vector<int64_t> hist_cur(1 << 4, 0);
-
              switch ((ggml_type) ttype) {
                  case GGML_TYPE_Q4_0:
                  case GGML_TYPE_Q4_1:
@@ -190,7 +186,7 @@ bool ggml_common_quantize_0(
                  case GGML_TYPE_Q5_K:
                  case GGML_TYPE_Q6_K:
                      {
-                        cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], hist_cur.data(), nullptr);
+                        cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], nullptr);
                      } break;
                  case GGML_TYPE_F32:
                  case GGML_TYPE_F16:
@@ -217,15 +213,7 @@ bool ggml_common_quantize_0(
              fout.write(reinterpret_cast<char *>(work.data()), cur_size);
              total_size_new += cur_size;
  
-            printf("size = %8.2f MB -> %8.2f MB | hist: ", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
-            for (int i = 0; i < (int) hist_cur.size(); ++i) {
-                hist_all[i] += hist_cur[i];
-            }
-
-            for (int i = 0; i < (int) hist_cur.size(); ++i) {
-                printf("%5.3f ", hist_cur[i] / (float)nelements);
-            }
-            printf("\n");
+            printf("size = %8.2f MB -> %8.2f MB\n", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
          } else {
              printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
              fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
@@ -238,18 +226,5 @@ bool ggml_common_quantize_0(
      printf("%s: model size  = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
      printf("%s: quant size  = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
  
-    {
-        int64_t sum_all = 0;
-        for (int i = 0; i < (int) hist_all.size(); ++i) {
-            sum_all += hist_all[i];
-        }
-
-        printf("%s: hist: ", __func__);
-        for (int i = 0; i < (int) hist_all.size(); ++i) {
-            printf("%5.3f ", hist_all[i] / (float)sum_all);
-        }
-        printf("\n");
-    }
-
      return true;
  }
author	slaren <redacted>
	Thu, 14 Mar 2024 15:45:27 +0000 (16:45 +0100)
committer	Georgi Gerganov <redacted>
	Fri, 15 Mar 2024 12:01:14 +0000 (14:01 +0200)