ci : fix wikitext url + compile warnings (#5569)

author Georgi Gerganov <redacted>

Sun, 18 Feb 2024 20:39:30 +0000 (22:39 +0200)

committer GitHub <redacted>

Sun, 18 Feb 2024 20:39:30 +0000 (22:39 +0200)
author Georgi Gerganov <redacted>
Sun, 18 Feb 2024 20:39:30 +0000 (22:39 +0200)
committer GitHub <redacted>
Sun, 18 Feb 2024 20:39:30 +0000 (22:39 +0200)
diff --git a/README.md b/README.md

index 0c4ee5a27470a940aa54ea252f565dbe7b4e92c7..8c7bc2689c64764fd3fd3c27fbb758c824912a4b 100644 (file)
--- a/README.md
+++ b/README.md
@@ -768,7 +768,7 @@ The time per token is measured on a MacBook M1 Pro 32GB RAM using 4 and 8 thread
  
  #### How to run
  
-1. Download/extract: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
+1. Download/extract: https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
  2. Run `./perplexity -m models/7B/ggml-model-q4_0.gguf -f wiki.test.raw`
  3. Output:
  ```
diff --git a/ci/run.sh b/ci/run.sh

index b94658c96c5a4942eaee28be6ddfdf90b5af6781..f3a29c2e9be0b3aab5902634e6e75d08bda15bd6 100755 (executable)
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -219,7 +219,7 @@ function gg_run_open_llama_3b_v2 {
      gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
      gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
  
-    gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
+    gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
      unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
      head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw
  
@@ -401,7 +401,7 @@ function gg_run_open_llama_7b_v2 {
      gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin
      gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json
  
-    gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
+    gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
      unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
  
      path_models="../models-mnt/open-llama/7B-v2"
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp

index 74dcc642a876e304f98d0881c2e85a3a42884f51..9ec989389cfad3f1e397c61fa3fb1df1269c7d4a 100644 (file)
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -309,7 +309,7 @@ static void process_logits(int n_vocab, const float * logits, const int * tokens
  }
  
  static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params & params) {
-    // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
+    // Download: https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
      // Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
      // Output: `perplexity: 13.5106 [114/114]`
      // BOS tokens will be added for each chunk before eval
@@ -447,7 +447,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
          return perplexity_v2(ctx, params);
      }
  
-    // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
+    // Download: https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
      // Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
      // Output: `perplexity: 13.5106 [114/114]`
      // BOS tokens will be added for each chunk before eval
diff --git a/ggml-quants.c b/ggml-quants.c

index 48f5294e1e78831869c7ec474807c0654daa068f..43a8f1de4dac6aef271d93bde15b426ba3401890 100644 (file)
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1837,9 +1837,9 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
          float sigma2 = sumx2/QK_K;
          for (int j = 0; j < QK_K/16; ++j) {
              const float * restrict qw = quant_weights + QK_K * i + 16*j;
-            for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
-            for (int l = 0; l < 16; ++l) sw[j] += weight[l];
-            scales[j] = make_qkx3_quants(16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
+            for (int l = 0; l < QK_K/16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
+            for (int l = 0; l < QK_K/16; ++l) sw[j] += weight[l];
+            scales[j] = make_qkx3_quants(QK_K/16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
          }
  
          float dm  = make_qp_quants(QK_K/16, 15, scales, Ls, sw);
diff --git a/scripts/get-wikitext-2.sh b/scripts/get-wikitext-2.sh

index ff96f331e026ed59261d55ee96e28df72ab6f455..7ca760fa613045d9741a8a2ec5f88d02e3c98dbf 100755 (executable)
--- a/scripts/get-wikitext-2.sh
+++ b/scripts/get-wikitext-2.sh
@@ -1,6 +1,6 @@
  #!/bin/bash
  
-wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
+wget https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
  
  echo "Usage:"
  echo ""
author	Georgi Gerganov <redacted>
	Sun, 18 Feb 2024 20:39:30 +0000 (22:39 +0200)
committer	GitHub <redacted>
	Sun, 18 Feb 2024 20:39:30 +0000 (22:39 +0200)
README.md		patch | blob | history
ci/run.sh		patch | blob | history
examples/perplexity/perplexity.cpp		patch | blob | history
ggml-quants.c		patch | blob | history
scripts/get-wikitext-2.sh		patch | blob | history