llama_token bos = llama_token_bos(model);
llama_token eos = llama_token_eos(model);
// some models (e.g. T5) don't have a BOS token
- if (bos != -1) {
+ if (bos != LLAMA_TOKEN_NULL) {
    tmp.push_back(bos);
}
- tmp.push_back(eos);
+ if (eos != LLAMA_TOKEN_NULL) {
+     tmp.push_back(eos);
+ }
+ if (tmp.empty()) {
+     tmp.push_back(0);
+ }
if (llama_model_has_encoder(model)) {
llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size(), 0, 0));
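Put together, the patched warm-up builds its token list defensively: BOS and EOS are used only if the model defines them (llama_token_bos()/llama_token_eos() return LLAMA_TOKEN_NULL otherwise), with token id 0 as a fallback so the batch is never empty. A self-contained sketch of that logic; the helper name warmup_tokens is hypothetical, not taken from the patch:

    #include <vector>
    #include "llama.h"

    // hypothetical helper mirroring the patched warm-up logic above
    static std::vector<llama_token> warmup_tokens(const struct llama_model * model) {
        std::vector<llama_token> tmp;
        const llama_token bos = llama_token_bos(model);
        const llama_token eos = llama_token_eos(model);
        if (bos != LLAMA_TOKEN_NULL) {
            tmp.push_back(bos);
        }
        if (eos != LLAMA_TOKEN_NULL) {
            tmp.push_back(eos);   // models like T5 define EOS but no BOS
        }
        if (tmp.empty()) {
            tmp.push_back(0);     // assumption: id 0 exists in every vocab, keeps the batch non-empty
        }
        return tmp;
    }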
And a model alias bert-bge-small
And 42 as server seed
And 2 slots
- And 1024 as batch size
- And 1024 as ubatch size
+ # the bert-bge-small model has a context size of 512
+ # since the generated prompts are as big as the batch size, we need to set the batch size to 512
+ # ref: https://huggingface.co/BAAI/bge-small-en-v1.5/blob/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/config.json#L20
+ And 512 as batch size
+ And 512 as ubatch size
And 2048 KV cache size
And embeddings extraction
Then the server is starting
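The new comments state the constraint explicitly: the test generates prompts as long as the batch, and bge-small-en-v1.5 was trained with a 512-token context, so prompts generated at batch-size length must stay within 512 tokens. A minimal sketch of the same check done programmatically, assuming a loaded `model`; llama_n_ctx_train() is the existing API, the clamp itself is only illustrative and not part of the patch:

    // clamp the batch sizes to the model's training context (512 for bge-small-en-v1.5)
    struct llama_context_params cparams = llama_context_default_params();
    cparams.n_batch  = 512;
    cparams.n_ubatch = 512;

    const int32_t n_ctx_train = llama_n_ctx_train(model);
    if ((int32_t) cparams.n_batch > n_ctx_train) {
        cparams.n_batch  = n_ctx_train;
        cparams.n_ubatch = n_ctx_train;
    }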
return -1;
}
+ if (batch_all.token) {
+     for (uint32_t i = 0; i < n_tokens_all; ++i) {
+         if (batch_all.token[i] < 0) {
+             LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch_all.token[i]);
+             return -1;
+         }
+     }
+ }
+
const auto & model = lctx.model;
const auto & hparams = model.hparams;
const auto & cparams = lctx.cparams;
return -1;
}
+ if (batch.token) {
+     for (uint32_t i = 0; i < n_tokens; ++i) {
+         if (batch.token[i] < 0) {
+             LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch.token[i]);
+             return -1;
+         }
+     }
+ }
+
const auto & model = lctx.model;
const auto & hparams = model.hparams;
const auto & cparams = lctx.cparams;
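The validation loop is duplicated at both call sites above. A minimal refactoring sketch, not part of the patch and using a hypothetical helper name, which folds the null-token guard and the loop into one place and additionally bound-checks against the vocabulary size, since ids at or above n_vocab would index past the embedding table:

    // hypothetical helper, not part of llama.cpp
    static int32_t batch_tokens_ok(const struct llama_model * model, const llama_token * tokens, uint32_t n_tokens) {
        if (tokens == nullptr) {
            return 0; // the batch carries embeddings, nothing to validate
        }
        const int32_t n_vocab = llama_n_vocab(model);
        for (uint32_t i = 0; i < n_tokens; ++i) {
            if (tokens[i] < 0 || tokens[i] >= n_vocab) {
                LLAMA_LOG_ERROR("%s: invalid token[%u] = %d\n", __func__, i, tokens[i]);
                return -1;
            }
        }
        return 0;
    }

    // usage at both call sites:
    // if (batch_tokens_ok(&lctx.model, batch_all.token, n_tokens_all) != 0) {
    //     return -1;
    // }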