common : remove duplicate function llama_should_add_bos_token (#8778)

author Zhenwei Jin <redacted>

Thu, 15 Aug 2024 07:23:23 +0000 (15:23 +0800)

committer GitHub <redacted>

Thu, 15 Aug 2024 07:23:23 +0000 (10:23 +0300)
author Zhenwei Jin <redacted>
Thu, 15 Aug 2024 07:23:23 +0000 (15:23 +0800)
committer GitHub <redacted>
Thu, 15 Aug 2024 07:23:23 +0000 (10:23 +0300)
diff --git a/common/common.cpp b/common/common.cpp

index d3d896115ae36b9446ef1bdbe35aa8f8231571ce..ce46e65ae2834df99d22a64bfebe5e3703026350 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2702,12 +2702,6 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
      return text;
  }
  
-bool llama_should_add_bos_token(const llama_model * model) {
-    const int add_bos = llama_add_bos_token(model);
-
-    return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-}
-
  //
  // Chat template utils
  //
diff --git a/common/common.h b/common/common.h

index bbc33a499afcd26c047a79b0ed52ff33ecf0a7d6..df23460a50fe063b51b9c6eca03b1ce5f5084685 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -380,10 +380,6 @@ std::string llama_detokenize(
          const std::vector<llama_token> & tokens,
                                    bool   special = true);
  
-// Uses the value from the model metadata if possible, otherwise
-// defaults to true when model type is SPM, otherwise false.
-bool llama_should_add_bos_token(const llama_model * model);
-
  //
  // Chat template utils
  //
diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp

index a12e90d828275dd34ba2e661c23c64f7061d7e4e..8fa492571aa445a4c32b74dead4cf1c42d9815e7 100644 (file)
--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@@ -271,7 +271,7 @@ struct tokenized_prompt {
      size_t max_seq_len;
  
      tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
-        const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+        const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
          tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
          tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
          max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp

index ef35ba2c03942e93f312fb0389ef23b56322eb8b..5e89988e2beda3f117113f3f4a2933a97e442b37 100644 (file)
--- a/examples/eval-callback/eval-callback.cpp
+++ b/examples/eval-callback/eval-callback.cpp
@@ -127,7 +127,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
  }
  
  static bool run(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
  
      std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);
  
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp

index 58814b96e7d497fc3ffa42afb91b89078a07dfec..83b85d72b043abe438e08d81769dd6305517da8a 100644 (file)
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -433,8 +433,8 @@ static void process_logits(
  }
  
  static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
      const int n_ctx = llama_n_ctx(ctx);
  
      auto tim1 = std::chrono::high_resolution_clock::now();
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp

index 92d630b15fdf1bbb4c2e92a02aed29b49087b5e4..05700c1d591d9f232dee34f22f4696f4aa40b607 100644 (file)
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -203,8 +203,8 @@ int main(int argc, char ** argv) {
          LOG_TEE("\n");
          LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
      }
-    const bool add_bos = llama_should_add_bos_token(model);
-    GGML_ASSERT(llama_add_eos_token(model) != 1);
+    const bool add_bos = llama_add_bos_token(model);
+    GGML_ASSERT(!llama_add_eos_token(model));
      LOG("add_bos: %d\n", add_bos);
  
      std::vector<llama_token> embd_inp;
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index 6e0635a66cd0671e81edc6f0c3815e92ae3d8693..4a342ad031663b38e4563db8bfb524a7422fcb92 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -267,9 +267,9 @@ int main(int argc, char ** argv) {
          }
      }
  
-    const bool add_bos = llama_should_add_bos_token(model);
+    const bool add_bos = llama_add_bos_token(model);
      if (!llama_model_has_encoder(model)) {
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        GGML_ASSERT(!llama_add_eos_token(model));
      }
      LOG("add_bos: %d\n", add_bos);
  
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp

index 372684f092de29e12248d3594a0a3049ab0efc7f..484dd589109c7cbc736a28bcea52be686547ce1a 100644 (file)
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -340,8 +340,8 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
      // Output: `perplexity: 13.5106 [114/114]`
      // BOS tokens will be added for each chunk before eval
  
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
  
      fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
  
@@ -480,8 +480,8 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
      // Output: `perplexity: 13.5106 [114/114]`
      // BOS tokens will be added for each chunk before eval
  
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
  
      std::ofstream logits_stream;
      if (!params.logits_file.empty()) {
@@ -1733,8 +1733,8 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
      const int n_batch = params.n_batch;
      const int num_batches = (n_ctx + n_batch - 1)/n_batch;
      const int nv = 2*((n_vocab + 1)/2) + 4;
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
  
      std::vector<uint16_t> log_probs_uint16(size_t(n_ctx - 1 - n_ctx/2) * nv);
      std::vector<float>    kld_values(size_t(n_ctx - 1 - n_ctx/2)*n_chunk);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp

index ace17a12f79316b407e54d6fa9ae99e433544a10..3fe0e6558b0874b33a8a28496a0d98dfb40bd97d 100644 (file)
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -693,9 +693,8 @@ struct server_context {
  
          n_ctx = llama_n_ctx(ctx);
  
-        add_bos_token = llama_should_add_bos_token(model);
-        has_eos_token = llama_add_eos_token(model) != 1;
-
+        add_bos_token = llama_add_bos_token(model);
+        has_eos_token = !llama_add_eos_token(model);
          return true;
      }
  
@@ -2038,7 +2037,7 @@ struct server_context {
                          slot.t_start_generation = 0;
  
                          if (slot.infill) {
-                            const bool add_bos = llama_should_add_bos_token(model);
+                            const bool add_bos = llama_add_bos_token(model);
                              bool suff_rm_leading_spc = true;
                              if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
                                  params.input_suffix.erase(0, 1);
diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp

index 17f5e496153a7bdc296cbaab99f9ade19043b973..c817be566cf548d1721dae5ffd8103bd4ec39843 100644 (file)
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@@ -362,7 +362,7 @@ int main(int raw_argc, char ** raw_argv) {
          prompt = stdin_buffer.str();
      }
  
-    const bool model_wants_add_bos = llama_should_add_bos_token(model);
+    const bool model_wants_add_bos = llama_add_bos_token(model);
      const bool add_bos = model_wants_add_bos && !no_bos;
      const bool parse_special = !no_parse_special;
  
diff --git a/include/llama.h b/include/llama.h

index fda68da851408742f763025fb7644b335f6b5655..ed81aa4697d17ef9e5f20fc04964b32fdf6f976f 100644 (file)
--- a/include/llama.h
+++ b/include/llama.h
@@ -914,11 +914,8 @@ extern "C" {
      LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
      LLAMA_API llama_token llama_token_pad(const struct llama_model * model); // padding
  
-    // Returns -1 if unknown, 1 for true or 0 for false.
-    LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model);
-
-    // Returns -1 if unknown, 1 for true or 0 for false.
-    LLAMA_API int32_t llama_add_eos_token(const struct llama_model * model);
+    LLAMA_API bool llama_add_bos_token(const struct llama_model * model);
+    LLAMA_API bool llama_add_eos_token(const struct llama_model * model);
  
      // Codellama infill tokens
      LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp

index 063af648eb3578585b030ad5487356b1b910f50b..11fffce9386d7401b40ba1373e5dca1a785af768 100644 (file)
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1468,11 +1468,11 @@ llama_token llama_token_pad_impl(const struct llama_vocab & vocab) {
      return vocab.special_pad_id;
  }
  
-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab) {
      return vocab.tokenizer_add_bos;
  }
  
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab) {
      return vocab.tokenizer_add_eos;
  }
  
diff --git a/src/llama-vocab.h b/src/llama-vocab.h

index 7adfc16da3af3839fb10e202d9d2460368948e56..6e8f30be43ba1cb2f93cd07d8189546998de49d1 100644 (file)
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -95,8 +95,8 @@ llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
  llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
  llama_token llama_token_pad_impl(const struct llama_vocab & vocab);
  
-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab);
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab);
  
  llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
  llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
diff --git a/src/llama.cpp b/src/llama.cpp

index bf7a57c79905d12277c5407b8e57cd00969ecc82..ee36de977cdc2cdf9d3b4ada8294a22ce3181873 100644 (file)
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -18705,11 +18705,11 @@ llama_token llama_token_pad(const struct llama_model * model) {
      return llama_token_pad_impl(model->vocab);
  }
  
-int32_t llama_add_bos_token(const struct llama_model * model) {
+bool llama_add_bos_token(const struct llama_model * model) {
      return llama_add_bos_token_impl(model->vocab);
  }
  
-int32_t llama_add_eos_token(const struct llama_model * model) {
+bool llama_add_eos_token(const struct llama_model * model) {
      return llama_add_eos_token_impl(model->vocab);
  }
author	Zhenwei Jin <redacted>
	Thu, 15 Aug 2024 07:23:23 +0000 (15:23 +0800)
committer	GitHub <redacted>
	Thu, 15 Aug 2024 07:23:23 +0000 (10:23 +0300)
common/common.cpp		patch | blob | history
common/common.h		patch | blob | history
examples/cvector-generator/cvector-generator.cpp		patch | blob | history
examples/eval-callback/eval-callback.cpp		patch | blob | history
examples/imatrix/imatrix.cpp		patch | blob | history
examples/infill/infill.cpp		patch | blob | history
examples/main/main.cpp		patch | blob | history
examples/perplexity/perplexity.cpp		patch | blob | history
examples/server/server.cpp		patch | blob | history
examples/tokenize/tokenize.cpp		patch | blob | history
include/llama.h		patch | blob | history
src/llama-vocab.cpp		patch | blob | history
src/llama-vocab.h		patch | blob | history
src/llama.cpp		patch | blob | history