llama : the WPM vocabs use the CLS token as BOS (#10930)

author Georgi Gerganov <redacted>
Tue, 24 Dec 2024 07:44:20 +0000 (09:44 +0200)

committer GitHub <redacted>
Tue, 24 Dec 2024 07:44:20 +0000 (09:44 +0200)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp

index 7f2725f94be1376228171fbb42276f662f033afc..0a477d6dd85f1990642c76a85c1d666e7f899ec7 100644 (file)
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1657,7 +1657,7 @@ bool llama_token_is_control_impl(const struct llama_vocab & vocab, llama_token t
  }
  
  llama_token llama_token_bos_impl(const struct llama_vocab & vocab) {
-    return vocab.special_bos_id;
+    return vocab.type != LLAMA_VOCAB_TYPE_WPM ? vocab.special_bos_id : vocab.special_cls_id;
  }
  
  llama_token llama_token_eos_impl(const struct llama_vocab & vocab) {
diff --git a/src/llama-vocab.h b/src/llama-vocab.h

index 4bb16d2e4299f776d2a9f510a7df7c28da3ffb20..a9b0da5ef3e33803e38b2c96841b1f865beb6cbf 100644 (file)
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -45,7 +45,7 @@ struct llama_vocab {
      id special_unk_id  = 0;
      id special_sep_id  = LLAMA_TOKEN_NULL;
      id special_pad_id  = LLAMA_TOKEN_NULL;
-    id special_cls_id  = LLAMA_TOKEN_NULL;
+    id special_cls_id  = LLAMA_TOKEN_NULL; // TODO: revisit if this is really needed https://github.com/ggerganov/llama.cpp/pull/10930
      id special_mask_id = LLAMA_TOKEN_NULL;
  
      id linefeed_id = 13;
author	Georgi Gerganov <redacted>
	Tue, 24 Dec 2024 07:44:20 +0000 (09:44 +0200)
committer	GitHub <redacted>
	Tue, 24 Dec 2024 07:44:20 +0000 (09:44 +0200)
src/llama-vocab.cpp		patch | blob | history
src/llama-vocab.h		patch | blob | history