if (llama_model_has_encoder(model)) {
llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size()));
llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
- if (decoder_start_token_id == -1) {
+ if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
decoder_start_token_id = bos;
}
tmp.clear();
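This hunk is from the model warm-up path. For orientation, a minimal sketch of the surrounding encode-then-decode warm-up, assuming `bos` and `eos` were looked up earlier in the function (as the fallback above implies):

// warm-up sketch: encoder-decoder models run one encode first, then seed
// the decoder with the declared start token, falling back to BOS when the
// model reports the LLAMA_TOKEN_NULL sentinel
std::vector<llama_token> tmp = { bos, eos };
if (llama_model_has_encoder(model)) {
    llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size()));
    llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
    if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
        decoder_start_token_id = bos;
    }
    tmp.clear();
    tmp.push_back(decoder_start_token_id);
}
llama_decode(lctx, llama_batch_get_one(tmp.data(), tmp.size()));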
static llama_token try_draft(common_ngram_cache & nc_static, const common_ngram ngram_static) {
common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
if (part_static_it == nc_static.end()) {
- return -1;
+ return LLAMA_TOKEN_NULL;
}
const common_ngram_cache_part part_static = part_static_it->second;
int max_count_static = 0;
int sum_count_static = 0;
- llama_token max_token = -1;
+ llama_token max_token = LLAMA_TOKEN_NULL;
for (std::pair<llama_token, int> token_count_static : part_static) {
const llama_token token = token_count_static.first;
}
if (sum_count_static < draft_min_sample_size_lax[LLAMA_NGRAM_STATIC-1]) {
- return -1;
+ return LLAMA_TOKEN_NULL;
}
if (100*max_count_static < draft_min_percent_lax[LLAMA_NGRAM_STATIC-1]*sum_count_static) {
- return -1;
+ return LLAMA_TOKEN_NULL;
}
return max_token;
}
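The loop body above is elided by the hunk (as is the matching loop in the primary-cache overload below). Under the assumption that it tallies per-token continuation counts, the elided accumulation would look roughly like:

// hypothetical shape of the elided loop body: track the most frequent
// continuation token and the total number of observed continuations
for (std::pair<llama_token, int> token_count_static : part_static) {
    const llama_token token = token_count_static.first;
    const int32_t     count = token_count_static.second;

    if (count > max_count_static) {
        max_token        = token;
        max_count_static = count;
    }
    sum_count_static += count;
}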
static llama_token try_draft(
        common_ngram_cache & nc_primary, const std::vector<common_ngram> & ngrams_primary, common_ngram_cache_part & part_static,
const int * min_sample_size, const int * min_percent) {
- llama_token drafted_token = -1;
+ llama_token drafted_token = LLAMA_TOKEN_NULL;
- for (int i = ngrams_primary.size()-1; i >= 0 && drafted_token == -1; --i) {
+ for (int i = ngrams_primary.size()-1; i >= 0 && drafted_token == LLAMA_TOKEN_NULL; --i) {
const common_ngram ngram_primary = ngrams_primary[i];
common_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary);
int max_count_primary = 0;
int max_count_static = 0;
int sum_count_primary = 0;
- llama_token max_token = -1;
+ llama_token max_token = LLAMA_TOKEN_NULL;
for (std::pair<llama_token, int> token_count_primary : part_primary) {
const llama_token token = token_count_primary.first;
}
while ((int) draft.size()-1 < n_draft) {
- llama_token drafted_token = -1;
+ llama_token drafted_token = LLAMA_TOKEN_NULL;
const int ngram_start_static = inp_size-LLAMA_NGRAM_STATIC + draft.size()-1;
common_ngram ngram_static;
}
ngrams_cd.push_back(ngram_cd);
}
- if (drafted_token == -1) {
+ if (drafted_token == LLAMA_TOKEN_NULL) {
drafted_token = try_draft(nc_context, ngrams_cd, part_static, draft_min_sample_size_lax, draft_min_percent_lax);
}
- if (drafted_token == -1) {
+ if (drafted_token == LLAMA_TOKEN_NULL) {
drafted_token = try_draft(nc_dynamic, ngrams_cd, part_static, draft_min_sample_size_strict, draft_min_percent_strict);
}
- if (drafted_token == -1) {
+ if (drafted_token == LLAMA_TOKEN_NULL) {
drafted_token = try_draft(nc_static, ngram_static);
}
- if (drafted_token == -1) {
+ if (drafted_token == LLAMA_TOKEN_NULL) {
break;
}
common_ngram() {
for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
- tokens[i] = -1;
+ tokens[i] = LLAMA_TOKEN_NULL;
}
}
common_ngram(const llama_token * input, const int ngram_size) {
for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
- tokens[i] = i < ngram_size ? input[i] : -1;
+ tokens[i] = i < ngram_size ? input[i] : LLAMA_TOKEN_NULL;
}
}
}
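Padding the unused slots with LLAMA_TOKEN_NULL (rather than leaving them uninitialized) is what makes fixed-width comparison and hashing of variable-size n-grams safe. A sketch of the equality check this enables (the actual operator may differ):

// fixed-width comparison is valid because unused slots always hold the
// same sentinel instead of stale memory
bool operator==(const common_ngram & other) const {
    for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
        if (tokens[i] != other.tokens[i]) {
            return false;
        }
    }
    return true;
}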
llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
- if (decoder_start_token_id == -1) {
+ if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
decoder_start_token_id = llama_token_bos(model);
}
gguf_set_val_u32(ctx, KV_TOKENIZER_UNK_ID, UNKNOWN_TOKEN_ID);
gguf_set_val_u32(ctx, KV_TOKENIZER_BOS_ID, BOS_TOKEN_ID);
gguf_set_val_u32(ctx, KV_TOKENIZER_EOS_ID, EOS_TOKEN_ID);
- gguf_set_val_u32(ctx, KV_TOKENIZER_SEP_ID, -1);
- gguf_set_val_u32(ctx, KV_TOKENIZER_PAD_ID, -1);
+ gguf_set_val_u32(ctx, KV_TOKENIZER_SEP_ID, LLAMA_TOKEN_NULL);
+ gguf_set_val_u32(ctx, KV_TOKENIZER_PAD_ID, LLAMA_TOKEN_NULL);
gguf_set_val_u32(ctx, KV_CONTEXT_LENGTH, model->hparams.n_ctx);
gguf_set_val_u32(ctx, KV_EMBEDDING_LENGTH, model->hparams.n_embd);
}
llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
- if (decoder_start_token_id == -1) {
+ if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
decoder_start_token_id = llama_token_bos(model);
}
// if the user stops generation mid-way, we must add EOT to finish the model's last response
if (need_insert_eot && format_chat) {
llama_token eot = llama_token_eot(model);
- embd_inp.push_back(eot == -1 ? llama_token_eos(model) : eot);
+ embd_inp.push_back(eot == LLAMA_TOKEN_NULL ? llama_token_eos(model) : eot);
need_insert_eot = false;
}
// format an incomplete UTF-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context * ctx, const llama_token token) {
- std::string out = token == -1 ? "" : common_token_to_piece(ctx, token);
+ std::string out = token == LLAMA_TOKEN_NULL ? "" : common_token_to_piece(ctx, token);
// if the size is 1 and the first bit is 1, it is a partial UTF-8 character
// (size > 1 means it is already a known, complete token)
#define LLAMA_DEFAULT_SEED 0xFFFFFFFF
-// TODO: use everywhere in the implementation
#define LLAMA_TOKEN_NULL -1
#define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
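Dropping the TODO is the point of this change set: call sites now compare against the named sentinel rather than a bare -1. A minimal usage sketch, mirroring the EOT fallback elsewhere in this diff:

// prefer the named sentinel over a magic -1 when probing optional tokens
llama_token eot = llama_token_eot(model);
if (eot == LLAMA_TOKEN_NULL) {
    eot = llama_token_eos(model); // model defines no EOT; fall back to EOS
}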
LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, model.name.c_str());
// special tokens
- if (vocab.special_bos_id != -1) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].text.c_str() ); }
- if (vocab.special_eos_id != -1) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].text.c_str() ); }
- if (vocab.special_eot_id != -1) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, vocab.special_eot_id, vocab.id_to_token[vocab.special_eot_id].text.c_str() ); }
- if (vocab.special_eom_id != -1) { LLAMA_LOG_INFO( "%s: EOM token = %d '%s'\n", __func__, vocab.special_eom_id, vocab.id_to_token[vocab.special_eom_id].text.c_str() ); }
- if (vocab.special_unk_id != -1) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].text.c_str() ); }
- if (vocab.special_sep_id != -1) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].text.c_str() ); }
- if (vocab.special_pad_id != -1) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].text.c_str() ); }
- if (vocab.special_cls_id != -1) { LLAMA_LOG_INFO( "%s: CLS token = %d '%s'\n", __func__, vocab.special_cls_id, vocab.id_to_token[vocab.special_cls_id].text.c_str() ); }
- if (vocab.special_mask_id != -1) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, vocab.special_mask_id, vocab.id_to_token[vocab.special_mask_id].text.c_str() ); }
-
- if (vocab.linefeed_id != -1) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, vocab.linefeed_id, vocab.id_to_token[vocab.linefeed_id].text.c_str() ); }
-
- if (vocab.special_fim_pre_id != -1) { LLAMA_LOG_INFO( "%s: FIM PRE token = %d '%s'\n", __func__, vocab.special_fim_pre_id, vocab.id_to_token[vocab.special_fim_pre_id].text.c_str() ); }
- if (vocab.special_fim_suf_id != -1) { LLAMA_LOG_INFO( "%s: FIM SUF token = %d '%s'\n", __func__, vocab.special_fim_suf_id, vocab.id_to_token[vocab.special_fim_suf_id].text.c_str() ); }
- if (vocab.special_fim_mid_id != -1) { LLAMA_LOG_INFO( "%s: FIM MID token = %d '%s'\n", __func__, vocab.special_fim_mid_id, vocab.id_to_token[vocab.special_fim_mid_id].text.c_str() ); }
- if (vocab.special_fim_pad_id != -1) { LLAMA_LOG_INFO( "%s: FIM PAD token = %d '%s'\n", __func__, vocab.special_fim_pad_id, vocab.id_to_token[vocab.special_fim_pad_id].text.c_str() ); }
- if (vocab.special_fim_rep_id != -1) { LLAMA_LOG_INFO( "%s: FIM REP token = %d '%s'\n", __func__, vocab.special_fim_rep_id, vocab.id_to_token[vocab.special_fim_rep_id].text.c_str() ); }
- if (vocab.special_fim_sep_id != -1) { LLAMA_LOG_INFO( "%s: FIM SEP token = %d '%s'\n", __func__, vocab.special_fim_sep_id, vocab.id_to_token[vocab.special_fim_sep_id].text.c_str() ); }
+ if (vocab.special_bos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].text.c_str() ); }
+ if (vocab.special_eos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].text.c_str() ); }
+ if (vocab.special_eot_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, vocab.special_eot_id, vocab.id_to_token[vocab.special_eot_id].text.c_str() ); }
+ if (vocab.special_eom_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOM token = %d '%s'\n", __func__, vocab.special_eom_id, vocab.id_to_token[vocab.special_eom_id].text.c_str() ); }
+ if (vocab.special_unk_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].text.c_str() ); }
+ if (vocab.special_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].text.c_str() ); }
+ if (vocab.special_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].text.c_str() ); }
+ if (vocab.special_cls_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: CLS token = %d '%s'\n", __func__, vocab.special_cls_id, vocab.id_to_token[vocab.special_cls_id].text.c_str() ); }
+ if (vocab.special_mask_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, vocab.special_mask_id, vocab.id_to_token[vocab.special_mask_id].text.c_str() ); }
+
+ if (vocab.linefeed_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, vocab.linefeed_id, vocab.id_to_token[vocab.linefeed_id].text.c_str() ); }
+
+ if (vocab.special_fim_pre_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PRE token = %d '%s'\n", __func__, vocab.special_fim_pre_id, vocab.id_to_token[vocab.special_fim_pre_id].text.c_str() ); }
+ if (vocab.special_fim_suf_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SUF token = %d '%s'\n", __func__, vocab.special_fim_suf_id, vocab.id_to_token[vocab.special_fim_suf_id].text.c_str() ); }
+ if (vocab.special_fim_mid_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM MID token = %d '%s'\n", __func__, vocab.special_fim_mid_id, vocab.id_to_token[vocab.special_fim_mid_id].text.c_str() ); }
+ if (vocab.special_fim_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PAD token = %d '%s'\n", __func__, vocab.special_fim_pad_id, vocab.id_to_token[vocab.special_fim_pad_id].text.c_str() ); }
+ if (vocab.special_fim_rep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM REP token = %d '%s'\n", __func__, vocab.special_fim_rep_id, vocab.id_to_token[vocab.special_fim_rep_id].text.c_str() ); }
+ if (vocab.special_fim_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SEP token = %d '%s'\n", __func__, vocab.special_fim_sep_id, vocab.id_to_token[vocab.special_fim_sep_id].text.c_str() ); }
for (const auto & id : vocab.special_eog_ids) {
LLAMA_LOG_INFO( "%s: EOG token = %d '%s'\n", __func__, id, vocab.id_to_token[id].text.c_str() );
for (int i = 0; i < (int)cur_p->size; ++i) {
const float val = cur_p->data[i].logit;
int ib = int(bucket_scale * val + bucket_inter); //nbuckets * (val - bucket_low) / (bucket_high - bucket_low);
- ib = std::max(0, std::min(nbuckets-1, ib));
+ ib = std::max(0, std::min(nbuckets - 1, ib));
bucket_idx[i] = ib;
++histo[ib];
}
for (int i = 0; i < (int)cur_p->size; ++i) {
int j = bucket_idx[i];
if (j >= ib) {
- *bucket_ptrs[nbuckets-1-j]++ = cur_p->data[i];
+ *bucket_ptrs[nbuckets - 1 - j]++ = cur_p->data[i];
}
}
ptr = tmp_tokens.data();
int ndone = 0;
- for (int j = nbuckets-1; j > ib; --j) {
+ for (int j = nbuckets - 1; j > ib; --j) {
std::sort(ptr, ptr + histo[j], comp);
ptr += histo[j];
ndone += histo[j];
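These hunks belong to a bucketed top-k sort: logits are binned into a histogram, buckets are consumed from the top down until k candidates are covered, and only those buckets get sorted. A self-contained sketch of the same idea, with simplified, hypothetical names (`top_k_bucketed` is not part of the library):

#include <algorithm>
#include <functional>
#include <vector>

// simplified bucketed top-k: bin values by linear scaling, walk buckets
// from the top until at least k values are covered, sort only those
std::vector<float> top_k_bucketed(const std::vector<float> & vals, size_t k,
                                  int nbuckets, float lo, float hi) {
    const float scale = nbuckets / (hi - lo);
    std::vector<std::vector<float>> buckets(nbuckets);
    for (float v : vals) {
        const int ib = std::max(0, std::min(nbuckets - 1, int(scale * (v - lo))));
        buckets[ib].push_back(v);
    }
    std::vector<float> out;
    for (int j = nbuckets - 1; j >= 0 && out.size() < k; --j) {
        std::sort(buckets[j].begin(), buckets[j].end(), std::greater<float>());
        out.insert(out.end(), buckets[j].begin(), buckets[j].end());
    }
    if (out.size() > k) {
        out.resize(k);
    }
    return out;
}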
ctx->dry_repeat_count[last - k] = std::min(n, rep_limit);
if (n > 0) {
lt = k;
- rt = k+n-1;
+ rt = k + n - 1;
}
} else {
// If k is inside the current Z-box, consider two cases.
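The `lt`/`rt` above are the bounds of the current Z-box from the Z-algorithm, which the DRY sampler uses to find repeated suffixes in linear time. For orientation, the classic Z-function with the same lt/rt bookkeeping:

#include <algorithm>
#include <cstdint>
#include <vector>

// classic Z-function: z[k] is the length of the longest common prefix of
// s and the suffix of s starting at k; lt/rt track the rightmost Z-box
std::vector<int> z_function(const std::vector<int32_t> & s) {
    const int n = (int) s.size();
    std::vector<int> z(n, 0);
    int lt = 0, rt = 0;
    for (int k = 1; k < n; ++k) {
        if (k < rt) {
            z[k] = std::min(rt - k, z[k - lt]); // reuse the mirrored value
        }
        while (k + z[k] < n && s[z[k]] == s[k + z[k]]) {
            ++z[k]; // extend past the box by direct comparison
        }
        if (k + z[k] > rt) {
            lt = k;
            rt = k + z[k]; // half-open here; the sampler stores rt = k+n-1
        }
    }
    return z;
}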
bool append_bos(std::vector<llama_vocab::id> & output) const {
if (vocab.tokenizer_add_bos) {
- GGML_ASSERT(vocab.special_bos_id != -1);
+ GGML_ASSERT(vocab.special_bos_id != LLAMA_TOKEN_NULL);
output.push_back(vocab.special_bos_id);
return true;
}
bool append_eos(std::vector<llama_vocab::id> & output) const {
if (vocab.tokenizer_add_eos) {
- GGML_ASSERT(vocab.special_eos_id != -1);
+ GGML_ASSERT(vocab.special_eos_id != LLAMA_TOKEN_NULL);
output.push_back(vocab.special_eos_id);
return true;
}
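Both helpers return whether a token was appended, which lets the caller account for the extra slots. A usage sketch, assuming a tokenizer session object exposing them (`n_raw_tokens_estimate` is a hypothetical placeholder):

// hypothetical caller: reserve for the worst case, then let the helpers
// decide based on the tokenizer configuration
std::vector<llama_vocab::id> output;
output.reserve(n_raw_tokens_estimate + 2); // + optional BOS and EOS
const bool added_bos = session.append_bos(output);
// ... tokenize the text into output ...
const bool added_eos = session.append_eos(output);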
if (source == 0) {
buffer.erase_after(buffer.before_begin());
} else {
- buffer.erase_after(std::next(buffer.begin(), (source-1)));
+ buffer.erase_after(std::next(buffer.begin(), (source - 1)));
}
// repeat for the right side
if (source == 0) {
buffer.erase_after(buffer.before_begin());
} else {
- buffer.erase_after(std::next(buffer.begin(), (source-1)));
+ buffer.erase_after(std::next(buffer.begin(), (source - 1)));
}
break;
}
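std::forward_list only supports erase_after, so removing the element at index `source` means positioning on its predecessor: `before_begin()` when source is 0, otherwise the node at `source - 1`. A tiny standalone demonstration:

#include <forward_list>
#include <iterator>

// erase the element at index idx from a singly linked list by pointing
// erase_after at the predecessor node
void erase_at(std::forward_list<int> & fl, int idx) {
    if (idx == 0) {
        fl.erase_after(fl.before_begin()); // predecessor of the head
    } else {
        fl.erase_after(std::next(fl.begin(), idx - 1));
    }
}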
bool is_prev_special = true; // prefix with space if first token
if (add_special && vocab.tokenizer_add_bos) {
- GGML_ASSERT(vocab.special_bos_id != -1);
+ GGML_ASSERT(vocab.special_bos_id != LLAMA_TOKEN_NULL);
output.push_back(vocab.special_bos_id);
is_prev_special = true;
}
}
if (add_special && vocab.tokenizer_add_eos) {
- GGML_ASSERT(vocab.special_eos_id != -1);
+ GGML_ASSERT(vocab.special_eos_id != LLAMA_TOKEN_NULL);
output.push_back(vocab.special_eos_id);
}
} break;
case LLAMA_VOCAB_TYPE_WPM:
{
if (add_special) {
- GGML_ASSERT(vocab.special_cls_id != -1);
+ GGML_ASSERT(vocab.special_cls_id != LLAMA_TOKEN_NULL);
output.push_back(vocab.special_cls_id);
}
}
if (add_special) {
- GGML_ASSERT(vocab.special_sep_id != -1);
+ GGML_ASSERT(vocab.special_sep_id != LLAMA_TOKEN_NULL);
output.push_back(vocab.special_sep_id);
}
} break;
case LLAMA_VOCAB_TYPE_UGM:
{
if (add_special && vocab.tokenizer_add_bos) {
- GGML_ASSERT(vocab.special_bos_id != -1);
+ GGML_ASSERT(vocab.special_bos_id != LLAMA_TOKEN_NULL);
output.push_back(vocab.special_bos_id);
}
llm_tokenizer_ugm_session session(vocab);
}
if (add_special && vocab.tokenizer_add_eos) {
- GGML_ASSERT(vocab.special_eos_id != -1);
+ GGML_ASSERT(vocab.special_eos_id != LLAMA_TOKEN_NULL);
output.push_back(vocab.special_eos_id);
}
} break;
}
bool llama_token_is_eog_impl(const struct llama_vocab & vocab, llama_token token) {
- return token != -1 && vocab.special_eog_ids.count(token) > 0;
+ return token != LLAMA_TOKEN_NULL && vocab.special_eog_ids.count(token) > 0;
}
bool llama_token_is_control_impl(const struct llama_vocab & vocab, llama_token token) {
}
if (remove_special && vocab.tokenizer_add_eos) {
- if (n_tokens > 0 && tokens[n_tokens-1] == vocab.special_eos_id) {
+ if (n_tokens > 0 && tokens[n_tokens - 1] == vocab.special_eos_id) {
n_tokens--;
}
}
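The guard strips at most one trailing EOS, and only when the tokenizer was configured to append one. The BOS side is presumably handled symmetrically; a sketch of what that mirror image would look like:

// symmetric sketch: drop one leading BOS that the tokenizer added
if (remove_special && vocab.tokenizer_add_bos) {
    if (n_tokens > 0 && tokens[0] == vocab.special_bos_id) {
        n_tokens -= 1;
        tokens   += 1;
    }
}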