}
- // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
+ // initialize score_sum to -DBL_MAX so it will always be lower than sums of token scores
- std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+ std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
// at the beginning tokenization score is zero
tokenization_results[0] = { vocab.token_unk(), 0, 0 };
const double challenger_score = current_best.score_sum + token_score;
struct best_tokenization & current_champ = tokenization_results[prefix_offset];
if (challenger_score > current_champ.score_sum) {
- struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+ struct best_tokenization challenger = { token_id, input_offset, challenger_score };
current_champ = challenger;
}
}
prefix_offset = input_offset + n_utf8_code_units;
struct best_tokenization & current_champ = tokenization_results[prefix_offset];
if (challenger_score > current_champ.score_sum) {
- struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+ struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
current_champ = challenger;
}
}
struct best_tokenization {
llama_token token_id;
size_t input_offset;
- float score_sum;
+ double score_sum;
};
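Why double instead of float: the accumulated log scores of competing tokenizations can differ by less than float precision can represent, so truncating through the old (float) cast could make the champion/challenger comparison miss a strictly better path. A minimal standalone sketch, not part of the patch, with hypothetical score values:

#include <cstdio>

int main() {
    // hypothetical accumulated scores of two candidate tokenizations; they
    // differ only past the ~7 significant decimal digits a float retains
    const double champ      = -123.45678901234;
    const double challenger = -123.45678901000; // strictly better (less negative)

    std::printf("double: challenger > champ ? %d\n", challenger > champ);                  // prints 1
    std::printf("float:  challenger > champ ? %d\n", (float) challenger > (float) champ); // prints 0: difference lost
    return 0;
}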
struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {