GGML_ASSERT(!original_logits.empty());
}
llama_token id = 0;
- // Get a pointer to the logits
- float * logits = llama_get_logits_ith(ctx_main, idx);
if (temp < 0.0) {
// greedy sampling, with probs
}
if (ctx_sampling->grammar != NULL && !is_resampling) {
+ // Get a pointer to the logits
+ float * logits = llama_get_logits_ith(ctx_main, idx);
+
// Create an array with a single token data element for the sampled id
llama_token_data single_token_data = {id, logits[id], 0.0f};
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };