return;
}
- const int64_t t_start_sample_us = ggml_time_us();
-
llama_sample_softmax(ctx, candidates);
+ const int64_t t_start_sample_us = ggml_time_us();
+
// Compute the cumulative probabilities
float cum_sum = 0.0f;
size_t last_idx = candidates->size;
return;
}
- const int64_t t_start_sample_us = ggml_time_us();
-
llama_sample_softmax(nullptr, candidates);
+ const int64_t t_start_sample_us = ggml_time_us();
// Compute the first and second derivatives
std::vector<float> first_derivatives(candidates->size - 1);
return;
}
- const int64_t t_start_sample_us = ggml_time_us();
-
// Compute the softmax of logits and calculate entropy
llama_sample_softmax(nullptr, candidates);
+ const int64_t t_start_sample_us = ggml_time_us();
+
float entropy = 0.0f;
for (size_t i = 0; i < candidates->size; ++i) {
entropy += -candidates->data[i].p * logf(candidates->data[i].p);
if (ctx) {
ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
- ctx->n_sample++;
}
return X;
}
llama_token llama_sample_token_mirostat_v2(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, float * mu) {
- assert(ctx);
int64_t t_start_sample_us;
t_start_sample_us = ggml_time_us();
candidates->size = 1;
}
+ if (ctx) {
+ ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
+ }
+
// Normalize the probabilities of the remaining words
llama_sample_softmax(ctx, candidates);
// Sample the next word X from the remaining words
- if (ctx) {
- ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
- }
llama_token X = llama_sample_token(ctx, candidates);
t_start_sample_us = ggml_time_us();