// available samplers:
LLAMA_API struct llama_sampler * llama_sampler_init_greedy(void);
- LLAMA_API struct llama_sampler * llama_sampler_init_dist (uint32_t seed);
+
+ /// seed == LLAMA_DEFAULT_SEED to use a random seed.
+ LLAMA_API struct llama_sampler * llama_sampler_init_dist(uint32_t seed);
/// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
/// Setting k <= 0 makes this a noop
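For reference, llama_sampler_init_dist is normally the last stage of a sampler chain, and the seed passed here is what the per-child seed logic below feeds into. A minimal sketch of that chain using the helpers from llama.h (ctx is assumed to be an already-initialized llama_context):

#include "llama.h"

// sketch: top-k filtering followed by seeded sampling from the remaining
// distribution, drawing a single token
static llama_token sample_one(struct llama_context * ctx) {
    struct llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());

    llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));
    llama_sampler_chain_add(chain, llama_sampler_init_dist(1234)); // fixed seed: reproducible
    // ... pass LLAMA_DEFAULT_SEED instead for a random seed per run

    // sample from the logits of the last evaluated token
    const llama_token tok = llama_sampler_sample(chain, ctx, -1);

    llama_sampler_free(chain);
    return tok;
}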
#include "server-task.h"
#include "server-queue.h"
-#include "arg.h"
#include "common.h"
#include "llama.h"
#include "log.h"
#include <cstddef>
#include <cinttypes>
#include <memory>
-#include <unordered_set>
#include <filesystem>
// fix problem with std::min and std::max: prevent <windows.h> from
// defining min/max macros that clash with the std:: versions
#if defined(_WIN32)
#define NOMINMAX
#endif
if (task.params.n_cmpl > 1) {
    task.n_children = task.params.n_cmpl - 1;
    for (size_t j = 0; j < task.n_children; j++) {
-        server_task child = task.create_child(
-            task.id,
-            rd.get_new_id());
+        server_task child = task.create_child(task.id, rd.get_new_id());
+
+        // use different sampling seed for each child
+        // note: https://github.com/ggml-org/llama.cpp/pull/18700#discussion_r2675115723
+        if (child.params.sampling.seed != LLAMA_DEFAULT_SEED) {
+            child.params.sampling.seed += j + 1;
+        }
+
        tasks.push_back(std::move(child));
    }
}
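The guard above means a request with a random seed (LLAMA_DEFAULT_SEED) keeps fully random sampling for every child, while an explicit seed fans out to seed+1, seed+2, ... so parallel completions do not all reproduce the same text. A standalone sketch of that mapping (child_seed is a hypothetical helper, not server code; LLAMA_DEFAULT_SEED is 0xFFFFFFFF in llama.h):

#include <cinttypes>
#include <cstdio>

#define LLAMA_DEFAULT_SEED 0xFFFFFFFF // as defined in llama.h

// hypothetical helper mirroring the loop above: seed for the j-th child
static uint32_t child_seed(uint32_t parent_seed, size_t j) {
    if (parent_seed == LLAMA_DEFAULT_SEED) {
        return parent_seed; // random stays random for every child
    }
    return parent_seed + (uint32_t) (j + 1); // fixed seed fans out: +1, +2, ...
}

int main() {
    // parent seed 42 with two children -> children sample with seeds 43 and 44
    for (size_t j = 0; j < 2; j++) {
        printf("child %zu: seed %" PRIu32 "\n", j, child_seed(42, j));
    }
    return 0;
}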
assert len(res.body["choices"]) == 2
for choice in res.body["choices"]:
assert "assistant" == choice["message"]["role"]
- assert match_regex("Suddenly", choice["message"]["content"])
assert choice["finish_reason"] == "length"