* Update ggml.c
* Update arg.cpp
* Update speculative.h
));
add_opt(common_arg(
{"--no-context-shift"},
- string_format("disables context shift on inifinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
+ string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
[](common_params & params) {
params.ctx_shift = false;
}
int n_draft = 16; // max drafted tokens
int n_reuse = 256;
- float p_min = 0.9f; // min probabiliy required to accept a token in the draft
+ float p_min = 0.9f; // min probability required to accept a token in the draft
};
struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);
(t0->nb[3] == t1->nb[3]);
}
-// check if t1 can be represented as a repeatition of t0
+// check if t1 can be represented as a repetition of t0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");