throw std::invalid_argument("unknown speculative decoding type without draft model");
}
}
- ).set_examples({LLAMA_EXAMPLE_SERVER}));
+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SPEC_TYPE"));
add_opt(common_arg(
{"--spec-ngram-size-n"}, "N",
string_format("ngram size N for ngram-simple/ngram-map speculative decoding, length of lookup n-gram (default: %d)", params.speculative.ngram_size_n),