From: Martin Krasser Date: Thu, 10 Aug 2023 10:16:38 +0000 (+0200) Subject: Fix grammar-based sampling issue in server (#2566) X-Git-Tag: gguf-v0.4.0~333 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=1638757767072a4957f52b9e3594f0b67610631b;p=pkg%2Fggml%2Fsources%2Fllama.cpp Fix grammar-based sampling issue in server (#2566) --- diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 10ae264f..637f6d6c 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -196,6 +196,7 @@ struct llama_server_context llama_context *ctx = nullptr; gpt_params params; + grammar_parser::parse_state parsed_grammar; llama_grammar *grammar = nullptr; bool truncated = false; @@ -241,10 +242,13 @@ struct llama_server_context stopped_limit = false; stopping_word = ""; multibyte_pending = 0; - grammar = nullptr; - n_remain = 0; n_past = 0; + + if (grammar != nullptr) { + llama_grammar_free(grammar); + grammar = nullptr; + } } bool loadModel(const gpt_params &params_) @@ -265,8 +269,6 @@ struct llama_server_context bool loadGrammar() { if (!params.grammar.empty()) { - grammar_parser::parse_state parsed_grammar; - parsed_grammar = grammar_parser::parse(params.grammar.c_str()); // will be empty (default) if there are parse errors if (parsed_grammar.rules.empty()) {