llama : fix empty ring buffer push (#9358)

author Georgi Gerganov <redacted>

Sat, 7 Sep 2024 21:33:33 +0000 (00:33 +0300)

committer GitHub <redacted>

Sat, 7 Sep 2024 21:33:33 +0000 (00:33 +0300)
author Georgi Gerganov <redacted>
Sat, 7 Sep 2024 21:33:33 +0000 (00:33 +0300)
committer GitHub <redacted>
Sat, 7 Sep 2024 21:33:33 +0000 (00:33 +0300)
diff --git a/common/sampling.cpp b/common/sampling.cpp

index c81b4d233b04e87ddd16d30218ce544d25b265e5..7806b77e06a9fd0faa833e8ff825b5b020928538 100644 (file)
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -145,7 +145,7 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st
          /* .params = */ params,
          /* .grmr   = */ llama_sampler_init_grammar(model, params.grammar.c_str(), "root"),
          /* .chain  = */ llama_sampler_chain_init(lparams),
-        /* .prev   = */ ring_buffer<llama_token>(params.n_prev),
+        /* .prev   = */ ring_buffer<llama_token>(std::max(32, params.n_prev)),
          /* .cur    = */ {},
          /* .cur_p  = */ {},
      };
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp

index 61f4cbb9217e88a04a2c6d09f89b59222ddbcae0..1661d9a83ec80ea52f9818d85dec9a174c9b8853 100644 (file)
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -1226,7 +1226,9 @@ static struct llama_sampler_i llama_sampler_penalties_i = {
      /* .name   = */ [](const struct llama_sampler * /*smpl*/) { return "penalties"; },
      /* .accept = */ [](struct llama_sampler * smpl, llama_token token) {
          auto * ctx = (llama_sampler_penalties *) smpl->ctx;
-        ctx->prev.push_back(token);
+        if (ctx->prev.size()) {
+            ctx->prev.push_back(token);
+        }
      },
      /* .apply  = */ [](struct llama_sampler * smpl, llama_token_data_array * cur_p) {
          auto * ctx = (llama_sampler_penalties *) smpl->ctx;
author	Georgi Gerganov <redacted>
	Sat, 7 Sep 2024 21:33:33 +0000 (00:33 +0300)
committer	GitHub <redacted>
	Sat, 7 Sep 2024 21:33:33 +0000 (00:33 +0300)
common/sampling.cpp		patch | blob | history
src/llama-sampling.cpp		patch | blob | history