llama : fix typical sampling (#4261)

author tarcey <redacted>

Thu, 30 Nov 2023 21:40:23 +0000 (22:40 +0100)

committer GitHub <redacted>

Thu, 30 Nov 2023 21:40:23 +0000 (23:40 +0200)
author tarcey <redacted>
Thu, 30 Nov 2023 21:40:23 +0000 (22:40 +0100)
committer GitHub <redacted>
Thu, 30 Nov 2023 21:40:23 +0000 (23:40 +0200)
diff --git a/llama.cpp b/llama.cpp

index cb544228b9f021c17ec6d039adfc39b8d26ea213..4af4506157842b78564da857301cd541199b6b97 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -7027,6 +7027,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
      // Replace the data in candidates with the new_candidates data
      std::copy(new_candidates.begin(), new_candidates.end(), candidates->data);
      candidates->size = new_candidates.size();
+    candidates->sorted = false;
  
      if (ctx) {
          ctx->t_sample_us += ggml_time_us() - t_start_sample_us;