llama : use reserve/emplace_back in sampler_sample (#9534)

author Daniel Bevenius <redacted>

Wed, 18 Sep 2024 11:42:36 +0000 (13:42 +0200)

committer GitHub <redacted>

Wed, 18 Sep 2024 11:42:36 +0000 (14:42 +0300)
author Daniel Bevenius <redacted>
Wed, 18 Sep 2024 11:42:36 +0000 (13:42 +0200)
committer GitHub <redacted>
Wed, 18 Sep 2024 11:42:36 +0000 (14:42 +0300)
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp

index 5275b1d6001e4974996cb09f6ca771774f0e16e3..5299f51160dac41a65f439eff28f27639ab3aa0b 100644 (file)
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -236,9 +236,10 @@ llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_conte
      const int n_vocab = llama_n_vocab(llama_get_model(ctx));
  
      // TODO: do not allocate each time
-    std::vector<llama_token_data> cur(n_vocab);
+    std::vector<llama_token_data> cur;
+    cur.reserve(n_vocab);
      for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-        cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
+        cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
      }
  
      llama_token_data_array cur_p = {
author	Daniel Bevenius <redacted>
	Wed, 18 Sep 2024 11:42:36 +0000 (13:42 +0200)
committer	GitHub <redacted>
	Wed, 18 Sep 2024 11:42:36 +0000 (14:42 +0300)