llama : fix compile warnings

author Georgi Gerganov <redacted>

Tue, 2 May 2023 20:09:08 +0000 (23:09 +0300)

committer Georgi Gerganov <redacted>

Tue, 2 May 2023 20:09:08 +0000 (23:09 +0300)
author Georgi Gerganov <redacted>
Tue, 2 May 2023 20:09:08 +0000 (23:09 +0300)
committer Georgi Gerganov <redacted>
Tue, 2 May 2023 20:09:08 +0000 (23:09 +0300)
diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp

index 2cc1a1477762c9d01193024010e6ddf5af4cf96d..6117ae3abf877e52963ba9e2db8500585da9d2d9 100644 (file)
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -38,9 +38,9 @@ float tensor_sum_elements(struct ggml_tensor * tensor) {
  
  #define TENSOR_TYPE_AS_STR(TYPE) TYPE == GGML_TYPE_F32 ? "FP32" : TYPE == GGML_TYPE_F16 ? "FP16" : TYPE == GGML_TYPE_Q4_0 ? "Q4_0" : TYPE == GGML_TYPE_Q4_1 ? "Q4_1" : "UNKNOWN"
  
-#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5ld x %5ld x %5ld, nb = (%5li, %5li, %5li) - ", #TENSOR, \
+#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", #TENSOR, \
          TENSOR->type,TENSOR_TYPE_AS_STR(TENSOR->type),\
-        TENSOR->ne[0], TENSOR->ne[1], TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \
+        (int) TENSOR->ne[0], (int) TENSOR->ne[1], (int) TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \
      { float sum = tensor_sum_elements(TENSOR); printf("Sum of tensor %s is %6.2f\n",#TENSOR, sum); }
  
  struct benchmark_params_struct {
@@ -138,7 +138,7 @@ int main(int argc, char ** argv)  {
      ctx = ggml_init(params);
      if (!ctx) {
          fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-        return false;
+        return 1;
      }
  
  
diff --git a/llama.cpp b/llama.cpp

index a8156bcc25e6753a039ec9a297a0cce5915899ba..d4ef056454a3443cd4f3fb22d4c4d2eb09e8febe 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -1702,7 +1702,7 @@ void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array
      }
  }
  
-void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_data_array * candidates, llama_token * last_tokens, size_t last_tokens_size, float penalty) {
+void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_data_array * candidates, const llama_token * last_tokens, size_t last_tokens_size, float penalty) {
      if (last_tokens_size == 0 || penalty == 1.0f) {
          return;
      }
@@ -1731,7 +1731,7 @@ void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_dat
      }
  }
  
-void llama_sample_frequency_and_presence_penalties(struct llama_context * ctx, llama_token_data_array * candidates, llama_token * last_tokens_p, size_t last_tokens_size, float alpha_frequency, float alpha_presence) {
+void llama_sample_frequency_and_presence_penalties(struct llama_context * ctx, llama_token_data_array * candidates, const llama_token * last_tokens_p, size_t last_tokens_size, float alpha_frequency, float alpha_presence) {
      if (last_tokens_size == 0 || (alpha_frequency == 0.0f && alpha_presence == 0.0f)) {
          return;
      }
diff --git a/llama.h b/llama.h

index 4052a8ca2c3a984761cb204e5a202f1cde077554..81f43174af1229642cb89ec265de256de929d5d5 100644 (file)
--- a/llama.h
+++ b/llama.h
@@ -192,10 +192,10 @@ extern "C" {
      // Sampling functions
  
      /// @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
-    LLAMA_API void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_data_array * candidates, llama_token * last_tokens, size_t last_tokens_size, float penalty);
+    LLAMA_API void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_data_array * candidates, const llama_token * last_tokens, size_t last_tokens_size, float penalty);
  
      /// @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details.
-    LLAMA_API void llama_sample_frequency_and_presence_penalties(struct llama_context * ctx, llama_token_data_array * candidates, llama_token * last_tokens, size_t last_tokens_size, float alpha_frequency, float alpha_presence);
+    LLAMA_API void llama_sample_frequency_and_presence_penalties(struct llama_context * ctx, llama_token_data_array * candidates, const llama_token * last_tokens, size_t last_tokens_size, float alpha_frequency, float alpha_presence);
  
      /// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
      LLAMA_API void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates);
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp

index 7eee4f6d3a6456d164a0af1e10ba37f3554869ff..8ce59af3dc94327e9c60d68a204616597ad30e8b 100644 (file)
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -131,7 +131,7 @@ void test_repetition_penalty(
      llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
      llama_sample_softmax(nullptr, &candidates_p);
      DUMP(&candidates_p);
-    llama_sample_repetition_penalty(nullptr, &candidates_p, (llama_token *)last_tokens.data(), last_tokens.size(), penalty);
+    llama_sample_repetition_penalty(nullptr, &candidates_p, (const llama_token *) last_tokens.data(), last_tokens.size(), penalty);
      llama_sample_softmax(nullptr, &candidates_p);
      DUMP(&candidates_p);
  
@@ -160,7 +160,7 @@ void test_frequency_presence_penalty(
      llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
      llama_sample_softmax(nullptr, &candidates_p);
      // DUMP(&candidates_p);
-    llama_sample_frequency_and_presence_penalties(nullptr, &candidates_p, (llama_token *)last_tokens.data(), last_tokens.size(), alpha_frequency, alpha_presence);
+    llama_sample_frequency_and_presence_penalties(nullptr, &candidates_p, (const llama_token *) last_tokens.data(), last_tokens.size(), alpha_frequency, alpha_presence);
      llama_sample_softmax(nullptr, &candidates_p);
      // DUMP(&candidates_p);
author	Georgi Gerganov <redacted>
	Tue, 2 May 2023 20:09:08 +0000 (23:09 +0300)
committer	Georgi Gerganov <redacted>
	Tue, 2 May 2023 20:09:08 +0000 (23:09 +0300)
examples/benchmark/benchmark-matmult.cpp		patch | blob | history
llama.cpp		patch | blob | history
llama.h		patch | blob | history
tests/test-sampling.cpp		patch | blob | history