llama : fix integer overflow during quantization (#6063)

author Georgi Gerganov <redacted>

Thu, 14 Mar 2024 20:58:41 +0000 (22:58 +0200)

committer GitHub <redacted>

Thu, 14 Mar 2024 20:58:41 +0000 (22:58 +0200)
author Georgi Gerganov <redacted>
Thu, 14 Mar 2024 20:58:41 +0000 (22:58 +0200)
committer GitHub <redacted>
Thu, 14 Mar 2024 20:58:41 +0000 (22:58 +0200)
diff --git a/llama.cpp b/llama.cpp

index 10fd53469eb6fc10480eb11f462981eb8524f451..2c384197492e87841f57408a511b6ea5902e2938 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -11977,7 +11977,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
      return new_type;
  }
  
-static int32_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
+static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
      std::mutex mutex;
      int counter = 0;
      size_t new_size = 0;
author	Georgi Gerganov <redacted>
	Thu, 14 Mar 2024 20:58:41 +0000 (22:58 +0200)
committer	GitHub <redacted>
	Thu, 14 Mar 2024 20:58:41 +0000 (22:58 +0200)