ggml-opencl, llama: using reserve() if count already known (llama/7272)

author Herman Semenov <redacted>

Mon, 20 May 2024 07:33:21 +0000 (07:33 +0000)

committer Georgi Gerganov <redacted>

Tue, 28 May 2024 11:41:08 +0000 (14:41 +0300)
author Herman Semenov <redacted>
Mon, 20 May 2024 07:33:21 +0000 (07:33 +0000)
committer Georgi Gerganov <redacted>
Tue, 28 May 2024 11:41:08 +0000 (14:41 +0300)
diff --git a/src/ggml-opencl.cpp b/src/ggml-opencl.cpp

index 880a14958cec524e11ccc91b4bc92069eb2bbf5b..922f248376ced69a399be96c617cae9f56a09661 100644 (file)
--- a/src/ggml-opencl.cpp
+++ b/src/ggml-opencl.cpp
@@ -1,4 +1,4 @@
-#include "ggml.h"
+#include "ggml.h"
  #include "ggml-opencl.h"
  #include "ggml-backend-impl.h"
  
@@ -1835,7 +1835,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                      CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
                  }
  
-                for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
+                int64_t i12 = i02 * r2;
+                int64_t e12 = i12 + r2;
+                events.reserve(e12 - i12);
+                for (; i12 < e12; i12++) {
                      if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
                          // copy src1 to device
                          events.emplace_back();
author	Herman Semenov <redacted>
	Mon, 20 May 2024 07:33:21 +0000 (07:33 +0000)
committer	Georgi Gerganov <redacted>
	Tue, 28 May 2024 11:41:08 +0000 (14:41 +0300)