-#include "ggml.h"
+#include "ggml.h"
#include "ggml-opencl.h"
#include "ggml-backend-impl.h"
CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
}
- for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
+ int64_t i12 = i02 * r2;
+ int64_t e12 = i12 + r2;
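+ // the element count is now known before the loop, so one reserve avoids repeated reallocation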
+ events.reserve(e12 - i12);
+ for (; i12 < e12; i12++) {
if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
// copy src1 to device
events.emplace_back();
}
// make tensors
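+ // one entry per layer is pushed below (nullptr for layer 0), hence n_layer slots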
+ cvec.tensors.reserve(model.hparams.n_layer);
cvec.tensors.push_back(nullptr); // there's never a tensor for layer 0
for (size_t il = 1; il < model.hparams.n_layer; il++) {
struct ggml_context * ctx = ctx_map.at(model.buft_layer[il].buft);
}
// allocate tensors / buffers and zero
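+ // the loop below appends one context and one buffer per ctx_map entry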
+ cvec.ctxs.reserve(ctx_map.size());
+ cvec.bufs.reserve(ctx_map.size());
for (auto it : ctx_map) {
ggml_backend_buffer_type_t buft = it.first;
ggml_context * ctx = it.second;
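(For reviewers unfamiliar with the idiom: a minimal, self-contained sketch of the reserve-before-fill pattern this patch applies. All names below are hypothetical stand-ins, not code from this repository.)

#include <cstdint>
#include <vector>

int main() {
    const int64_t i02 = 3;
    const int64_t r2  = 8; // broadcast factor, mirroring the loop in the first hunk

    std::vector<int64_t> events;

    // Hoisting the bounds out of the for-statement makes the element
    // count available before the first emplace_back.
    int64_t i12 = i02 * r2;
    int64_t e12 = i12 + r2;

    // One allocation up front instead of repeated grow-and-copy cycles.
    events.reserve(e12 - i12);

    for (; i12 < e12; i12++) {
        events.emplace_back(i12); // never reallocates after the reserve
    }
    return 0;
}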