llama : pre-allocate input tensors in a separate buffer (llama/5100)

author slaren <redacted>
Wed, 24 Jan 2024 11:48:14 +0000 (12:48 +0100)

committer Georgi Gerganov <redacted>
Sat, 27 Jan 2024 15:19:51 +0000 (17:19 +0200)
diff --git a/ggml-alloc.c b/ggml-alloc.c
index 89b85d34870d76b058d62c9b87b833cb9bf70903..60141a34d8f6a22a13238934e66ba920f3ea99ac 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -109,8 +109,8 @@ void ggml_tallocr_alloc(ggml_tallocr_t alloc, struct ggml_tensor * tensor) {
          if (block->size >= size) {
              best_fit_block = alloc->n_free_blocks - 1;
          } else {
-            fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
-                    __func__, size, max_avail);
+            fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, largest block available %zu)\n",
+                    __func__, tensor->name, size, max_avail);
              GGML_ASSERT(!"not enough space in the buffer");
              return;
          }
author	slaren <redacted>
	Wed, 24 Jan 2024 11:48:14 +0000 (12:48 +0100)
committer	Georgi Gerganov <redacted>
	Sat, 27 Jan 2024 15:19:51 +0000 (17:19 +0200)