// call with a worst-case graph to avoid buffer reallocations
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
// returns false if the buffer allocation failed
+// ggml_gallocr_reserve_n_size writes the size in bytes that ggml_gallocr_reserve_n would allocate for each galloc buffer to sizes (one entry per buffer); see the example below
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
+GGML_API void ggml_gallocr_reserve_n_size(
+ ggml_gallocr_t galloc,
+ struct ggml_cgraph * graph,
+ const int * node_buffer_ids,
+ const int * leaf_buffer_ids,
+ size_t * sizes);
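+//
+// illustrative usage sketch for ggml_gallocr_reserve_n_size (gf, node_buffer_ids and leaf_buffer_ids are assumed to exist,
+// and sizes must have one entry per galloc buffer, i.e. the n_bufs passed to ggml_gallocr_new_n):
+//
+//     size_t sizes[8];
+//     ggml_gallocr_reserve_n_size(galloc, gf, node_buffer_ids, leaf_buffer_ids, sizes);
+//     // sizes[i] is the number of bytes that ggml_gallocr_reserve_n would allocate for buffer i;
+//     // check them against the available memory before committing:
+//     ggml_gallocr_reserve_n(galloc, gf, node_buffer_ids, leaf_buffer_ids);
+//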
GGML_API bool ggml_gallocr_reserve_n(
ggml_gallocr_t galloc,
struct ggml_cgraph * graph,
// Utils
// Create a buffer and allocate all the tensors in a ggml_context
+// ggml_backend_alloc_ctx_tensors_from_buft_size returns the total size in bytes of the buffer(s) that ggml_backend_alloc_ctx_tensors_from_buft would allocate; see the example below
+GGML_API size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
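+//
+// illustrative sketch of measuring before allocating (ctx and buft are assumed to exist):
+//
+//     size_t nbytes = ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);
+//     // nbytes is the total that the call below would allocate; check it against the available memory first
+//     ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);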
}
static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) {
- return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
+    return t->data != NULL                   // tensor data already set externally
+        || t->buffer                         // tensor assigned to an external buffer (its data may not be set yet)
+        || ggml_gallocr_is_own(galloc, t);   // tensor already allocated by this galloc
}
// free the extra space at the end if the new tensor is smaller
}
}
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+static bool ggml_gallocr_reserve_n_impl(
+ ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, bool no_alloc) {
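+    // no_alloc == true: plan the tensor layout and record the required sizes, but do not create any backend buffers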
size_t min_hash_size = graph->n_nodes + graph->n_leafs;
// add 25% margin to avoid hash collisions
min_hash_size += min_hash_size / 4;
size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
if (cur_size > 0) {
GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n",
- __func__, ggml_backend_buft_name(galloc->bufts[i]),
- cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+ __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
}
}
#endif
ggml_vbuffer_free(galloc->buffers[i]);
- galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
- if (galloc->buffers[i] == NULL) {
- GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
- return false;
+ if (no_alloc) {
+ galloc->buffers[i] = NULL;
+ } else {
+ galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
+ if (galloc->buffers[i] == NULL) {
+ GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
+ return false;
+ }
}
}
}
return true;
}
+void ggml_gallocr_reserve_n_size(
+ ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, size_t * sizes) {
+    const bool ok = ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc =*/ true);
+    GGML_ASSERT(ok);
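+    // report the planned size per buffer: the sum of the high-water marks of all chunks of the dynamic allocator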
+ for (int i = 0; i < galloc->n_buffers; i++) {
+ sizes[i] = 0;
+ for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
+ sizes[i] += galloc->buf_tallocs[i]->chunks[c]->max_size;
+ }
+ }
+}
+
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+ return ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc =*/ false);
+}
+
bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
}
return true;
}
-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl(
+ struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
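+    // nbytes_total receives the combined size of all buffers this function would allocate;
+    // with no_alloc == true the sizes are only measured and NULL is returned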
GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
size_t alignment = ggml_backend_buft_get_alignment(buft);
ggml_backend_buffer_t * buffers = NULL;
size_t n_buffers = 0;
+ *nbytes_total = 0;
size_t cur_buf_size = 0;
struct ggml_tensor * first = ggml_get_first_tensor(ctx);
if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
// allocate tensors in the current buffer
- if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
+ if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
return NULL;
}
first = t;
+ *nbytes_total += cur_buf_size;
cur_buf_size = this_size;
} else {
cur_buf_size += this_size;
// allocate remaining tensors
if (cur_buf_size > 0) {
- if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
+ *nbytes_total += cur_buf_size;
+ if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
return NULL;
}
}
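+    // in measuring mode no buffers were created, so there is nothing to return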
+ if (no_alloc) {
+ return NULL;
+ }
+
if (n_buffers == 0) {
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
#endif
+ GGML_ASSERT(!buffers);
return NULL;
}
} else {
buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
}
-    free(buffers);
+    if (buffers) {
+        free(buffers); // NULL only on the early-return paths above, guarded defensively
+    }
return buffer;
}
+size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+ size_t nbytes_total = 0;
+    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ true);
+ GGML_ASSERT(!buf);
+ return nbytes_total;
+}
+
+ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+ size_t nbytes_total = 0;
+ return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ false);
+}
+
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
}
}
ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+ GGML_ASSERT(buft);
if (size == 0) {
// return a dummy buffer for zero-sized allocations
return ggml_backend_buffer_init(buft, {}, NULL, 0);
}
-
- GGML_ASSERT(buft);
return buft->iface.alloc_buffer(buft, size);
}
return NULL;
}
+    // FIXME JG: a multi_buffer has a non-zero size yet does not implement get_base; according to the comment
+    //     above, get_base is not optional in that case, but it is unclear whether that comment is correct
+ if (!buffer->iface.get_base) {
+ return NULL;
+ }
+
void * base = buffer->iface.get_base(buffer);
GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL");
sched->is_alloc = false;
}
+void ggml_backend_sched_reserve_size(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph, size_t * sizes) {
+ GGML_ASSERT(sched);
+ GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
+ GGML_ASSERT(sizes);
+
+ ggml_backend_sched_reset(sched);
+
+ ggml_backend_sched_synchronize(sched);
+
+ ggml_backend_sched_split_graph(sched, measure_graph);
+
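+    // measure only: compute the per-backend buffer sizes that the equivalent ggml_backend_sched_reserve call would allocate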
+ ggml_gallocr_reserve_n_size(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids, sizes);
+}
+
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
GGML_ASSERT(sched);
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);