    set_param_lora(lora);
    // measure data size
-    struct ggml_allocr * alloc = NULL;
-    alloc = ggml_allocr_new_measure(tensor_alignment);
-    alloc_lora(alloc, lora);
+    size_t size = 0;
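+    // pad each tensor to the allocator alignment so the total matches what ggml_allocr will actually use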
+    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        size += GGML_PAD(ggml_nbytes(t), tensor_alignment);
+    }
    // allocate data
-    lora->data.resize(ggml_allocr_max_size(alloc) + tensor_alignment);
-    ggml_allocr_free(alloc);
+    struct ggml_allocr * alloc = NULL;
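+    // reserve tensor_alignment extra bytes so the buffer base can be aligned by the allocator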
+    lora->data.resize(size + tensor_alignment);
    alloc = ggml_allocr_new(lora->data.data(), lora->data.size(), tensor_alignment);
    alloc_lora(alloc, lora);
    ggml_allocr_free(alloc);
    struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx_input, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
    // measure required memory for input tensors
-    alloc = ggml_allocr_new_measure(tensor_alignment);
-    ggml_allocr_alloc(alloc, tokens_input);
-    ggml_allocr_alloc(alloc, target_probs);
-    size_t max_input_size = ggml_allocr_max_size(alloc) + tensor_alignment;
-    ggml_allocr_free(alloc);
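+    // compute the exact size directly: both inputs padded to the alignment, plus slack for the base pointer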
+    size_t max_input_size = GGML_PAD(ggml_nbytes(tokens_input), tensor_alignment) +
+                            GGML_PAD(ggml_nbytes(target_probs), tensor_alignment) +
+                            tensor_alignment;
    printf("%s: input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f));
    // allocate input tensors
    return result;
}
+struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx) {
+    struct ggml_object * obj = ctx->objects_begin;
+
+    char * const mem_buffer = ctx->mem_buffer;
+
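+    // walk the object list; skip non-tensor objects such as graphs and work buffers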
+    while (obj != NULL) {
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            return (struct ggml_tensor *)(mem_buffer + obj->offs);
+        }
+
+        obj = obj->next;
+    }
+
+    return NULL;
+}
+
+struct ggml_tensor * ggml_get_next_tensor(struct ggml_context * ctx, struct ggml_tensor * tensor) {
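+    // the ggml_object header is laid out immediately before its tensor in the context buffer, so step back to recover it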
+    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
+    obj = obj->next;
+
+    char * const mem_buffer = ctx->mem_buffer;
+
+    while (obj != NULL) {
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            return (struct ggml_tensor *)(mem_buffer + obj->offs);
+        }
+
+        obj = obj->next;
+    }
+
+    return NULL;
+}
+
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
    struct ggml_object * obj = ctx->objects_begin;
    GGML_ASSERT(tensor->grad == NULL);
    tensor->grad = ggml_dup_tensor(ctx, tensor);
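+    // label the gradient with its parent tensor's name for easier lookup and debugging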
+    ggml_format_name(tensor->grad, "%s (grad)", tensor->name);
}
// ggml_compute_forward_dup
    GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
+    // Context tensor enumeration and lookup
+    GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
+    GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
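
A minimal usage sketch of the new enumeration API (not part of the diff; print_tensors is a hypothetical helper, and ctx is assumed to be an initialized ggml_context that already holds tensors):

#include <stdio.h>
#include "ggml.h"

// print the name and byte size of every tensor in a context
static void print_tensors(struct ggml_context * ctx) {
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        printf("%s: %zu bytes\n", t->name, ggml_nbytes(t));
    }
}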