#include <stdio.h>
#define GGML_FILE_MAGIC 0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2
#define GGML_QNT_VERSION 2 // bump this on quantization format changes
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
GGML_OP_CLAMP,
GGML_OP_CONV_TRANSPOSE_1D,
GGML_OP_IM2COL,
+ GGML_OP_IM2COL_BACK,
GGML_OP_CONV_TRANSPOSE_2D,
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,
+ GGML_OP_POOL_2D_BACK,
GGML_OP_UPSCALE, // nearest interpolate
GGML_OP_PAD,
GGML_OP_ARANGE,
float min,
float max);
+ // im2col
+ // rearranges the data so that the convolution can be computed as a matrix multiplication (see the shape sketch below)
GGML_API struct ggml_tensor * ggml_im2col(
struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- int s0,
- int s1,
- int p0,
- int p1,
- int d0,
- int d1,
- bool is_2D,
- enum ggml_type dst_type);
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s0, // stride dimension 0
+ int s1, // stride dimension 1
+ int p0, // padding dimension 0
+ int p1, // padding dimension 1
+ int d0, // dilation dimension 0
+ int d1, // dilation dimension 1
+ bool is_2D,
+ enum ggml_type dst_type);
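+ // shape sketch (informal), using the layout from the ggml_conv_2d comments:
+ //   kernel a: [OC, IC, KH, KW], data b: [N, IC, IH, IW]
+ //   ggml_im2col(a, b, ...) -> [N, OH, OW, IC * KH * KW]
+ //   multiplying this with a reshaped to [OC, IC * KH * KW] gives [N, OC, OH, OW],
+ //   i.e. the same result as a direct 2D convolution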
+
+ GGML_API struct ggml_tensor * ggml_im2col_back(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // gradient of im2col output
+ int64_t * ne, // shape of im2col input
+ int s0, // stride dimension 0
+ int s1, // stride dimension 1
+ int p0, // padding dimension 0
+ int p1, // padding dimension 1
+ int d0, // dilation dimension 0
+ int d1, // dilation dimension 1
+ bool is_2D);
GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- int s0,
- int s1,
- int p0,
- int p1,
- int d0,
- int d1);
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s0, // stride dimension 0
+ int s1, // stride dimension 1
+ int p0, // padding dimension 0
+ int p1, // padding dimension 1
+ int d0, // dilation dimension 0
+ int d1); // dilation dimension 1
GGML_API struct ggml_tensor * ggml_conv_1d(
struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
int s0, // stride
int p0, // padding
int d0); // dilation
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- int s,
- int d);
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s, // stride
+ int d); // dilation
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- int s0,
- int p0,
- int d0);
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s0, // stride
+ int p0, // padding
+ int d0); // dilation
GGML_API struct ggml_tensor * ggml_conv_2d(
struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- int s0,
- int s1,
- int p0,
- int p1,
- int d0,
- int d1);
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s0, // stride dimension 0
+ int s1, // stride dimension 1
+ int p0, // padding dimension 0
+ int p1, // padding dimension 1
+ int d0, // dilation dimension 0
+ int d1); // dilation dimension 1
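+ // the output spatial size follows the usual convolution formula (see ggml_calc_conv_output_size):
+ //   OW = (IW + 2*p0 - d0*(KW - 1) - 1) / s0 + 1
+ //   OH = (IH + 2*p1 - d1*(KH - 1) - 1) / s1 + 1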
// kernel size is a->ne[0] x a->ne[1]
float p0,
float p1);
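+ // gradient of ggml_pool_2d: a is the gradient of the pooled output, af is the forward-pass input,
+ // and the result has the shape of af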
+ GGML_API struct ggml_tensor * ggml_pool_2d_back(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * af, // "a"/input used in forward pass
+ enum ggml_op_pool op,
+ int k0,
+ int k1,
+ int s0,
+ int s1,
+ float p0,
+ float p1);
+
// nearest interpolate
// multiplies ne0 and ne1 by scale factor
// used in stable-diffusion
"CLAMP",
"CONV_TRANSPOSE_1D",
"IM2COL",
+ "IM2COL_BACK",
"CONV_TRANSPOSE_2D",
"POOL_1D",
"POOL_2D",
+ "POOL_2D_BACK",
"UPSCALE",
"PAD",
"ARANGE",
"CROSS_ENTROPY_LOSS_BACK",
};
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
"clamp(x)",
"conv_transpose_1d(x)",
"im2col(x)",
+ "im2col_back(x)",
"conv_transpose_2d(x)",
"pool_1d(x)",
"pool_2d(x)",
+ "pool_2d_back(x)",
"upscale(x)",
"pad(x)",
"arange(start, stop, step)",
"cross_entropy_loss_back(x,y)",
};
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
size_t data_size = ggml_row_size(type, ne[0]);
for (int i = 1; i < n_dims; i++) {
+ assert(ne[i] > 0);
data_size *= ne[i];
}
}
struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
+ GGML_ASSERT(obj_new);
// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
- // TODO: support backward pass for broadcasting
- GGML_ASSERT(ggml_are_same_shape(a, b));
is_node = true;
}
GGML_ASSERT(a->ne[2] == b->ne[2]);
} else {
GGML_ASSERT(a->ne[1] == b->ne[1]);
+ GGML_ASSERT(b->ne[3] == 1);
}
bool is_node = false;
- if (a->grad || b->grad) {
- GGML_ABORT("fatal error"); // TODO: implement backward
+ if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
is_node = true;
}
const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+ GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
+ GGML_ASSERT((OW > 0) && "b too small compared to a");
+
const int64_t ne[4] = {
is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
OW,
return result;
}
+struct ggml_tensor * ggml_im2col_back(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ int64_t * ne,
+ int s0,
+ int s1,
+ int p0,
+ int p1,
+ int d0,
+ int d1,
+ bool is_2D) {
+
+ bool is_node = false;
+
+ if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+ ggml_set_op_params(result, params, sizeof(params));
+
+ result->op = GGML_OP_IM2COL_BACK;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+ result->src[1] = b;
+
+ return result;
+}
+
// a: [OC,IC, KH, KW]
// b: [N, IC, IH, IW]
// result: [N, OC, OH, OW]
int p1,
int d0,
int d1) {
- struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]
+ struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
struct ggml_tensor * result =
ggml_mul_mat(ctx,
bool is_node = false;
if (a->grad) {
- GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
struct ggml_tensor * result;
- const int64_t ne[3] = {
+ const int64_t ne[4] = {
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
a->ne[2],
+ a->ne[3],
};
- result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+ result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
ggml_set_op_params(result, params, sizeof(params));
return result;
}
+struct ggml_tensor * ggml_pool_2d_back(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * af,
+ enum ggml_op_pool op,
+ int k0,
+ int k1,
+ int s0,
+ int s1,
+ float p0,
+ float p1) {
+
+ bool is_node = false;
+
+ if (a->grad) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result;
+ result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
+
+ int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+ ggml_set_op_params(result, params, sizeof(params));
+
+ result->op = GGML_OP_POOL_2D_BACK;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+ result->src[1] = af;
+ return result;
+}
+
// ggml_upscale
static struct ggml_tensor * ggml_upscale_impl(
}
}
+// ggml_compute_forward_im2col_f32
// src0: kernel [OC, IC, KH, KW]
// src1: image [N, IC, IH, IW]
// dst: result [N, OH, OW, IC*KH*KW]
const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1];
- GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
int ofs0 = is_2D ? nb13 : nb12;
int ofs1 = is_2D ? nb12 : nb11;
- GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
}
+// ggml_compute_forward_im2col_f16
// src0: kernel [OC, IC, KH, KW]
// src1: image [N, IC, IH, IW]
// dst: result [N, OH, OW, IC*KH*KW]
}
}
+// ggml_compute_forward_im2col_back_f32
+
+static void ggml_compute_forward_im2col_back_f32(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
+ GGML_ASSERT(src1->type == GGML_TYPE_F32);
+ GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+ GGML_TENSOR_BINARY_OP_LOCALS;
+
+ const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
+ const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
+ const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
+ const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
+ const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
+ const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
+ const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ const int64_t N = is_2D ? ne3 : ne2;
+ const int64_t IC = is_2D ? ne2 : ne1;
+ const int64_t IH = is_2D ? ne1 : 1;
+ const int64_t IW = ne0;
+
+ const int64_t KH = is_2D ? ne01 : 1;
+ const int64_t KW = ne00;
+
+ const int64_t OH = is_2D ? ne12 : 1;
+ const int64_t OW = ne11;
+
+ int ofs0 = is_2D ? nb3 : nb2;
+ int ofs1 = is_2D ? nb2 : nb1;
+
+ GGML_ASSERT(nb0 == sizeof(float));
+
+ // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+ {
+ float * const wdata = (float *) dst->data;
+
+ for (int64_t in = 0; in < N; in++) {
+ for (int64_t iic = ith; iic < IC; iic += nth) {
+ for (int64_t iih = 0; iih < IH; iih++) {
+ for (int64_t iiw = 0; iiw < IW; iiw++) {
+
+ // micro kernel
+ float grad = 0.0f;
+ for (int64_t ikh = 0; ikh < KH; ikh++) {
+ for (int64_t ikw = 0; ikw < KW; ikw++) {
+ // For s0 > 1 some values were skipped over in the forward pass.
+ // These values have tmpw % s0 != 0 and need to be skipped in the backward pass as well
+ // (see the worked example after this function).
+ const int64_t tmpw = (iiw + p0 - ikw*d0);
+ if (tmpw % s0 != 0) {
+ continue;
+ }
+ const int64_t iow = tmpw / s0;
+
+ // Same logic as above, but for s1.
+ int64_t ioh;
+ if (is_2D) {
+ const int64_t tmph = iih + p1 - ikh*d1;
+
+ if (tmph % s1 != 0) {
+ continue;
+ }
+
+ ioh = tmph / s1;
+ } else {
+ ioh = 0;
+ }
+
+ if (iow < 0 || iow >= OW || ioh < 0 || ioh >= OH) {
+ continue;
+ }
+
+ const float * const src_data = (const float *) src1->data
+ + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
+ grad += src_data[iic*(KH*KW) + ikh*KW + ikw];
+ }
+ }
+ float * dst_data = (float *)((char *) wdata + (in*ofs0 + iic*ofs1)); // [IH, IW]
+ dst_data[iih*IW + iiw] = grad;
+ }
+ }
+ }
+ }
+ }
+}
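+
+// small worked example for the skip logic in ggml_compute_forward_im2col_back_f32:
+// with s0 = 2, p0 = 0, d0 = 1 and KW = 3, input position iiw = 1 gives tmpw = 1 - ikw,
+// so only (ikw = 1, iow = 0) contributes; ikw = 0 and ikw = 2 yield tmpw values that are
+// not multiples of s0 and are skipped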
// ggml_compute_forward_conv_transpose_2d
}
}
+// ggml_compute_forward_pool_2d_back
+
+static void ggml_compute_forward_pool_2d_back(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src = dst->src[0];
+ const struct ggml_tensor * dstf = dst->src[1]; // forward tensor of dst
+
+ assert(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+
+ if (params->ith != 0) {
+ return;
+ }
+
+ const int32_t * opts = (const int32_t *)dst->op_params;
+ enum ggml_op_pool op = opts[0];
+ const int k0 = opts[1];
+ const int k1 = opts[2];
+ const int s0 = opts[3];
+ const int s1 = opts[4];
+ const int p0 = opts[5];
+ const int p1 = opts[6];
+
+ char * cdata = (char *) dst->data;
+ const char * cdataf = (const char *) dstf->data;
+ const char * const data_end = cdata + ggml_nbytes(dst);
+
+ GGML_ASSERT(params->ith == 0);
+ memset(cdata, 0, ggml_nbytes(dst));
+
+ const int64_t px = src->ne[0];
+ const int64_t py = src->ne[1];
+ const int64_t pa = px * py;
+
+ const float * splane = (const float *) src->data;
+
+ const int ka = k0 * k1;
+ const int offset0 = -p0;
+ const int offset1 = -p1;
+
+ while (cdata < data_end) {
+ for (int oy = 0; oy < py; ++oy) {
+ const float * const srow = splane + oy * px;
+ for (int ox = 0; ox < px; ++ox) {
+ const float grad0 = srow[ox];
+
+ const int ix = offset0 + ox * s0;
+ const int iy = offset1 + oy * s1;
+
+ if (op == GGML_OP_POOL_MAX) {
+ float maxval = -FLT_MAX;
+ int kxmax = -1;
+ int kymax = -1;
+
+ for (int ky = 0; ky < k1; ++ky) {
+ if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
+ continue;
+ }
+ const void * drowf = (const void *)(cdataf + dst->nb[1] * (iy + ky));
+ for (int kx = 0; kx < k0; ++kx) {
+ int j = ix + kx;
+ if (j < 0 || j >= dst->ne[0]) {
+ continue;
+ }
+
+ const float val = dst->type == GGML_TYPE_F32 ?
+ ((const float *) drowf)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t *) drowf)[j]);
+ if (val <= maxval) {
+ continue;
+ }
+
+ maxval = val;
+ kxmax = kx;
+ kymax = ky;
+ }
+ }
+
+ if (kxmax == -1 || kymax == -1) {
+ continue;
+ }
+
+ void * drow = (void *)(cdata + dst->nb[1] * (iy + kymax));
+ const int j = ix + kxmax;
+ if (dst->type == GGML_TYPE_F32) {
+ ((float *) drow)[j] += grad0;
+ } else {
+ ((ggml_fp16_t *) drow)[j] = GGML_FP32_TO_FP16(grad0 + GGML_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j]));
+ }
+ } else if (op == GGML_OP_POOL_AVG) {
+ const float grad = grad0 / ka;
+
+ for (int ky = 0; ky < k1; ++ky) {
+ if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
+ continue;
+ }
+ void * drow = (void *)(cdata + dst->nb[1] * (iy + ky));
+ for (int kx = 0; kx < k0; ++kx) {
+ int j = ix + kx;
+ if (j < 0 || j >= dst->ne[0]) {
+ continue;
+ }
+
+ if (dst->type == GGML_TYPE_F32) {
+ ((float *) drow)[j] += grad;
+ } else {
+ // accumulate via fp32, as in the max pooling branch above
+ ((ggml_fp16_t *) drow)[j] = GGML_FP32_TO_FP16(grad + GGML_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j]));
+ }
+ }
+ }
+ } else {
+ GGML_ASSERT(false);
+ }
+ }
+ }
+
+ cdata += dst->nb[2];
+ cdataf += dst->nb[2];
+ splane += pa;
+ }
+}
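+
+// example: for GGML_OP_POOL_AVG with k0 = k1 = 2 each output gradient g is spread as g/4 over its
+// 2x2 input window; for GGML_OP_POOL_MAX the full gradient g goes only to the input element that
+// was the maximum of the window in the forward pass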
+
// ggml_compute_forward_upscale
static void ggml_compute_forward_upscale_f32(
{
ggml_compute_forward_im2col(params, tensor);
} break;
+ case GGML_OP_IM2COL_BACK:
+ {
+ ggml_compute_forward_im2col_back_f32(params, tensor);
+ } break;
case GGML_OP_CONV_TRANSPOSE_2D:
{
ggml_compute_forward_conv_transpose_2d(params, tensor);
{
ggml_compute_forward_pool_2d(params, tensor);
} break;
+ case GGML_OP_POOL_2D_BACK:
+ {
+ ggml_compute_forward_pool_2d_back(params, tensor);
+ } break;
case GGML_OP_UPSCALE:
{
ggml_compute_forward_upscale(params, tensor);
src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table);
}
if (src1->grad) {
- src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
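+ // if src1 was broadcast to the shape of src0 in the forward pass, its gradient is the sum of
+ // tensor->grad over the broadcast dimensions, which ggml_repeat_back computes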
+ if (ggml_are_same_shape(src0, src1)) {
+ src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
+ } else {
+ src1->grad = ggml_add_or_set(ctx, src1->grad, ggml_repeat_back(ctx, tensor->grad, src1), zero_table);
+ }
}
} break;
case GGML_OP_ADD1:
GGML_ABORT("fatal error"); // TODO: not implemented
}
case GGML_OP_IM2COL:
+ {
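+ // note: no gradient is propagated to src0 (the convolution kernel) here; im2col uses it only
+ // for its shape, and the kernel gradient flows through the mul_mat that consumes the im2col
+ // result (see ggml_conv_2d)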
+ if (src1->grad) {
+ const int32_t s0 = ggml_get_op_params_i32(tensor, 0);
+ const int32_t s1 = ggml_get_op_params_i32(tensor, 1);
+ const int32_t p0 = ggml_get_op_params_i32(tensor, 2);
+ const int32_t p1 = ggml_get_op_params_i32(tensor, 3);
+ const int32_t d0 = ggml_get_op_params_i32(tensor, 4);
+ const int32_t d1 = ggml_get_op_params_i32(tensor, 5);
+ const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
+
+ src1->grad = ggml_add_or_set(ctx,
+ src1->grad,
+ ggml_im2col_back(ctx, src0, tensor->grad, src1->ne, s0, s1, p0, p1, d0, d1, is_2D),
+ zero_table);
+ }
+ } break;
+ case GGML_OP_IM2COL_BACK:
{
GGML_ABORT("fatal error"); // TODO: not implemented
}
GGML_ABORT("fatal error"); // TODO: not implemented
}
case GGML_OP_POOL_2D:
+ {
+ if (src0->grad) {
+ const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
+ const int32_t k0 = ggml_get_op_params_i32(tensor, 1);
+ const int32_t k1 = ggml_get_op_params_i32(tensor, 2);
+ const int32_t s0 = ggml_get_op_params_i32(tensor, 3);
+ const int32_t s1 = ggml_get_op_params_i32(tensor, 4);
+ const int32_t p0 = ggml_get_op_params_i32(tensor, 5);
+ const int32_t p1 = ggml_get_op_params_i32(tensor, 6);
+
+ src0->grad = ggml_add_or_set(ctx,
+ src0->grad,
+ ggml_pool_2d_back(ctx, tensor->grad, src0, op, k0, k1, s0, s1, p0, p1),
+ zero_table);
+ }
+ } break;
+ case GGML_OP_POOL_2D_BACK:
{
GGML_ABORT("fatal error"); // TODO: not implemented
}
void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
GGML_ASSERT(gf->n_nodes > 0);
+ GGML_ASSERT(gf->grads);
// if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph
if (keep) {
n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
} break;
case GGML_OP_IM2COL:
+ case GGML_OP_IM2COL_BACK:
case GGML_OP_CONV_TRANSPOSE_1D:
case GGML_OP_CONV_TRANSPOSE_2D:
{
} break;
case GGML_OP_POOL_1D:
case GGML_OP_POOL_2D:
+ case GGML_OP_POOL_2D_BACK:
{
n_tasks = 1;
} break;
const uint32_t type = tensor->type;
const uint32_t op = tensor->op;
+ const int32_t flags = tensor->flags;
fwrite(&type, sizeof(uint32_t), 1, fout);
fwrite(&op, sizeof(uint32_t), 1, fout);
+ fwrite(&flags, sizeof(int32_t), 1, fout);
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
const uint64_t ne = tensor->ne[j];
const uint32_t type = tensor->type;
const uint32_t op = tensor->op;
+ const int32_t flags = tensor->flags;
fwrite(&type, sizeof(uint32_t), 1, fout);
fwrite(&op, sizeof(uint32_t), 1, fout);
+ fwrite(&flags, sizeof(int32_t), 1, fout);
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
const uint64_t ne = tensor->ne[j];
}
}
}
+
+ // dump the data
+ // TODO: pad this to 32 byte boundary
+ if (flags & GGML_TENSOR_FLAG_PARAM) {
+ const size_t size = ggml_nbytes(tensor);
+
+ fwrite(tensor->data, sizeof(char), size, fout);
+ }
}
}
{
uint32_t type;
uint32_t op;
+ int32_t flags;
for (uint32_t i = 0; i < n_leafs; ++i) {
type = *(const uint32_t *) ptr; ptr += sizeof(type);
op = *(const uint32_t *) ptr; ptr += sizeof(op);
+ flags = *(const int32_t *) ptr; ptr += sizeof(flags);
int64_t ne[GGML_MAX_DIMS];
size_t nb[GGML_MAX_DIMS];
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
- tensor->op = (enum ggml_op) op;
+ tensor->op    = (enum ggml_op) op;
+ tensor->flags = flags;
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
- tensor->data = (void *) ptr;
-
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
tensor->nb[j] = nb[j];
}
- result->leafs[i] = tensor;
+ tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
- ptr += ggml_nbytes(tensor);
+ result->leafs[i] = tensor;
fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
}
{
uint32_t type;
uint32_t op;
+ int32_t flags;
for (uint32_t i = 0; i < n_nodes; ++i) {
type = *(const uint32_t *) ptr; ptr += sizeof(type);
op = *(const uint32_t *) ptr; ptr += sizeof(op);
+ flags = *(const int32_t *) ptr; ptr += sizeof(flags);
enum ggml_op eop = (enum ggml_op) op;
result->nodes[i] = tensor;
+ // TODO: tensor data is duplicated due to the ggml_new_tensor call above
+ if (flags & GGML_TENSOR_FLAG_PARAM) {
+ tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
+ }
+
fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
}
}
struct ggml_context * ctx,
struct ggml_opt_params params,
struct ggml_tensor * f) {
+ GGML_ASSERT(f->grad && "ggml_set_param called for at least one parent tensor.");
+
bool free_ctx = false;
if (ctx == NULL) {
struct ggml_init_params params_ctx = {
ggml_opt_callback callback,
void * callback_data) {
+ GGML_ASSERT(f->grad && "ggml_set_param must be called for at least one ancestor");
+
// build forward + backward compute graphs
enum ggml_opt_result result = GGML_OPT_RESULT_OK;
void gguf_add_tensor(
struct gguf_context * ctx,
const struct ggml_tensor * tensor) {
+ GGML_ASSERT(tensor);
if (gguf_find_tensor(ctx, tensor->name) != -1) {
GGML_ABORT("duplicated tensor name");
}