////////////////////////////////////////////////////////////////////////////////
struct gguf_str {
- uint32_t n;
+ uint64_t n; // GGUFv2: string length in bytes, widened from 32 to 64 bits
char * data;
};
[GGUF_TYPE_FLOAT32] = sizeof(float),
[GGUF_TYPE_BOOL] = sizeof(bool),
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
+ [GGUF_TYPE_UINT64] = sizeof(uint64_t),
+ [GGUF_TYPE_INT64] = sizeof(int64_t),
+ [GGUF_TYPE_FLOAT64] = sizeof(double),
[GGUF_TYPE_ARRAY] = 0, // undefined
};
-static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "u8",
[GGUF_TYPE_BOOL] = "bool",
[GGUF_TYPE_STRING] = "str",
[GGUF_TYPE_ARRAY] = "arr",
+ [GGUF_TYPE_UINT64] = "u64",
+ [GGUF_TYPE_INT64] = "i64",
+ [GGUF_TYPE_FLOAT64] = "f64",
};
-static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
union gguf_value {
uint8_t uint8;
uint32_t uint32;
int32_t int32;
float float32;
+ uint64_t uint64;
+ int64_t int64;
+ double float64;
bool bool_;
struct gguf_str str;
struct {
enum gguf_type type;
- uint32_t n;
+ uint64_t n; // GGUFv2
void * data;
} arr;
};
struct gguf_kv {
struct gguf_str key;
- uint32_t n_bytes; // TODO: is this actually needed?
-
enum gguf_type type;
union gguf_value value;
};
struct gguf_header {
uint32_t magic;
uint32_t version;
- uint32_t n_tensors;
- uint32_t n_kv;
+ uint64_t n_tensors; // GGUFv2: 64-bit count (version 1 files store it as 32-bit)
+ uint64_t n_kv; // GGUFv2: number of kv pairs, 64-bit (version 1 files store it as 32-bit)
};
struct gguf_tensor_info {
struct gguf_str name;
uint32_t n_dims;
- uint32_t ne[GGML_MAX_DIMS];
+ uint64_t ne[GGML_MAX_DIMS];
enum ggml_type type;
return n == size;
}
-static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
+// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+// Current-format reader: the string is stored as a 64-bit length followed by that many bytes.
+// p->data is calloc'ed with one extra zeroed byte, so the result is NUL-terminated.
+// Returns false as soon as one of the reads fails.
+// NOTE(review): the length is taken from the file as-is and the calloc result is not checked.
+static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
p->n = 0;
p->data = NULL;
bool ok = true;
- // TODO: how to avoid mallocs for strings?
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
return ok;
}
+// Reads a GGUFv1 string: a 32-bit length followed by that many bytes.
+//
+// On success p->data is a calloc'ed, NUL-terminated buffer of p->n + 1 bytes.
+// Returns false if the length cannot be read, if the buffer cannot be allocated
+// (p->data stays NULL and p->n stays 0 in both cases), or if the payload read
+// fails (the buffer is then left in p->data, as before).
+static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
+    p->n    = 0;
+    p->data = NULL;
+
+    uint32_t n = 0;
+    if (!gguf_fread_el(file, &n, sizeof(n), offset)) {
+        return false;
+    }
+
+    // the +1 must not be done in uint32_t: for n == UINT32_MAX it wraps to 0,
+    // which would allocate an empty buffer and then read 4 GiB into it
+    if ((uint64_t) n >= SIZE_MAX) {
+        return false;
+    }
+
+    char * data = calloc((size_t) n + 1, 1);
+    if (data == NULL) {
+        // the length comes from the file, so a failed allocation is an input error
+        return false;
+    }
+
+    p->data = data;
+    p->n    = n;
+
+    return gguf_fread_el(file, p->data, p->n, offset);
+}
+
struct gguf_context * gguf_init_empty(void) {
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
ctx->data = NULL;
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
- ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
- ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
+
+ if (ctx->header.version == 1) {
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+ uint32_t n_tensors = 0;
+ uint32_t n_kv = 0;
+
+ ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
+ ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset);
+
+ ctx->header.n_tensors = n_tensors;
+ ctx->header.n_kv = n_kv;
+ } else {
+ ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
+ ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
+ }
if (!ok) {
fprintf(stderr, "%s: failed to read header\n", __func__);
}
}
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+ bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
+ if (ctx->header.version == 1) {
+ gguf_fread_str = gguf_fread_str_v1;
+ }
+
// read the kv pairs
{
ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
- ok = ok && gguf_fread_str(file, &kv->key, &offset);
- //ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
- ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
+ ok = ok && gguf_fread_str(file, &kv->key, &offset);
+ ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
+ case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
+ case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
+ case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
case GGUF_TYPE_ARRAY:
{
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
- ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
+
+ if (ctx->header.version == 1) {
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+ uint32_t n = 0;
+ ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
+ kv->value.arr.n = n;
+ } else {
+ ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
+ }
switch (kv->value.arr.type) {
case GGUF_TYPE_UINT8:
case GGUF_TYPE_UINT32:
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
+ case GGUF_TYPE_UINT64:
+ case GGUF_TYPE_INT64:
+ case GGUF_TYPE_FLOAT64:
case GGUF_TYPE_BOOL:
{
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
ok = ok && gguf_fread_str(file, &info->name, &offset);
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
for (uint32_t j = 0; j < info->n_dims; ++j) {
- ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+ if (ctx->header.version == 1) {
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+ uint32_t t = 0;
+ ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
+ info->ne[j] = t;
+ } else {
+ ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+ }
}
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
return ctx->kv[i].value.float32;
}
+// Returns the value of the kv pair at index i, read as a 64-bit unsigned integer.
+// The stored kv type is not checked here.
+uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
+    const struct gguf_kv * kv = &ctx->kv[i];
+    return kv->value.uint64;
+}
+
+// Returns the value of the kv pair at index i, read as a 64-bit signed integer.
+// The stored kv type is not checked here.
+int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
+    const struct gguf_kv * kv = &ctx->kv[i];
+    return kv->value.int64;
+}
+
+// Returns the value of the kv pair at index i, read as a double.
+// The stored kv type is not checked here.
+double gguf_get_val_f64(struct gguf_context * ctx, int i) {
+    const struct gguf_kv * kv = &ctx->kv[i];
+    return kv->value.float64;
+}
+
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.bool_;
}
ctx->kv[idx].value.float32 = val;
}
+// Stores val under key as a GGUF_TYPE_UINT64 entry; the slot index comes from gguf_get_or_add_key().
+void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
+    // resolve the index first, then take the slot address from ctx->kv
+    const int slot = gguf_get_or_add_key(ctx, key);
+    struct gguf_kv * kv = &ctx->kv[slot];
+
+    kv->value.uint64 = val;
+    kv->type         = GGUF_TYPE_UINT64;
+}
+
+// Stores val under key as a GGUF_TYPE_INT64 entry; the slot index comes from gguf_get_or_add_key().
+void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
+    // resolve the index first, then take the slot address from ctx->kv
+    const int slot = gguf_get_or_add_key(ctx, key);
+    struct gguf_kv * kv = &ctx->kv[slot];
+
+    kv->value.int64 = val;
+    kv->type        = GGUF_TYPE_INT64;
+}
+
+// Stores val under key as a GGUF_TYPE_FLOAT64 entry; the slot index comes from gguf_get_or_add_key().
+void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
+    // resolve the index first, then take the slot address from ctx->kv
+    const int slot = gguf_get_or_add_key(ctx, key);
+    struct gguf_kv * kv = &ctx->kv[slot];
+
+    kv->value.float64 = val;
+    kv->type          = GGUF_TYPE_FLOAT64;
+}
+
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
const int idx = gguf_get_or_add_key(ctx, key);
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
+ case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
+ case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
+ case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
+ case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
+ case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
+ case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_UINT32:
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
+ case GGUF_TYPE_UINT64:
+ case GGUF_TYPE_INT64:
+ case GGUF_TYPE_FLOAT64:
case GGUF_TYPE_BOOL:
{
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
#
GGUF_MAGIC = 0x46554747
-GGUF_VERSION = 1
+GGUF_VERSION = 2
GGUF_DEFAULT_ALIGNMENT = 32
# general
BOOL = 7
STRING = 8
ARRAY = 9
+ UINT64 = 10
+ INT64 = 11
+ FLOAT64 = 12
@staticmethod
def get_type(val):
return GGUFValueType.BOOL
elif isinstance(val, int):
return GGUFValueType.INT32
+ # TODO: need help with 64-bit types in Python
else:
print("Unknown type: "+str(type(val)))
sys.exit()
def write_header_to_file(self):
self.fout.write(struct.pack("<I", GGUF_MAGIC))
self.fout.write(struct.pack("<I", GGUF_VERSION))
- self.fout.write(struct.pack("<I", self.ti_data_count))
- self.fout.write(struct.pack("<I", self.kv_data_count))
+ self.fout.write(struct.pack("<Q", self.ti_data_count))
+ self.fout.write(struct.pack("<Q", self.kv_data_count))
self.flush()
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
self.add_key(key)
self.add_val(val, GGUFValueType.FLOAT32)
+ def add_uint64(self, key: str, val: int):
+ self.add_key(key)
+ self.add_val(val, GGUFValueType.UINT64)
+
+ def add_int64(self, key: str, val: int):
+ self.add_key(key)
+ self.add_val(val, GGUFValueType.INT64)
+
+ def add_float64(self, key: str, val: float):
+ self.add_key(key)
+ self.add_val(val, GGUFValueType.FLOAT64)
+
def add_bool(self, key: str, val: bool):
self.add_key(key)
self.add_val(val, GGUFValueType.BOOL)
self.kv_data += struct.pack("<i", val)
elif vtype == GGUFValueType.FLOAT32:
self.kv_data += struct.pack("<f", val)
+ elif vtype == GGUFValueType.UINT64:
+ self.kv_data += struct.pack("<Q", val)
+ elif vtype == GGUFValueType.INT64:
+ self.kv_data += struct.pack("<q", val)
+ elif vtype == GGUFValueType.FLOAT64:
+ self.kv_data += struct.pack("<d", val)
elif vtype == GGUFValueType.BOOL:
self.kv_data += struct.pack("?", val)
elif vtype == GGUFValueType.STRING:
encoded_val = val.encode("utf8") if isinstance(val, str) else val
- self.kv_data += struct.pack("<I", len(encoded_val))
+ self.kv_data += struct.pack("<Q", len(encoded_val))
self.kv_data += encoded_val
elif vtype == GGUFValueType.ARRAY:
ltype = set([GGUFValueType.get_type(item) for item in val])
assert len(ltype) == 1, "All items in a GGUF array should be of the same type"
self.kv_data += struct.pack("<I", list(ltype)[0])
- self.kv_data += struct.pack("<I", len(val))
+ self.kv_data += struct.pack("<Q", len(val))
for item in val:
self.add_val(item, add_vtype=False)
else:
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
encoded_name = name.encode("utf8")
- self.ti_data += struct.pack("<I", len(encoded_name))
+ self.ti_data += struct.pack("<Q", len(encoded_name))
self.ti_data += encoded_name
n_dims = len(tensor_shape)
self.ti_data += struct.pack("<I", n_dims)
for i in range(n_dims):
- self.ti_data += struct.pack("<I", tensor_shape[n_dims - 1 - i])
+ self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
if raw_dtype is None:
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
else: