    .type_size = sizeof(int32_t),
    .is_quantized = false,
},
+ [GGML_TYPE_I64] = {
+     .type_name = "i64",
+     .blck_size = 1,
+     .type_size = sizeof(int64_t),
+     .is_quantized = false,
+ },
+ [GGML_TYPE_F64] = {
+     .type_name = "f64",
+     .blck_size = 1,
+     .type_size = sizeof(double),
+     .is_quantized = false,
+     .nrows = 1,
+ },
[GGML_TYPE_F32] = {
    .type_name = "f32",
    .blck_size = 1,
case GGML_TYPE_I8:
case GGML_TYPE_I16:
case GGML_TYPE_I32:
+ case GGML_TYPE_I64:
+ case GGML_TYPE_F64:
case GGML_TYPE_COUNT:
    {
        GGML_ASSERT(false);
case GGML_TYPE_I8:
case GGML_TYPE_I16:
case GGML_TYPE_I32:
+ case GGML_TYPE_I64:
+ case GGML_TYPE_F64:
case GGML_TYPE_COUNT:
    {
        GGML_ASSERT(false);
I8 = 24
I16 = 25
I32 = 26
+ I64 = 27
+ F64 = 28
class GGUFEndian(IntEnum):
    GGMLQuantizationType.I8: (1, 1),
    GGMLQuantizationType.I16: (1, 2),
    GGMLQuantizationType.I32: (1, 4),
+     GGMLQuantizationType.I64: (1, 8),
+     GGMLQuantizationType.F64: (1, 8),
}
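For scale, here is a quick sketch of how these (block_size, type_size) pairs turn element counts into byte counts, mirroring the n_bytes computation in the reader hunk below; the gguf.constants import path is assumed from the gguf-py package layout:

from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

# Illustrative only: byte size of a 1024-element I64 tensor under the new entry.
block_size, type_size = GGML_QUANT_SIZES[GGMLQuantizationType.I64]  # (1, 8)
n_elems = 1024
n_bytes = n_elems * type_size // block_size                         # 8192 bytes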
n_bytes = n_elems * type_size // block_size
data_offs = int(start_offs + offset_tensor[0])
item_type: npt.DTypeLike
- if ggml_type == GGMLQuantizationType.F32:
+ if ggml_type == GGMLQuantizationType.F16:
+     item_count = n_elems
+     item_type = np.float16
+ elif ggml_type == GGMLQuantizationType.F32:
    item_count = n_elems
    item_type = np.float32
- elif ggml_type == GGMLQuantizationType.F16:
+ elif ggml_type == GGMLQuantizationType.F64:
    item_count = n_elems
-     item_type = np.float16
+     item_type = np.float64
elif ggml_type == GGMLQuantizationType.I8:
    item_count = n_elems
    item_type = np.int8
elif ggml_type == GGMLQuantizationType.I32:
    item_count = n_elems
    item_type = np.int32
+ elif ggml_type == GGMLQuantizationType.I64:
+     item_count = n_elems
+     item_type = np.int64
else:
    item_count = n_bytes
    item_type = np.uint8
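The branch above only selects a numpy dtype; roughly speaking, the reader then views the mapped file as item_count items of item_type starting at data_offs. A minimal sketch of that step, assuming buf holds the file contents (the real reader goes through its own memory-map helper rather than np.frombuffer):

import numpy as np

# Illustrative only: typed view of the tensor data; quantized types fall back to
# the raw-byte (np.uint8) view chosen by the else branch above.
data = np.frombuffer(buf, dtype=item_type, count=item_count, offset=data_offs)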
for i in range(n_dims):
    self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
if raw_dtype is None:
-     if tensor_dtype == np.float32:
-         dtype = GGMLQuantizationType.F32
-     elif tensor_dtype == np.float16:
+     if tensor_dtype == np.float16:
        dtype = GGMLQuantizationType.F16
+     elif tensor_dtype == np.float32:
+         dtype = GGMLQuantizationType.F32
+     elif tensor_dtype == np.float64:
+         dtype = GGMLQuantizationType.F64
    elif tensor_dtype == np.int8:
        dtype = GGMLQuantizationType.I8
    elif tensor_dtype == np.int16:
        dtype = GGMLQuantizationType.I16
    elif tensor_dtype == np.int32:
        dtype = GGMLQuantizationType.I32
+     elif tensor_dtype == np.int64:
+         dtype = GGMLQuantizationType.I64
    else:
-         raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
+         raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
else:
    dtype = raw_dtype
self.ti_data += self._pack("I", dtype)
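With the writer branches above in place, int64 and float64 numpy arrays can be handed straight to add_tensor. A hedged usage sketch, presumably against gguf-py's GGUFWriter; the file, architecture, and tensor names are made up for illustration:

import numpy as np
from gguf import GGUFWriter

# Write one I64 and one F64 tensor; the dtype checks above map them to the new types.
writer = GGUFWriter("example.gguf", "llama")
writer.add_architecture()
writer.add_tensor("token_counts", np.arange(16, dtype=np.int64))
writer.add_tensor("scales", np.ones(16, dtype=np.float64))
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()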