case 1: wtype = GGML_TYPE_F16; break;
case 2: wtype = GGML_TYPE_Q4_0; break;
case 3: wtype = GGML_TYPE_Q4_1; break;
+ case 5: wtype = GGML_TYPE_Q4_2; break;
+ case 6: wtype = GGML_TYPE_Q4_3; break;
default:
{
fprintf(stderr, "%s: invalid model file '%s' (bad f16 value %d)\n",
}
if (0) {
- static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
+ static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2", "q4_3" };
printf("%24s - [%5lld, %5lld], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
}
case 1: bpe = ggml_type_size(GGML_TYPE_F16); break;
case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break;
case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break;
+ case 5: bpe = ggml_type_size(GGML_TYPE_Q4_2); assert(ne[0] % 64 == 0); break;
+ case 6: bpe = ggml_type_size(GGML_TYPE_Q4_3); assert(ne[0] % 64 == 0); break;
default:
{
fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ftype);
switch (itype) {
case 2: type = GGML_TYPE_Q4_0; break;
case 3: type = GGML_TYPE_Q4_1; break;
+ case 5: type = GGML_TYPE_Q4_2; break;
+ case 6: type = GGML_TYPE_Q4_3; break;
default: fprintf(stderr, "%s: invalid quantization type %d\n", __func__, itype); return 1;
};
- if (type != GGML_TYPE_Q4_0 && type != GGML_TYPE_Q4_1) {
+ if (!ggml_is_quantized(type)) {
fprintf(stderr, "%s: invalid quantization type %d\n", __func__, type);
return false;
}
finp.read (&name[0], length);
{
- static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
+ static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2", "q4_3" };
printf("%24s - [%5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ftype_str[ftype]);
}
{
cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
} break;
+ case GGML_TYPE_Q4_2:
+ {
+ cur_size = ggml_quantize_q4_2(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
+ } break;
+ case GGML_TYPE_Q4_3:
+ {
+ cur_size = ggml_quantize_q4_3(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
+ } break;
default:
{
fprintf(stderr, "%s: unsupported quantization type %d\n", __func__, type);
fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]);
fprintf(stderr, " type = 2 - q4_0\n");
fprintf(stderr, " type = 3 - q4_1\n");
+ fprintf(stderr, " type = 5 - q4_2\n");
+ fprintf(stderr, " type = 6 - q4_3\n");
return 1;
}
case 1: wtype = GGML_TYPE_F16; break;
case 2: wtype = GGML_TYPE_Q4_0; break;
case 3: wtype = GGML_TYPE_Q4_1; break;
+ case 5: wtype = GGML_TYPE_Q4_2; break;
+ case 6: wtype = GGML_TYPE_Q4_3; break;
default:
{
fprintf(stderr, "%s: invalid model file '%s' (bad f16 value %d)\n",
}
if (0) {
- static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
+ static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2", "q4_3" };
printf("%24s - [%5lld, %5lld], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
}
case 1: bpe = ggml_type_size(GGML_TYPE_F16); break;
case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break;
case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break;
+ case 5: bpe = ggml_type_size(GGML_TYPE_Q4_2); assert(ne[0] % 64 == 0); break;
+ case 6: bpe = ggml_type_size(GGML_TYPE_Q4_3); assert(ne[0] % 64 == 0); break;
default:
{
fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ftype);
switch (itype) {
case 2: type = GGML_TYPE_Q4_0; break;
case 3: type = GGML_TYPE_Q4_1; break;
+ case 5: type = GGML_TYPE_Q4_2; break;
+ case 6: type = GGML_TYPE_Q4_3; break;
default: fprintf(stderr, "%s: invalid quantization type %d\n", __func__, itype); return 1;
};
- if (type != GGML_TYPE_Q4_0 && type != GGML_TYPE_Q4_1) {
+ if (!ggml_is_quantized(type)) {
fprintf(stderr, "%s: invalid quantization type %d\n", __func__, type);
return false;
}
finp.read (&name[0], length);
{
- static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
+ static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2", "q4_3" };
printf("%48s - [%5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ftype_str[ftype]);
}
{
cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
} break;
+ case GGML_TYPE_Q4_2:
+ {
+ cur_size = ggml_quantize_q4_2(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
+ } break;
+ case GGML_TYPE_Q4_3:
+ {
+ cur_size = ggml_quantize_q4_3(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
+ } break;
default:
{
fprintf(stderr, "%s: unsupported quantization type %d\n", __func__, type);
fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]);
fprintf(stderr, " type = 2 - q4_0\n");
fprintf(stderr, " type = 3 - q4_1\n");
+ fprintf(stderr, " type = 5 - q4_2\n");
+ fprintf(stderr, " type = 6 - q4_3\n");
return 1;
}
}
if (0) {
- static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2", };
+ static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2", "q4_3" };
printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
}
finp.read (&name[0], length);
{
- static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2" };
+ static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2", "q4_3" };
printf("%64s - [%5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ftype_str[ftype]);
}
switch (itype) {
case 2: type = GGML_TYPE_Q4_0; break;
case 3: type = GGML_TYPE_Q4_1; break;
+ case 5: type = GGML_TYPE_Q4_2; break;
+ case 6: type = GGML_TYPE_Q4_3; break;
default: fprintf(stderr, "%s: invalid quantization type %d\n", __func__, itype); return 1;
};
- if (type != GGML_TYPE_Q4_0 && type != GGML_TYPE_Q4_1) {
+ if (!ggml_is_quantized(type)) {
fprintf(stderr, "%s: invalid quantization type %d\n", __func__, type);
return false;
}
finp.read (&name[0], length);
{
- static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
+ static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", "q4_2", "q4_3" };
printf("%48s - [%5d, %5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ne[2], ftype_str[ftype]);
}
{
cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
} break;
+ case GGML_TYPE_Q4_2:
+ {
+ cur_size = ggml_quantize_q4_2(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
+ } break;
+ case GGML_TYPE_Q4_3:
+ {
+ cur_size = ggml_quantize_q4_3(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
+ } break;
default:
{
fprintf(stderr, "%s: unsupported quantization type %d\n", __func__, type);
fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]);
fprintf(stderr, " type = 2 - q4_0\n");
fprintf(stderr, " type = 3 - q4_1\n");
+ fprintf(stderr, " type = 5 - q4_2\n");
+ fprintf(stderr, " type = 6 - q4_3\n");
return 1;
}