std::vector<ggml_fp16_t> data_f16;
std::vector<float> data_f32;
- std::vector<int64_t> hist_all(1 << 4, 0);
-
while (true) {
int32_t n_dims;
int32_t length;
work.resize(nelements); // for quantization
size_t cur_size = 0;
- std::vector<int64_t> hist_cur(1 << 4, 0);
-
switch ((ggml_type) ttype) {
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q4_1:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
{
- cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], hist_cur.data(), nullptr);
+ cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], nullptr);
} break;
case GGML_TYPE_F32:
case GGML_TYPE_F16:
fout.write(reinterpret_cast<char *>(work.data()), cur_size);
total_size_new += cur_size;
- printf("size = %8.2f MB -> %8.2f MB | hist: ", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
- for (int i = 0; i < (int) hist_cur.size(); ++i) {
- hist_all[i] += hist_cur[i];
- }
-
- for (int i = 0; i < (int) hist_cur.size(); ++i) {
- printf("%5.3f ", hist_cur[i] / (float)nelements);
- }
- printf("\n");
+ printf("size = %8.2f MB -> %8.2f MB\n", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
} else {
printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
- {
- int64_t sum_all = 0;
- for (int i = 0; i < (int) hist_all.size(); ++i) {
- sum_all += hist_all[i];
- }
-
- printf("%s: hist: ", __func__);
- for (int i = 0; i < (int) hist_all.size(); ++i) {
- printf("%5.3f ", hist_all[i] / (float)sum_all);
- }
- printf("\n");
- }
-
return true;
}