From: Georgi Gerganov
Date: Sat, 29 Apr 2023 07:30:56 +0000 (+0300)
Subject: common : forgot to remove Q4_3 references
X-Git-Tag: upstream/0.0.1642~1509
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=5dd92f421ee44f18b8fde0afbf5ca8fc7bf93841;p=pkg%2Fggml%2Fsources%2Fggml

common : forgot to remove Q4_3 references
---

diff --git a/examples/common-ggml.cpp b/examples/common-ggml.cpp
index 6deee198..5835dd70 100644
--- a/examples/common-ggml.cpp
+++ b/examples/common-ggml.cpp
@@ -6,7 +6,6 @@ static const std::map GGML_FTYPE_MAP = {
     {"q4_0", GGML_FTYPE_MOSTLY_Q4_0},
     {"q4_1", GGML_FTYPE_MOSTLY_Q4_1},
     {"q4_2", GGML_FTYPE_MOSTLY_Q4_2},
-    {"q4_3", GGML_FTYPE_MOSTLY_Q4_3},
     {"q5_0", GGML_FTYPE_MOSTLY_Q5_0},
     {"q5_1", GGML_FTYPE_MOSTLY_Q5_1},
     {"q8_0", GGML_FTYPE_MOSTLY_Q8_0},
@@ -43,7 +42,6 @@ enum ggml_type ggml_ftype_to_ggml_type(const enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
         case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
         case GGML_FTYPE_MOSTLY_Q4_2: wtype = GGML_TYPE_Q4_2; break;
-        case GGML_FTYPE_MOSTLY_Q4_3: wtype = GGML_TYPE_Q4_3; break;
         case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
         case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
         case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
@@ -71,7 +69,6 @@ bool ggml_common_quantize_0(
         case GGML_FTYPE_MOSTLY_Q4_0: qtype = GGML_TYPE_Q4_0; break;
         case GGML_FTYPE_MOSTLY_Q4_1: qtype = GGML_TYPE_Q4_1; break;
         case GGML_FTYPE_MOSTLY_Q4_2: qtype = GGML_TYPE_Q4_2; break;
-        case GGML_FTYPE_MOSTLY_Q4_3: qtype = GGML_TYPE_Q4_3; break;
         case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break;
         case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break;
         case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break;
@@ -200,10 +197,6 @@ bool ggml_common_quantize_0(
                 {
                     cur_size = ggml_quantize_q4_2(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                 } break;
-            case GGML_TYPE_Q4_3:
-                {
-                    cur_size = ggml_quantize_q4_3(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
-                } break;
             case GGML_TYPE_Q5_0:
                 {
                     cur_size = ggml_quantize_q5_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
diff --git a/examples/common-ggml.h b/examples/common-ggml.h
index 377a7fdb..2eb30a34 100644
--- a/examples/common-ggml.h
+++ b/examples/common-ggml.h
@@ -16,7 +16,6 @@ enum ggml_ftype {
     GGML_FTYPE_MOSTLY_Q4_1 = 3,           // except 1d tensors
     GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,  // tok_embeddings.weight and output.weight are F16
     GGML_FTYPE_MOSTLY_Q4_2 = 5,           // except 1d tensors
-    GGML_FTYPE_MOSTLY_Q4_3 = 6,           // except 1d tensors
     GGML_FTYPE_MOSTLY_Q8_0 = 7,           // except 1d tensors
     GGML_FTYPE_MOSTLY_Q5_0 = 8,           // except 1d tensors
     GGML_FTYPE_MOSTLY_Q5_1 = 9,           // except 1d tensors
diff --git a/examples/stablelm/main.cpp b/examples/stablelm/main.cpp
index 3cf6b1cd..494b5e21 100644
--- a/examples/stablelm/main.cpp
+++ b/examples/stablelm/main.cpp
@@ -278,15 +278,15 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
         const int n_layer = hparams.n_layer;
         const int n_ctx   = hparams.n_ctx;

-        const int n_mem = n_layer*n_ctx;
-        const int n_elements = n_embd*n_mem;
+        const int64_t n_mem = n_layer*n_ctx;
+        const int64_t n_elements = n_embd*n_mem;

         model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
         model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);

         const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v);

-        printf("%s: memory_size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem);
+        printf("%s: memory_size = %8.2f MB, n_mem = %lld\n", __func__, memory_size/1024.0/1024.0, n_mem);
     }

     // load weights