git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
common : forgot to remove Q4_3 references
authorGeorgi Gerganov <redacted>
Sat, 29 Apr 2023 07:30:56 +0000 (10:30 +0300)
committerGeorgi Gerganov <redacted>
Sat, 29 Apr 2023 07:30:56 +0000 (10:30 +0300)
examples/common-ggml.cpp
examples/common-ggml.h
examples/stablelm/main.cpp

index 6deee19873802b17c7a093d4686d90bf97e82487..5835dd70d5927fc4846d0429719ef7070ea7c7cd 100644 (file)
@@ -6,7 +6,6 @@ static const std::map<std::string, enum ggml_ftype> GGML_FTYPE_MAP = {
     {"q4_0", GGML_FTYPE_MOSTLY_Q4_0},
     {"q4_1", GGML_FTYPE_MOSTLY_Q4_1},
     {"q4_2", GGML_FTYPE_MOSTLY_Q4_2},
-    {"q4_3", GGML_FTYPE_MOSTLY_Q4_3},
     {"q5_0", GGML_FTYPE_MOSTLY_Q5_0},
     {"q5_1", GGML_FTYPE_MOSTLY_Q5_1},
     {"q8_0", GGML_FTYPE_MOSTLY_Q8_0},
@@ -43,7 +42,6 @@ enum ggml_type ggml_ftype_to_ggml_type(const enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
         case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
         case GGML_FTYPE_MOSTLY_Q4_2:          wtype = GGML_TYPE_Q4_2;  break;
-        case GGML_FTYPE_MOSTLY_Q4_3:          wtype = GGML_TYPE_Q4_3;  break;
         case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
         case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
         case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
@@ -71,7 +69,6 @@ bool ggml_common_quantize_0(
         case GGML_FTYPE_MOSTLY_Q4_0: qtype = GGML_TYPE_Q4_0; break;
         case GGML_FTYPE_MOSTLY_Q4_1: qtype = GGML_TYPE_Q4_1; break;
         case GGML_FTYPE_MOSTLY_Q4_2: qtype = GGML_TYPE_Q4_2; break;
-        case GGML_FTYPE_MOSTLY_Q4_3: qtype = GGML_TYPE_Q4_3; break;
         case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break;
         case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break;
         case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break;
@@ -200,10 +197,6 @@ bool ggml_common_quantize_0(
                     {
                         cur_size = ggml_quantize_q4_2(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                     } break;
-                case GGML_TYPE_Q4_3:
-                    {
-                        cur_size = ggml_quantize_q4_3(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
-                    } break;
                 case GGML_TYPE_Q5_0:
                     {
                         cur_size = ggml_quantize_q5_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
index 377a7fdb2f942aa5d55e93cc1c7435e9a41999c7..2eb30a342e01d4818f6072ea06f402d2094e2e3b 100644 (file)
@@ -16,7 +16,6 @@ enum ggml_ftype {
     GGML_FTYPE_MOSTLY_Q4_1 = 3,  // except 1d tensors
     GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
     GGML_FTYPE_MOSTLY_Q4_2 = 5,  // except 1d tensors
-    GGML_FTYPE_MOSTLY_Q4_3 = 6,  // except 1d tensors
     GGML_FTYPE_MOSTLY_Q8_0 = 7,  // except 1d tensors
     GGML_FTYPE_MOSTLY_Q5_0 = 8,  // except 1d tensors
     GGML_FTYPE_MOSTLY_Q5_1 = 9,  // except 1d tensors
index 3cf6b1cd79c4853eed77dd90ec226a9428ac02c5..494b5e2152c5fc6aea5966b8b9816f86886037b4 100644 (file)
@@ -278,15 +278,15 @@ bool stablelm_model_load(const std::string & fname, stablelm_model & model, gpt_
         const int n_layer = hparams.n_layer;
         const int n_ctx   = hparams.n_ctx;
 
-        const int n_mem      = n_layer*n_ctx;
-        const int n_elements = n_embd*n_mem;
+        const int64_t n_mem      = n_layer*n_ctx;
+        const int64_t n_elements = n_embd*n_mem;
 
         model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
         model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
 
         const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v);
 
-        printf("%s: memory_size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem);
+        printf("%s: memory_size = %8.2f MB, n_mem = %lld\n", __func__, memory_size/1024.0/1024.0, n_mem);
     }
 
     // load weights