git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
minor : clean-up some warnings and style (#5094)
author Georgi Gerganov <redacted>
Tue, 23 Jan 2024 12:12:57 +0000 (14:12 +0200)
committer GitHub <redacted>
Tue, 23 Jan 2024 12:12:57 +0000 (14:12 +0200)
* minor : clean-up some warnings and style

ggml-ci

* ggml : add comment

common/common.cpp
examples/llava/clip.cpp
examples/perplexity/perplexity.cpp
ggml.c
ggml.h
llama.cpp

diff --git a/common/common.cpp b/common/common.cpp
index 0a7096171f2b56e4cefe05c1db1b4745ac1a7b56..6b07f119718c0246a1585e52d780a254758cd518 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -216,12 +216,10 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             }
             // store the external file name in params
             params.prompt_file = argv[i];
-            file.seekg(0, std::ios::end);
-            size_t size = file.tellg();
-            file.seekg(0, std::ios::beg);
-            params.prompt.resize(size);
-            file.read((char *)params.prompt.data(), size);
-            fprintf(stderr, "Read %zu bytes from binary file %s\n", size, argv[i]);
+            std::ostringstream ss;
+            ss << file.rdbuf();
+            params.prompt = ss.str();
+            fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), argv[i]);
         } else if (arg == "-f" || arg == "--file") {
             if (++i >= argc) {
                 invalid_param = true;
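
Note: the change above replaces the manual seekg/tellg/read bookkeeping with a single read of the stream buffer. A minimal standalone sketch of the same technique, assuming an illustrative input file name of prompt.bin:

#include <cstdio>
#include <fstream>
#include <sstream>
#include <string>

int main() {
    std::ifstream file("prompt.bin", std::ios::binary); // illustrative file name
    if (!file) {
        fprintf(stderr, "failed to open file\n");
        return 1;
    }
    std::ostringstream ss;
    ss << file.rdbuf();            // pull the entire stream buffer in one shot
    std::string prompt = ss.str(); // no size/seek bookkeeping needed
    fprintf(stderr, "Read %zu bytes from binary file %s\n", prompt.size(), "prompt.bin");
    return 0;
}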
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 6161fd858c29f2307be83f3ebc8fb96525bc8efc..4a0338a37677508068a992966ae7b89dac45023a 100644 (file)
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2,18 +2,6 @@
 // so there might be still unnecessary artifacts hanging around
 // I'll gradually clean and extend it
 
-#include <cassert>
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
-#include <fstream>
-#include <iostream>
-#include <map>
-#include <regex>
-#include <stdexcept>
-#include <vector>
-#include <sstream>
-
 #include "clip.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <regex>
+#include <stdexcept>
+#include <vector>
+#include <sstream>
+#include <cinttypes>
+
 static std::string format(const char * fmt, ...) {
     va_list ap;
     va_list ap2;
@@ -217,9 +218,9 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
 
 static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") {
     size_t tensor_size = ggml_nbytes(tensor);
-    printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%d, %d, %d, %d], type: %d\n",
+    printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n",
             prefix, ggml_n_dims(tensor), tensor->name, tensor_size,
-            tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], tensor->type);
+            tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], ggml_type_name(tensor->type));
 }
 
 static projector_type clip_projector_type_from_string(const std::string & name) {
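
Note: tensor->ne[] is int64_t, so the old "%d" specifiers triggered -Wformat warnings and are not portable; the PRId64 macros from <cinttypes> expand to the correct conversion specifier on every platform. A small self-contained sketch, with illustrative shape values:

#include <cinttypes>
#include <cstdio>

int main() {
    int64_t ne[4] = {4096, 4096, 1, 1}; // illustrative shape, not from a real model
    printf("shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "]\n",
           ne[0], ne[1], ne[2], ne[3]);
    return 0;
}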
@@ -592,7 +593,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
                 mlp_3 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_3, 1, 0, 2, 3));
                 mlp_3 = ggml_reshape_4d(ctx0, mlp_3, n_patch, n_patch, mlp_3->ne[1], mlp_3->ne[2]);
                 // stride = 1, padding = 1, bias is nullptr
-                block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_1_block_0_0_w, mlp_3, nullptr, 1, 1, 1, 1, 1, 1);
+                block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_1_block_0_0_w, mlp_3, 1, 1, 1, 1, 1, 1);
 
                 // layer norm
                 // // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
@@ -640,7 +641,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             // block_2
             {
                 // stride = 2
-                block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_2_block_0_0_w, block_1, nullptr, 2, 2, 1, 1, 1, 1);
+                block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_2_block_0_0_w, block_1, 2, 2, 1, 1, 1, 1);
 
                 // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
                 // layer norm
@@ -741,18 +742,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     {
         std::map<enum ggml_type, uint32_t> n_type;
 
-        uint32_t n_type_max = 0;
-        enum ggml_type type_max = GGML_TYPE_F32;
-
         for (int i = 0; i < n_tensors; i++) {
             enum ggml_type type = gguf_get_tensor_type(ctx, i);
 
             n_type[type]++;
-
-            if (n_type_max < n_type[type]) {
-                n_type_max = n_type[type];
-                type_max   = type;
-            }
         }
 
         printf("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
@@ -795,14 +788,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
             size_t tensor_size = ggml_nbytes(cur);
             buffer_size += tensor_size;
             if (verbosity >= 3) {
-                printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%d, %d, %d, %d], type: %d\n", __func__, i,
-                       ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], type);
+                printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n",
+                       __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type));
             }
         }
     }
 
-
-
     buffer_size += n_tensors * 128 /* CLIP PADDING */;
 
     clip_ctx * new_clip = new clip_ctx;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 1b7f85f4985630ade477be4e70892faaf5d3d33b..de6d3eb4137b901f9ddf43e8d45214ae4909d051 100644 (file)
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -1202,11 +1202,11 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
     printf("Final Winogrande score(%d tasks): %.4lf +/- %.4lf\n", n_done, 100*p, sigma);
 }
 
-static bool deserialize_string(std::istream& in, std::string& str) {
+static bool deserialize_string(std::istream & in, std::string & str) {
     uint32_t size;
     if (!in.read((char *)&size, sizeof(size)).fail()) {
         str.resize(size);
-        if (!in.read((char *)str.data(), size).fail()) return true;
+        if (!in.read((char *)&str[0], size).fail()) return true;
     }
     return false;
 }
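
Note: the functional part of this change is writing into &str[0] after resize() instead of casting away the const pointer returned by std::string::data() (a non-const data() overload only arrived in C++17). A hedged round-trip sketch of the length-prefixed format this helper reads; the serialize_string counterpart below is illustrative, not part of the patch:

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// illustrative writer: 4-byte length prefix followed by the raw bytes
static void serialize_string(std::ostream & out, const std::string & str) {
    uint32_t size = (uint32_t) str.size();
    out.write((const char *)&size, sizeof(size));
    out.write(str.data(), size);
}

static bool deserialize_string(std::istream & in, std::string & str) {
    uint32_t size;
    if (!in.read((char *)&size, sizeof(size)).fail()) {
        str.resize(size);
        if (!in.read((char *)&str[0], size).fail()) return true; // writable buffer
    }
    return false;
}

int main() {
    std::stringstream ss;
    serialize_string(ss, "winogrande");
    std::string out;
    std::cout << (deserialize_string(ss, out) ? out : "failed") << "\n";
    return 0;
}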
diff --git a/ggml.c b/ggml.c
index f85045c9c40ee74fd1ec2abc8074bfa83940dd28..ca98fde8ab2398974d578ed6e8f4350ed7cbf0ce 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -5368,14 +5368,12 @@ struct ggml_tensor * ggml_conv_depthwise_2d(
     struct ggml_context * ctx,
     struct ggml_tensor * a,
     struct ggml_tensor * b,
-    struct ggml_tensor * c,
     int                  s0,
     int                  s1,
     int                  p0,
     int                  p1,
     int                  d0,
     int                  d1) {
-
     struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
     struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
                                         ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
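
Note: with the unused bias tensor dropped from ggml_conv_depthwise_2d, callers now pass only the kernel, the input, and the stride/padding/dilation values (see the clip.cpp hunks above). A minimal build-only sketch against the new signature; the shapes and types are illustrative and assume ggml.h is on the include path and the library is linked:

#include "ggml.h"

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 64*1024*1024, // illustrative scratch size
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // illustrative shapes: 3x3 depthwise kernel over a 24x24 map with 64 channels
    struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 3, 3, 1, 64);
    struct ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 24, 24, 64, 1);

    // new signature: (ctx, kernel, input, s0, s1, p0, p1, d0, d1) - no bias tensor
    struct ggml_tensor * out = ggml_conv_depthwise_2d(ctx, a, b, 1, 1, 1, 1, 1, 1);
    (void) out;

    ggml_free(ctx);
    return 0;
}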
@@ -9991,7 +9989,7 @@ static void ggml_compute_forward_mul_mat(
             return;
         }
 
-        const int64_t tgemm0 = ggml_perf_time_us();
+        //const int64_t tgemm0 = ggml_perf_time_us();
         for (int64_t i13 = 0; i13 < ne13; i13++) {
             for (int64_t i12 = 0; i12 < ne12; i12++) {
                 const int64_t i03 = i13/r3;
@@ -16934,7 +16932,10 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
                     if (ggml_compute_forward_mul_mat_use_blas(node)) {
                         if (node->src[0]->type != GGML_TYPE_F32) {
                             // here we need memory for fully dequantized matrix from src0
-                            cur = ggml_type_size(GGML_TYPE_F32)*ggml_nelements(node->src[0]);
+                            // take into account that src0 can be broadcasted into src1[2,3]
+                            cur = ggml_type_size(GGML_TYPE_F32)
+                                * node->src[0]->ne[0]*node->src[0]->ne[1]
+                                * node->src[1]->ne[2]*node->src[1]->ne[3];
                         }
                     } else
 #endif
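
Note: the old estimate reserved room for a single dequantized copy of src0, but when src0 is broadcast across src1's third and fourth dimensions the BLAS path needs one dequantized copy per broadcast slice. A tiny worked example of the difference, with illustrative shapes:

#include <cstddef>
#include <cstdio>

int main() {
    // illustrative shapes: src0 = [4096, 4096, 1, 1] (quantized weight),
    //                      src1 = [4096,  512, 8, 1] (activations, broadcast over ne[2])
    const size_t f32_size = 4;             // ggml_type_size(GGML_TYPE_F32)
    const size_t ne00 = 4096, ne01 = 4096; // src0->ne[0], src0->ne[1]
    const size_t ne12 = 8,    ne13 = 1;    // src1->ne[2], src1->ne[3]

    const size_t before = f32_size*ne00*ne01;           // old: one dequantized copy
    const size_t after  = f32_size*ne00*ne01*ne12*ne13; // new: one copy per broadcast slice

    printf("scratch before: %zu MiB, after: %zu MiB\n", before >> 20, after >> 20); // 64 vs 512
    return 0;
}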
diff --git a/ggml.h b/ggml.h
index dca7bd9ceb0d505590e196def782e78e55937726..1c4976271677425c3e54c5cddb9b379f88367650 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -1499,7 +1499,6 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
-            struct ggml_tensor  * c,
             int                  s0,
             int                  s1,
             int                  p0,
diff --git a/llama.cpp b/llama.cpp
index f6f1ec0f403a89eaebe33e612ebac033b213601e..582e82260ea853178a3c8a1352c511bf1141ab70 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -2300,18 +2300,18 @@ struct llama_model_loader {
             }
 
             switch (type_max) {
-                case GGML_TYPE_F32:  ftype = LLAMA_FTYPE_ALL_F32;       break;
-                case GGML_TYPE_F16:  ftype = LLAMA_FTYPE_MOSTLY_F16;    break;
-                case GGML_TYPE_Q4_0: ftype = LLAMA_FTYPE_MOSTLY_Q4_0;   break;
-                case GGML_TYPE_Q4_1: ftype = LLAMA_FTYPE_MOSTLY_Q4_1;   break;
-                case GGML_TYPE_Q5_0: ftype = LLAMA_FTYPE_MOSTLY_Q5_0;   break;
-                case GGML_TYPE_Q5_1: ftype = LLAMA_FTYPE_MOSTLY_Q5_1;   break;
-                case GGML_TYPE_Q8_0: ftype = LLAMA_FTYPE_MOSTLY_Q8_0;   break;
-                case GGML_TYPE_Q2_K: ftype = LLAMA_FTYPE_MOSTLY_Q2_K;   break;
-                case GGML_TYPE_Q3_K: ftype = LLAMA_FTYPE_MOSTLY_Q3_K_M; break;
-                case GGML_TYPE_Q4_K: ftype = LLAMA_FTYPE_MOSTLY_Q4_K_M; break;
-                case GGML_TYPE_Q5_K: ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M; break;
-                case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K;   break;
+                case GGML_TYPE_F32:     ftype = LLAMA_FTYPE_ALL_F32;        break;
+                case GGML_TYPE_F16:     ftype = LLAMA_FTYPE_MOSTLY_F16;     break;
+                case GGML_TYPE_Q4_0:    ftype = LLAMA_FTYPE_MOSTLY_Q4_0;    break;
+                case GGML_TYPE_Q4_1:    ftype = LLAMA_FTYPE_MOSTLY_Q4_1;    break;
+                case GGML_TYPE_Q5_0:    ftype = LLAMA_FTYPE_MOSTLY_Q5_0;    break;
+                case GGML_TYPE_Q5_1:    ftype = LLAMA_FTYPE_MOSTLY_Q5_1;    break;
+                case GGML_TYPE_Q8_0:    ftype = LLAMA_FTYPE_MOSTLY_Q8_0;    break;
+                case GGML_TYPE_Q2_K:    ftype = LLAMA_FTYPE_MOSTLY_Q2_K;    break;
+                case GGML_TYPE_Q3_K:    ftype = LLAMA_FTYPE_MOSTLY_Q3_K_M;  break;
+                case GGML_TYPE_Q4_K:    ftype = LLAMA_FTYPE_MOSTLY_Q4_K_M;  break;
+                case GGML_TYPE_Q5_K:    ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M;  break;
+                case GGML_TYPE_Q6_K:    ftype = LLAMA_FTYPE_MOSTLY_Q6_K;    break;
                 case GGML_TYPE_IQ2_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XXS; break;
                 case GGML_TYPE_IQ2_XS:  ftype = LLAMA_FTYPE_MOSTLY_IQ2_XS;  break;
                 default: