fix some warnings from gcc and clang-tidy (#3038)

author Cebtenzzre <redacted>

Thu, 7 Sep 2023 17:22:29 +0000 (13:22 -0400)

committer GitHub <redacted>

Thu, 7 Sep 2023 17:22:29 +0000 (13:22 -0400)
diff --git a/.clang-tidy b/.clang-tidy

index 1a42b9abc79edbdd0fcd8069faf78cb31d760544..3078beaccd1d697ea550463fe9f70537569df391 100644 (file)
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -3,6 +3,7 @@ Checks: >
      bugprone-*,
      -bugprone-easily-swappable-parameters,
      -bugprone-implicit-widening-of-multiplication-result,
+    -bugprone-misplaced-widening-cast,
      -bugprone-narrowing-conversions,
      readability-*,
      -readability-avoid-unconditional-preprocessor-if,
@@ -15,4 +16,8 @@ Checks: >
      -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
      performance-*,
      portability-*,
+    misc-*,
+    -misc-const-correctness,
+    -misc-non-private-member-variables-in-classes,
+    -misc-no-recursion,
  FormatStyle: none
diff --git a/CMakeLists.txt b/CMakeLists.txt

index d4ed6179ea7a72f3deebc7337c12fcbba1d4381f..d4fa5c261d2cd77b96cfa4a344fe525220dc0929 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -426,7 +426,7 @@ if (LLAMA_ALL_WARNINGS)
          )
          if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
              # g++ only
-            set(cxx_flags ${cxx_flags} -Wno-format-truncation)
+            set(cxx_flags ${cxx_flags} -Wno-format-truncation -Wno-array-bounds)
          endif()
      else()
          # todo : msvc
diff --git a/Makefile b/Makefile

index 4f311ee2c4e30ddbb542b4cc7235da7ce09bcdd2..86e36ba52a0c0e16f5a54340698a20de1f76e244 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -134,7 +134,7 @@ MK_CXXFLAGS  += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-m
  
  ifeq '' '$(findstring clang++,$(CXX))'
         # g++ only
-       MK_CXXFLAGS += -Wno-format-truncation
+       MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
  endif
  
  # OS specific
diff --git a/common/common.cpp b/common/common.cpp

index 28b7c6300fa514d9651d5a7ed5b5b5f2cad7e85b..6e5d5b4d50757c1b71e6c5256749049edf90d786 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -57,7 +57,7 @@ int32_t get_num_physical_cores() {
              siblings.insert(line);
          }
      }
-    if (siblings.size() > 0) {
+    if (!siblings.empty()) {
          return static_cast<int32_t>(siblings.size());
      }
  #elif defined(__APPLE__) && defined(__MACH__)
diff --git a/common/common.h b/common/common.h

index 85ac0df9b5b3d7d2d2f409764b1c940cc4ac6de9..012bf5e136f213394888bd6d2e0e4d22a2a08285 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -20,6 +20,9 @@
  #define DIRECTORY_SEPARATOR '/'
  #endif // _WIN32
  
+#define die(msg)          do { fputs("error: " msg "\n", stderr);                  exit(1); } while (0)
+#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", ##__VA_ARGS__); exit(1); } while (0)
+
  //
  // CLI argument parsing
  //
diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp

index e76bd11c315983ffa4325a378be0a691ffebe7c6..177d1e3a834809044c0ba914bd92be2817a67c06 100644 (file)
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@@ -415,6 +415,7 @@ namespace grammar_parser {
  
      std::vector<const llama_grammar_element *> parse_state::c_rules() {
          std::vector<const llama_grammar_element *> ret;
+        ret.reserve(rules.size());
          for (const auto & rule : rules) {
              ret.push_back(rule.data());
          }
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

index 9e856c21a26c4b3da5e026312460d3578a4558d0..293b455d093c367a22bc9bc309b91bbf5908b614 100644 (file)
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -1,5 +1,6 @@
  #include "ggml.h"
  #include "llama.h"
+#include "common.h"
  
  #include <unordered_map>
  #include <vector>
@@ -499,10 +500,10 @@ struct llama_file {
          errno = 0;
          std::size_t ret = std::fread(ptr, size, 1, fp);
          if (ferror(fp)) {
-            throw std::runtime_error(format("read error: %s", strerror(errno)));
+            die_fmt("fread failed: %s", strerror(errno));
          }
          if (ret != 1) {
-            throw std::runtime_error(std::string("unexpectedly reached end of file"));
+            die("unexpectedly reached end of file");
          }
      }
  
@@ -597,8 +598,7 @@ void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab)
          printf("Assuming llama2.c vocabulary since %s is not a gguf file\n", filename);
          llama_file file(filename, "rb");
          if (!file.fp) {
-            fprintf(stderr, "error: %s: %s\n", strerror(errno), filename);
-            exit(1);
+            die_fmt("%s: %s", strerror(errno), filename);
          }
          const int  n_vocab = config->vocab_size;
          /* uint32_t max_token_length =  */ file.read_u32(); // unused
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp

index 036bdb3987f34752ed7de593ba7d562355dc9d03..87aac3479003cf653a8290c3556db4d30a82556c 100644 (file)
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -23,7 +23,7 @@ extern "C" {
  struct MyModel* create_mymodel(int argc, char ** argv) {
      gpt_params params;
  
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
          return nullptr;
      }
  
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp

index 93d583b5ce15170358e648dc4251d55ca179feb6..49ab3e0635abb9dac2ffa050eb4eef49e068b819 100644 (file)
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -11,7 +11,7 @@
  int main(int argc, char ** argv) {
      gpt_params params;
  
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
          return 1;
      }
  
diff --git a/examples/gptneox-wip/falcon-main.cpp b/examples/gptneox-wip/falcon-main.cpp

index d4b130b254c00e90bfef3ffb8df03c26b3e45644..7f9a1620b60bf7e70136233c6befc8021116f2b9 100644 (file)
--- a/examples/gptneox-wip/falcon-main.cpp
+++ b/examples/gptneox-wip/falcon-main.cpp
@@ -953,7 +953,7 @@ int main(int argc, char ** argv) {
  
      gpt_params params;
  
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
          return 1;
      }
  
diff --git a/examples/gptneox-wip/gptneox-main.cpp b/examples/gptneox-wip/gptneox-main.cpp

index b6cc46c5f4299982c2757799e446627737df7541..55eba0cdcfdfb78885c7ef7852d8fdcb215d324c 100644 (file)
--- a/examples/gptneox-wip/gptneox-main.cpp
+++ b/examples/gptneox-wip/gptneox-main.cpp
@@ -925,7 +925,7 @@ int main(int argc, char ** argv) {
  
      gpt_params params;
  
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
          return 1;
      }
  
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index 9201b53bd9a74488513d8495176f3eee469130d6..c9ca7719bf01f0ad86ead84723041858dc273811 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -48,8 +48,9 @@ static bool is_interacting = false;
  
  void write_logfile(
      const llama_context * ctx, const gpt_params & params, const llama_model * model,
-    const std::vector<llama_token> input_tokens, const std::string output, const std::vector<llama_token> output_tokens) {
-
+    const std::vector<llama_token> & input_tokens, const std::string & output,
+    const std::vector<llama_token> & output_tokens
+) {
      if (params.logdir.empty()) {
          return;
      }
@@ -109,7 +110,7 @@ int main(int argc, char ** argv) {
      gpt_params params;
      g_params = &params;
  
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
          return 1;
      }
  
@@ -303,7 +304,7 @@ int main(int argc, char ** argv) {
  
      // debug message about similarity of saved session, if applicable
      size_t n_matching_session_tokens = 0;
-    if (session_tokens.size() > 0) {
+    if (!session_tokens.empty()) {
          for (llama_token id : session_tokens) {
              if (n_matching_session_tokens >= embd_inp.size() || id != embd_inp[n_matching_session_tokens]) {
                  break;
@@ -401,7 +402,7 @@ int main(int argc, char ** argv) {
  
          LOG_TEE("%s: interactive mode on.\n", __func__);
  
-        if (params.antiprompt.size()) {
+        if (!params.antiprompt.empty()) {
              for (const auto & antiprompt : params.antiprompt) {
                  LOG_TEE("Reverse prompt: '%s'\n", antiprompt.c_str());
              }
@@ -499,7 +500,7 @@ int main(int argc, char ** argv) {
  
      while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
          // predict
-        if (embd.size() > 0) {
+        if (!embd.empty()) {
              // Note: n_ctx - 4 here is to match the logic for commandline prompt handling via
              // --prompt or --file which uses the same value.
              int max_embd_size = n_ctx - 4;
@@ -624,7 +625,7 @@ int main(int argc, char ** argv) {
                  LOG("n_past = %d\n", n_past);
              }
  
-            if (embd.size() > 0 && !path_session.empty()) {
+            if (!embd.empty() && !path_session.empty()) {
                  session_tokens.insert(session_tokens.end(), embd.begin(), embd.end());
                  n_session_consumed = session_tokens.size();
              }
@@ -695,7 +696,7 @@ int main(int argc, char ** argv) {
          // if not currently processing queued inputs;
          if ((int) embd_inp.size() <= n_consumed) {
              // check for reverse prompt
-            if (params.antiprompt.size()) {
+            if (!params.antiprompt.empty()) {
                  std::string last_output;
                  for (auto id : last_tokens) {
                      last_output += llama_token_to_piece(ctx, id);
@@ -732,7 +733,7 @@ int main(int argc, char ** argv) {
                  LOG("found EOS token\n");
  
                  if (params.interactive) {
-                    if (params.antiprompt.size() != 0) {
+                    if (!params.antiprompt.empty()) {
                          // tokenize and inject first reverse prompt
                          const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
                          embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp

index 843b2ae3527f60edcd532827b25deaab0ce04cfb..1b760683b0b03fe737080a53d28bd7d2a4a828bc 100644 (file)
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -655,7 +655,7 @@ int main(int argc, char ** argv) {
      gpt_params params;
  
      params.n_batch = 512;
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
          return 1;
      }
  
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp

index 06ce18f09a346a6f88df811e42f7e22eb1731309..6ce03ba7ba50b12e1a6ba5033fb7cedd4b36d7ac 100644 (file)
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -71,7 +71,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
  }
  
  // Check if a layer is included/excluded by command line
-bool layer_included(const quantize_stats_params params, const std::string & layer) {
+bool layer_included(const quantize_stats_params & params, const std::string & layer) {
      for (const auto& excluded : params.exclude_layers) {
          if (std::regex_search(layer, std::regex(excluded))) {
              return false;
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp

index c174be069a9226f0fbad792f9565516a365db588..1bf182482e5ae4e73b596c36297bc1d1430c7cac 100644 (file)
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -143,10 +143,9 @@ int main(int argc, char ** argv) {
          if (!try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) {
              fprintf(stderr, "%s: invalid ftype '%s'\n", __func__, argv[3]);
              return 1;
-        } else {
-            if (ftype_str == "COPY") {
-               params.only_copy = true;
-            }
+        }
+        if (ftype_str == "COPY") {
+           params.only_copy = true;
          }
          arg_idx++;
      }
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp

index 573bc4ef988a69e60b6d51de27ded2c1291d5629..14e9501ca601b71557d39caa51cfe914ed12fbaf 100644 (file)
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -13,7 +13,7 @@ int main(int argc, char ** argv) {
      params.repeat_last_n = 64;
      params.prompt = "The quick brown fox";
  
-    if (gpt_params_parse(argc, argv, params) == false) {
+    if (!gpt_params_parse(argc, argv, params)) {
          return 1;
      }
  
@@ -44,7 +44,7 @@ int main(int argc, char ** argv) {
          llama_free_model(model);
          return 1;
      }
-    auto tokens = llama_tokenize(ctx, params.prompt.c_str(), true);
+    auto tokens = llama_tokenize(ctx, params.prompt, true);
      auto n_prompt_tokens = tokens.size();
      if (n_prompt_tokens < 1) {
          fprintf(stderr, "%s : failed to tokenize prompt\n", __func__);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp

index 6b606447da749040ae043d644253f5d0d96ae5bb..3f3c646503c79f1b60dcc0eace6986a6c7cc3a4c 100644 (file)
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -139,7 +139,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
  }
  
  // convert a vector of completion_token_output to json
-static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> probs)
+static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
  {
      json out = json::array();
      for (const auto &prob : probs)
@@ -271,7 +271,7 @@ struct llama_server_context
          return true;
      }
  
-    std::vector<llama_token> tokenize(json json_prompt, bool add_bos)
+    std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
      {
          // If `add_bos` is true, we only add BOS, when json_prompt is a string,
          // or the first element of the json_prompt array is a string.
@@ -611,7 +611,7 @@ struct llama_server_context
  
      completion_token_output doCompletion()
      {
-        const completion_token_output token_with_probs = nextToken();
+        auto token_with_probs = nextToken();
  
          const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(ctx, token_with_probs.tok);
          generated_text += token_text;
@@ -1255,7 +1255,7 @@ void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
  struct token_translator {
      llama_context * ctx;
      std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
-    std::string operator()(completion_token_output cto) const { return (*this)(cto.tok); }
+    std::string operator()(const completion_token_output & cto) const { return (*this)(cto.tok); }
  };
  
  void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) {
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp

index 6fe85d419618f99ca7ab763bfffba434850fc369..947aa7ed3bd3e625669dbaa4503def9760eb7051 100644 (file)
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -169,10 +169,6 @@ struct my_llama_hparams {
  
      float rope_freq_base  = 10000.0f;
      float rope_freq_scale = 1.0f;
-
-    bool operator!=(const my_llama_hparams& other) const {
-        return memcmp(this, &other, sizeof(my_llama_hparams));
-    }
  };
  
  struct my_llama_layer {
@@ -929,28 +925,6 @@ void get_example_targets_batch(struct llama_context * lctx, const int * train_sa
      }
  }
  
-
-#ifdef __GNUC__
-#ifdef __MINGW32__
-__attribute__((format(gnu_printf, 1, 2)))
-#else
-__attribute__((format(printf, 1, 2)))
-#endif
-#endif
-static std::string format(const char * fmt, ...) {
-    va_list ap, ap2;
-    va_start(ap, fmt);
-    va_copy(ap2, ap);
-    int size = vsnprintf(NULL, 0, fmt, ap);
-    GGML_ASSERT(size >= 0 && size < INT_MAX);
-    std::vector<char> buf(size + 1);
-    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
-    GGML_ASSERT(size2 == size);
-    va_end(ap2);
-    va_end(ap);
-    return std::string(buf.data(), size);
-}
-
  int tokenize_file(struct llama_context * lctx, const char * filename, std::vector<llama_token>& out) {
      FILE * fp = std::fopen(filename, "rb");
      if (fp == NULL) {
@@ -983,10 +957,10 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
      out.resize(size+1);
  
      if (std::fread(buf.data(), size, 1, fp) != 1) {
-        throw std::runtime_error(std::string("unexpectedly reached end of file"));
+        die("unexpectedly reached end of file");
      }
      if (ferror(fp)) {
-        throw std::runtime_error(format("read error: %s", strerror(errno)));
+        die_fmt("fread failed: %s", strerror(errno));
      }
  
      buf[size] = '\0';
@@ -1047,11 +1021,11 @@ void shuffle_ints(int * begin, int * end) {
      if (kid >= 0) { \
          enum gguf_type ktype = gguf_get_kv_type(ctx, kid); \
          if (ktype != (type)) { \
-            throw std::runtime_error(format("key %s has wrong type: %s", skey.c_str(), gguf_type_name(ktype))); \
+            die_fmt("key %s has wrong type: %s", skey.c_str(), gguf_type_name(ktype)); \
          } \
          (dst) = func(ctx, kid); \
      } else if (req) { \
-        throw std::runtime_error(format("key not found in model: %s", skey.c_str())); \
+        die_fmt("key not found in model: %s", skey.c_str()); \
      } \
  }
  
@@ -1136,7 +1110,7 @@ void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_g
          read_tensor_by_name(opt->lbfgs.lms,  f_ggml_ctx, LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S);
          read_tensor_by_name(opt->lbfgs.lmy,  f_ggml_ctx, LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y);
      } else {
-        throw std::runtime_error("unknown optimizer type\n");
+        die("unknown optimizer type");
      }
  }
  
@@ -1315,20 +1289,20 @@ void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_mod
  
          const int token_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_LIST));
          if (token_idx == -1) {
-            throw std::runtime_error("cannot find tokenizer vocab in model file\n");
+            die("cannot find tokenizer vocab in model file");
          }
          const uint32_t n_vocab = gguf_get_arr_n(vctx, token_idx);
  
          const int score_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_SCORES));
          if (score_idx == -1) {
-            throw std::runtime_error("cannot find tokenizer scores in model file\n");
+            die("cannot find tokenizer scores in model file");
          }
  
          const float * scores = (const float * ) gguf_get_arr_data(vctx, score_idx);
  
          const int toktype_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE));
          if (toktype_idx == -1) {
-            throw std::runtime_error("cannot find token type list in GGUF file\n");
+            die("cannot find token type list in GGUF file");
          }
  
          const int * toktypes = (const int * ) gguf_get_arr_data(vctx, toktype_idx);
@@ -1356,7 +1330,7 @@ void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_mod
              // read and copy bpe merges
              const int merges_keyidx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_MERGES));
              if (merges_keyidx == -1) {
-                throw std::runtime_error("cannot find tokenizer merges in model file\n");
+                die("cannot find tokenizer merges in model file");
              }
  
              const int n_merges = gguf_get_arr_n(vctx, merges_keyidx);
@@ -1988,7 +1962,7 @@ void opt_callback(void * vdata, float * sched) {
      float min_sched = params->adam_min_alpha / params->adam_alpha;
      *sched = min_sched + *sched * (1.0f - min_sched);
  
-    int impr_plot = std::isnan(opt->loss_after) ? 0 : -(int)(1 + (opt->loss_before - opt->loss_after) * 10.0f + 0.5f);
+    int impr_plot = std::isnan(opt->loss_after) ? 0 : -std::lround(1 + (opt->loss_before - opt->loss_after) * 10.0f);
      printf("%s: iter=%*d, sched=%f loss0=%f loss=%f | improvement: %*d>\n", __func__, 6, opt->iter, *sched, opt->loss_before, opt->loss_after, impr_plot, (int)0);
  
      if (data->shuffle_countdown < n_batch) {
diff --git a/ggml-alloc.c b/ggml-alloc.c

index c1939a4b7817b63a173c2b453729592c1457327c..a896601d1f03a1f5a639c7f7d755aece33acfe9b 100644 (file)
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -138,7 +138,7 @@ static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_ten
  
  void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
  #ifdef GGML_ALLOCATOR_DEBUG
-    GGML_ASSERT(ggml_is_view(tensor) == false); // views generally get data pointer from one of their sources
+    GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources
      GGML_ASSERT(tensor->data == NULL); // avoid allocating tensor which already has memory allocated
  #endif
      size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
@@ -165,14 +165,14 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
      if (best_fit_block == -1) {
          // the last block is our last resort
          struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
+        max_avail = MAX(max_avail, block->size);
          if (block->size >= size) {
              best_fit_block = alloc->n_free_blocks - 1;
-            max_avail = MAX(max_avail, block->size);
          } else {
              fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
                      __func__, size, max_avail);
              GGML_ASSERT(!"not enough space in the buffer");
-        return;
+            return;
          }
      }
      struct free_block * block = &alloc->free_blocks[best_fit_block];
diff --git a/ggml.c b/ggml.c

index 50adf18ec7f425301b063034555a295f51cf91ee..8a677ab2a3294223fde198b733e16cb5e5d801bc 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -4768,7 +4768,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
  
      size_t obj_alloc_size = 0;
  
-    if (view_src == NULL && ctx->no_alloc == false) {
+    if (view_src == NULL && !ctx->no_alloc) {
          if (ctx->scratch.data != NULL) {
              // allocate tensor data in the scratch buffer
              if (ctx->scratch.offs + data_size > ctx->scratch.size) {
@@ -5469,7 +5469,7 @@ static struct ggml_tensor * ggml_mul_impl(
      }
  
      if (inplace) {
-        GGML_ASSERT(is_node == false);
+        GGML_ASSERT(!is_node);
      }
  
      struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
@@ -5512,7 +5512,7 @@ static struct ggml_tensor * ggml_div_impl(
      }
  
      if (inplace) {
-        GGML_ASSERT(is_node == false);
+        GGML_ASSERT(!is_node);
      }
  
      struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
@@ -19957,7 +19957,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  
          struct ggml_tensor * data = NULL;
  
-        if (params.no_alloc == false) {
+        if (!params.no_alloc) {
              data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
  
              ok = ok && data != NULL;
@@ -19998,7 +19998,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
              }
  
              // point the data member to the appropriate location in the binary blob using the tensor infos
-            if (params.no_alloc == false) {
+            if (!params.no_alloc) {
                //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
                  cur->data = (char *) data->data + ctx->infos[i].offset;               // offset from data
              }
diff --git a/llama.cpp b/llama.cpp

index 2c9071a8f55c67b7e8005977cb602ca2cf09c7a0..208dcef0e08cfe911628546b0cdfe0422dfdc632 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -3052,33 +3052,10 @@ static bool llama_is_control_token(const llama_vocab & vocab, llama_token id) {
      return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_CONTROL;
  }
  
-static bool llama_is_user_defined_token(const llama_vocab & vocab, llama_token id) {
-    return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_USER_DEFINED;
-}
-
-static bool llama_is_unused_token(const llama_vocab & vocab, llama_token id) {
-    return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_UNUSED;
-}
-
  static bool llama_is_byte_token(const llama_vocab & vocab, llama_token id) {
      return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_BYTE;
  }
  
-static bool llama_is_bos_token(const llama_vocab & vocab, llama_token id) {
-    GGML_ASSERT(llama_is_control_token(vocab, id));
-    return id == vocab.special_bos_id;
-}
-
-static bool llama_is_eos_token(const llama_vocab & vocab, llama_token id ) {
-    GGML_ASSERT(llama_is_control_token(vocab, id));
-    return id == vocab.special_eos_id;
-}
-
-static bool llama_is_pad_token(const llama_vocab & vocab, llama_token id ) {
-    GGML_ASSERT(id < 0 || llama_is_control_token(vocab, id));
-    return id == vocab.special_pad_id;
-}
-
  static uint8_t llama_token_to_byte(const llama_vocab & vocab, llama_token id) {
      GGML_ASSERT(llama_is_byte_token(vocab, id));
      const auto& token_data = vocab.id_to_token.at(id);
@@ -4800,9 +4777,11 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
      std::vector<std::thread> workers;
      std::mutex mutex;
  
+#ifdef GGML_USE_K_QUANTS
      auto use_more_bits = [] (int i_layer, int num_layers) -> bool {
          return i_layer < num_layers/8 || i_layer >= 7*num_layers/8 || (i_layer - num_layers/8)%3 == 2;
      };
+#endif
  
      int idx = 0;
  
@@ -5947,7 +5926,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
          rng_ss.str(std::string(&rng_buf[0], rng_size));
          rng_ss >> ctx->rng;
  
-        GGML_ASSERT(rng_ss.fail() == false);
+        GGML_ASSERT(!rng_ss.fail());
      }
  
      // set logits
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp

index 0bb9537f693ed22ba58bb642d7f124cd2373e8c8..cbea7d4525ca4c7479c48d73fc936aa030a2b537 100644 (file)
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -76,7 +76,7 @@ void * align_with_offset(void * ptr, int offset) {
      return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
  }
  
-void benchmark_function(size_t size, size_t q_size, int64_t iterations, std::function<size_t(void)> function) {
+void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
      int64_t min_time_us = INT64_MAX;
      int64_t total_time_us = 0;
      int64_t min_time_cycles = INT64_MAX;
author	Cebtenzzre <redacted>
	Thu, 7 Sep 2023 17:22:29 +0000 (13:22 -0400)
committer	GitHub <redacted>
	Thu, 7 Sep 2023 17:22:29 +0000 (13:22 -0400)
.clang-tidy		patch | blob | history
CMakeLists.txt		patch | blob | history
Makefile		patch | blob | history
common/common.cpp		patch | blob | history
common/common.h		patch | blob | history
common/grammar-parser.cpp		patch | blob | history
examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp		patch | blob | history
examples/embd-input/embd-input-lib.cpp		patch | blob | history
examples/embedding/embedding.cpp		patch | blob | history
examples/gptneox-wip/falcon-main.cpp		patch | blob | history
examples/gptneox-wip/gptneox-main.cpp		patch | blob | history
examples/main/main.cpp		patch | blob | history
examples/perplexity/perplexity.cpp		patch | blob | history
examples/quantize-stats/quantize-stats.cpp		patch | blob | history
examples/quantize/quantize.cpp		patch | blob | history
examples/save-load-state/save-load-state.cpp		patch | blob | history
examples/server/server.cpp		patch | blob | history
examples/train-text-from-scratch/train-text-from-scratch.cpp		patch | blob | history
ggml-alloc.c		patch | blob | history
ggml.c		patch | blob | history
llama.cpp		patch | blob | history
tests/test-quantize-perf.cpp		patch | blob | history