check C++ code with -Wmissing-declarations (#3184)

author Cebtenzzre <redacted>

Fri, 15 Sep 2023 19:38:27 +0000 (15:38 -0400)

committer GitHub <redacted>

Fri, 15 Sep 2023 19:38:27 +0000 (15:38 -0400)
author Cebtenzzre <redacted>
Fri, 15 Sep 2023 19:38:27 +0000 (15:38 -0400)
committer GitHub <redacted>
Fri, 15 Sep 2023 19:38:27 +0000 (15:38 -0400)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 3283f7b656db9d9705b4beb0d0cf89cfbd899d10..abecd684ba2fc0b9d6ea7fa49691c1442873ff88 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -427,6 +427,7 @@ if (LLAMA_ALL_WARNINGS)
              -Wextra
              -Wpedantic
              -Wcast-qual
+            -Wmissing-declarations
              -Wno-unused-function
              -Wno-multichar
          )
diff --git a/Makefile b/Makefile

index 7ab1b7a0958b09d6926f219650fad529aad31076..98bf8845c8a84fe2953a586aa2df05faf1f2c7e9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -172,9 +172,16 @@ endif # LLAMA_DISABLE_LOGS
  # warnings
  MK_CFLAGS    += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
                                 -Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
-MK_CXXFLAGS  += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
+MK_CXXFLAGS  += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
  
-ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
+# TODO(cebtenzzre): remove this once PR #2632 gets merged
+TTFS_CXXFLAGS = $(CXXFLAGS) -Wno-missing-declarations
+
+ifneq '' '$(findstring clang,$(shell $(CXX) --version))'
+       # clang++ only
+       MK_CXXFLAGS   += -Wmissing-prototypes
+       TTFS_CXXFLAGS += -Wno-missing-prototypes
+else
         # g++ only
         MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
  endif
@@ -524,7 +531,7 @@ gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
  
  train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o $(OBJS)
-       $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+       $(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
  
  convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
         $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
diff --git a/common/common.cpp b/common/common.cpp

index 9969cb97d3c2aea44bab66d71544c737f574b080..02ec0f8d0861a3d2da69ae38a1530a2a179c09fc 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -78,7 +78,7 @@ int32_t get_num_physical_cores() {
      return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
  }
  
-void process_escapes(std::string& input) {
+static void process_escapes(std::string& input) {
      std::size_t input_len = input.length();
      std::size_t output_idx = 0;
  
diff --git a/common/console.cpp b/common/console.cpp

index 23545e5be84dc81062b5a75a69b7fcfc98a842c0..f65cbc6eda0b1d1e4f45ab976fb8868be33b6c79 100644 (file)
--- a/common/console.cpp
+++ b/common/console.cpp
@@ -158,7 +158,7 @@ namespace console {
          }
      }
  
-    char32_t getchar32() {
+    static char32_t getchar32() {
  #if defined(_WIN32)
          HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
          wchar_t high_surrogate = 0;
@@ -212,7 +212,7 @@ namespace console {
  #endif
      }
  
-    void pop_cursor() {
+    static void pop_cursor() {
  #if defined(_WIN32)
          if (hConsole != NULL) {
              CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
@@ -233,7 +233,7 @@ namespace console {
          putc('\b', out);
      }
  
-    int estimateWidth(char32_t codepoint) {
+    static int estimateWidth(char32_t codepoint) {
  #if defined(_WIN32)
          (void)codepoint;
          return 1;
@@ -242,7 +242,7 @@ namespace console {
  #endif
      }
  
-    int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
+    static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
  #if defined(_WIN32)
          CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
          if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
@@ -303,7 +303,7 @@ namespace console {
  #endif
      }
  
-    void replace_last(char ch) {
+    static void replace_last(char ch) {
  #if defined(_WIN32)
          pop_cursor();
          put_codepoint(&ch, 1, 1);
@@ -312,7 +312,7 @@ namespace console {
  #endif
      }
  
-    void append_utf8(char32_t ch, std::string & out) {
+    static void append_utf8(char32_t ch, std::string & out) {
          if (ch <= 0x7F) {
              out.push_back(static_cast<unsigned char>(ch));
          } else if (ch <= 0x7FF) {
@@ -333,7 +333,7 @@ namespace console {
      }
  
      // Helper function to remove the last UTF-8 character from a string
-    void pop_back_utf8_char(std::string & line) {
+    static void pop_back_utf8_char(std::string & line) {
          if (line.empty()) {
              return;
          }
@@ -349,7 +349,7 @@ namespace console {
          line.erase(pos);
      }
  
-    bool readline_advanced(std::string & line, bool multiline_input) {
+    static bool readline_advanced(std::string & line, bool multiline_input) {
          if (out != stdout) {
              fflush(stdout);
          }
@@ -452,7 +452,7 @@ namespace console {
          return has_more;
      }
  
-    bool readline_simple(std::string & line, bool multiline_input) {
+    static bool readline_simple(std::string & line, bool multiline_input) {
  #if defined(_WIN32)
          std::wstring wline;
          if (!std::getline(std::wcin, wline)) {
diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp

index 177d1e3a834809044c0ba914bd92be2817a67c06..5a545a8076460a55ed83ab4437f387f8aee7d31e 100644 (file)
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@@ -9,7 +9,7 @@
  namespace grammar_parser {
      // NOTE: assumes valid utf8 (but checks for overrun)
      // copied from llama.cpp
-    std::pair<uint32_t, const char *> decode_utf8(const char * src) {
+    static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
          static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
          uint8_t  first_byte = static_cast<uint8_t>(*src);
          uint8_t  highbits   = first_byte >> 4;
@@ -24,19 +24,19 @@ namespace grammar_parser {
          return std::make_pair(value, pos);
      }
  
-    uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
+    static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
          uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
          auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
          return result.first->second;
      }
  
-    uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
+    static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
          uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
          state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
          return next_id;
      }
  
-    void add_rule(
+    static void add_rule(
              parse_state & state,
              uint32_t      rule_id,
              const std::vector<llama_grammar_element> & rule) {
@@ -46,11 +46,11 @@ namespace grammar_parser {
          state.rules[rule_id] = rule;
      }
  
-    bool is_word_char(char c) {
+    static bool is_word_char(char c) {
          return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
      }
  
-    std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
+    static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
          const char * pos   = src;
          const char * end   = src + size;
          uint32_t     value = 0;
@@ -73,7 +73,7 @@ namespace grammar_parser {
          return std::make_pair(value, pos);
      }
  
-    const char * parse_space(const char * src, bool newline_ok) {
+    static const char * parse_space(const char * src, bool newline_ok) {
          const char * pos = src;
          while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
                  (newline_ok && (*pos == '\r' || *pos == '\n'))) {
@@ -88,7 +88,7 @@ namespace grammar_parser {
          return pos;
      }
  
-    const char * parse_name(const char * src) {
+    static const char * parse_name(const char * src) {
          const char * pos = src;
          while (is_word_char(*pos)) {
              pos++;
@@ -99,7 +99,7 @@ namespace grammar_parser {
          return pos;
      }
  
-    std::pair<uint32_t, const char *> parse_char(const char * src) {
+    static std::pair<uint32_t, const char *> parse_char(const char * src) {
          if (*src == '\\') {
              switch (src[1]) {
                  case 'x': return parse_hex(src + 2, 2);
@@ -129,7 +129,7 @@ namespace grammar_parser {
              uint32_t            rule_id,
              bool                is_nested);
  
-    const char * parse_sequence(
+    static const char * parse_sequence(
              parse_state                        & state,
              const char                         * src,
              const std::string                  & rule_name,
@@ -247,7 +247,7 @@ namespace grammar_parser {
          return pos;
      }
  
-    const char * parse_rule(parse_state & state, const char * src) {
+    static const char * parse_rule(parse_state & state, const char * src) {
          const char * name_end = parse_name(src);
          const char * pos      = parse_space(name_end, false);
          size_t       name_len = name_end - src;
@@ -285,7 +285,7 @@ namespace grammar_parser {
          }
      }
  
-    void print_grammar_char(FILE * file, uint32_t c) {
+    static void print_grammar_char(FILE * file, uint32_t c) {
          if (0x20 <= c && c <= 0x7f) {
              fprintf(file, "%c", static_cast<char>(c));
          } else {
@@ -294,7 +294,7 @@ namespace grammar_parser {
          }
      }
  
-    bool is_char_element(llama_grammar_element elem) {
+    static bool is_char_element(llama_grammar_element elem) {
          switch (elem.type) {
              case LLAMA_GRETYPE_CHAR:           return true;
              case LLAMA_GRETYPE_CHAR_NOT:       return true;
@@ -304,7 +304,7 @@ namespace grammar_parser {
          }
      }
  
-    void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
+    static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
          for (auto elem : rule) {
              switch (elem.type) {
                  case LLAMA_GRETYPE_END:            fprintf(file, "END");            break;
@@ -334,7 +334,7 @@ namespace grammar_parser {
          fprintf(file, "\n");
      }
  
-    void print_rule(
+    static void print_rule(
              FILE     * file,
              uint32_t   rule_id,
              const std::vector<llama_grammar_element> & rule,
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp

index a99ece9a66fd1cf0992706b7ee259b1a17653bf7..ed61125eaa4da4fd6340b52235ae968b7fb2e3e9 100644 (file)
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -9,12 +9,12 @@
  #endif
  
  #ifdef LLAMA_DEFAULT_RMS_EPS
-static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
+constexpr float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
  #else
-static const float rms_norm_eps = 5e-6f;
+constexpr float rms_norm_eps = 5e-6f;
  #endif
  
-float frand() {
+static float frand() {
      return (float)rand()/(float)RAND_MAX;
  }
  
@@ -25,19 +25,21 @@ struct random_normal_distribution {
      float max;
  };
  
-void init_random_normal_distribution(struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max) {
+static void init_random_normal_distribution(
+    struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max
+) {
      rnd->gen = std::mt19937(seed);
      rnd->nd = std::normal_distribution<float>{mean, std};
      rnd->min = min;
      rnd->max = max;
  }
  
-float frand_normal(struct random_normal_distribution * rnd) {
+static float frand_normal(struct random_normal_distribution * rnd) {
      const float r = rnd->nd(rnd->gen);
      return ((r < rnd->min) ? (rnd->min) : (r > rnd->max) ? (rnd->max) : r);
  }
  
-void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
+static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
      struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
  
      if (plan.work_size > 0) {
@@ -48,13 +50,9 @@ void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph,
      ggml_graph_compute(graph, &plan);
  }
  
-struct ggml_tensor * randomize_tensor(
-        struct ggml_tensor * tensor,
-        int ndims,
-        const int64_t ne[],
-        float fmin,
-        float fmax) {
-
+static struct ggml_tensor * randomize_tensor(
+    struct ggml_tensor * tensor, int ndims, const int64_t ne[], float fmin, float fmax
+) {
      switch (ndims) {
          case 1:
              for (int i0 = 0; i0 < ne[0]; i0++) {
@@ -95,11 +93,9 @@ struct ggml_tensor * randomize_tensor(
      return tensor;
  }
  
-struct ggml_tensor * randomize_tensor_normal(
-        struct ggml_tensor * tensor,
-        int ndims,
-        const int64_t ne[],
-        struct random_normal_distribution * rnd) {
+static struct ggml_tensor * randomize_tensor_normal(
+    struct ggml_tensor * tensor, int ndims, const int64_t ne[], struct random_normal_distribution * rnd
+) {
      float scale = 1.0; // xavier
      switch (ndims) {
          case 1:
@@ -159,7 +155,7 @@ struct llama_hparams {
      }
  };
  
-uint32_t get_n_ff(const struct llama_hparams* hparams) {
+static uint32_t get_n_ff(const struct llama_hparams* hparams) {
      const uint32_t n_ff = ((2*(4*hparams->n_embd)/3 + hparams->n_mult - 1)/hparams->n_mult)*hparams->n_mult;
      return n_ff;
  }
@@ -260,7 +256,7 @@ struct llama_model_lora {
      std::vector<llama_layer_lora> layers;
  };
  
-void init_model(struct llama_model * model) {
+static void init_model(struct llama_model * model) {
      const auto & hparams = model->hparams;
  
      const uint32_t n_embd  = hparams.n_embd;
@@ -297,7 +293,7 @@ void init_model(struct llama_model * model) {
  }
  
  
-void init_model_lora(struct llama_model_lora * model) {
+static void init_model_lora(struct llama_model_lora * model) {
      const auto & hparams = model->hparams;
  
      const uint32_t n_embd  = hparams.n_embd;
@@ -340,7 +336,7 @@ void init_model_lora(struct llama_model_lora * model) {
      }
  }
  
-void set_param_model(struct llama_model * model) {
+static void set_param_model(struct llama_model * model) {
      const auto& hparams = model->hparams;
  
      const uint32_t n_layer = hparams.n_layer;
@@ -366,7 +362,7 @@ void set_param_model(struct llama_model * model) {
      }
  }
  
-void set_param_model_lora(struct llama_model_lora * model) {
+static void set_param_model_lora(struct llama_model_lora * model) {
      const auto& hparams = model->hparams;
  
      const uint32_t n_layer = hparams.n_layer;
@@ -397,7 +393,7 @@ void set_param_model_lora(struct llama_model_lora * model) {
      }
  }
  
-void randomize_model(struct llama_model * model, int seed, float mean, float std, float min, float max) {
+static void randomize_model(struct llama_model * model, int seed, float mean, float std, float min, float max) {
      const auto & hparams = model->hparams;
  
      const uint32_t n_layer = hparams.n_layer;
@@ -426,7 +422,9 @@ void randomize_model(struct llama_model * model, int seed, float mean, float std
  }
  
  
-void randomize_model_lora(struct llama_model_lora * model, int seed, float mean, float std, float min, float max) {
+static void randomize_model_lora(
+    struct llama_model_lora * model, int seed, float mean, float std, float min, float max
+) {
      const auto & hparams = model->hparams;
  
      const uint32_t n_layer = hparams.n_layer;
@@ -459,7 +457,7 @@ void randomize_model_lora(struct llama_model_lora * model, int seed, float mean,
      }
  }
  
-bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
+static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
      const auto & hparams = model->hparams;
  
      const uint32_t n_ctx   = hparams.n_ctx;
@@ -495,7 +493,7 @@ bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int
      return true;
  }
  
-bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {
+static bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {
      const auto & hparams = model->hparams;
  
      const uint32_t n_ctx   = hparams.n_ctx;
@@ -531,15 +529,15 @@ bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora *
      return true;
  }
  
-struct ggml_tensor * forward(
-        struct llama_model    * model,
-        struct llama_kv_cache * cache,
-        struct ggml_context   * ctx0,
-        struct ggml_cgraph    * gf,
-        struct ggml_tensor    * tokens_input,
-        const  int              n_tokens,
-        const  int              n_past) {
-
+static struct ggml_tensor * forward(
+    struct llama_model    * model,
+    struct llama_kv_cache * cache,
+    struct ggml_context   * ctx0,
+    struct ggml_cgraph    * gf,
+    struct ggml_tensor    * tokens_input,
+    const  int              n_tokens,
+    const  int              n_past
+) {
      const int N = n_tokens;
  
      struct llama_kv_cache& kv_self = *cache;
@@ -756,25 +754,25 @@ struct ggml_tensor * forward(
      return inpL;
  }
  
-void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
+static void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
      GGML_ASSERT(tensor->n_dims == 1);
      GGML_ASSERT(tensor->ne[0] == ne0);
  }
  
-void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
+static void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
      GGML_ASSERT(tensor->n_dims == 2);
      GGML_ASSERT(tensor->ne[0] == ne0);
      GGML_ASSERT(tensor->ne[1] == ne1);
  }
  
-void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
+static void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
      GGML_ASSERT(tensor->n_dims == 3);
      GGML_ASSERT(tensor->ne[0] == ne0);
      GGML_ASSERT(tensor->ne[1] == ne1);
      GGML_ASSERT(tensor->ne[2] == ne2);
  }
  
-void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
+static void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
      GGML_ASSERT(tensor->n_dims == 4);
      GGML_ASSERT(tensor->ne[0] == ne0);
      GGML_ASSERT(tensor->ne[1] == ne1);
@@ -782,16 +780,16 @@ void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int6
      GGML_ASSERT(tensor->ne[3] == ne3);
  }
  
-struct ggml_tensor * forward_batch(
-        struct llama_model    * model,
-        struct llama_kv_cache * cache,
-        struct ggml_context   * ctx0,
-        struct ggml_cgraph    * gf,
-        struct ggml_tensor    * tokens_input,
-        const  int              n_tokens,
-        const  int              n_past,
-        const  int              n_batch) {
-
+static struct ggml_tensor * forward_batch(
+    struct llama_model    * model,
+    struct llama_kv_cache * cache,
+    struct ggml_context   * ctx0,
+    struct ggml_cgraph    * gf,
+    struct ggml_tensor    * tokens_input,
+    const  int              n_tokens,
+    const  int              n_past,
+    const  int              n_batch
+) {
      const int N = n_tokens;
  
      struct llama_kv_cache& kv_self = *cache;
@@ -1073,16 +1071,15 @@ struct ggml_tensor * forward_batch(
      return inpL;
  }
  
-
-struct ggml_tensor * forward_lora(
-        struct llama_model_lora * model,
-        struct llama_kv_cache   * cache,
-        struct ggml_context     * ctx0,
-        struct ggml_cgraph      * gf,
-        struct ggml_tensor      * tokens_input,
-        const  int                n_tokens,
-        const  int                n_past) {
-
+static struct ggml_tensor * forward_lora(
+    struct llama_model_lora * model,
+    struct llama_kv_cache   * cache,
+    struct ggml_context     * ctx0,
+    struct ggml_cgraph      * gf,
+    struct ggml_tensor      * tokens_input,
+    const  int                n_tokens,
+    const  int                n_past
+) {
      const int N = n_tokens;
  
      struct llama_kv_cache& kv_self = *cache;
@@ -1328,7 +1325,7 @@ struct ggml_tensor * forward_lora(
      return inpL;
  }
  
-void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
+static void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
      assert(logits->n_dims == 2);
      assert(probs->n_dims == 2);
      assert(best_samples->n_dims == 1);
@@ -1359,7 +1356,10 @@ void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, str
      }
  }
  
-void sample_softmax_batch(struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
+static void sample_softmax_batch(
+    struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs,
+    struct ggml_tensor * best_samples
+) {
      GGML_ASSERT(best_samples->n_dims == 2);
      GGML_ASSERT(logits->n_dims == 3);
      GGML_ASSERT(probs->n_dims == 3);
@@ -1393,7 +1393,7 @@ void sample_softmax_batch(struct ggml_context * ctx, struct ggml_tensor * logits
      }
  }
  
-void print_row(struct ggml_tensor * probs, int i) {
+static void print_row(struct ggml_tensor * probs, int i) {
      for (int k = 0; k < probs->ne[0]; ++k) {
          float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
          printf(" %.2f", p);
@@ -1401,7 +1401,7 @@ void print_row(struct ggml_tensor * probs, int i) {
      printf("\n");
  }
  
-void print_matrix(struct ggml_tensor * probs) {
+static void print_matrix(struct ggml_tensor * probs) {
      assert(probs->n_dims == 2);
      for (int i = 0; i < probs->ne[1]; ++i) {
          for (int k = 0; k < probs->ne[0]; ++k) {
@@ -1412,7 +1412,7 @@ void print_matrix(struct ggml_tensor * probs) {
      }
  }
  
-void print_token(int token, int n_vocab) {
+static void print_token(int token, int n_vocab) {
      for (int k = 0; k < token; ++k) {
          printf(" ");
      }
@@ -1423,14 +1423,14 @@ void print_token(int token, int n_vocab) {
      printf("\n");
  }
  
-void print_tokens(struct ggml_tensor * tokens, int n_vocab) {
+static void print_tokens(struct ggml_tensor * tokens, int n_vocab) {
      for (int i=0; i<tokens->ne[0]; ++i) {
          int token = ggml_get_i32_1d(tokens, i);
          print_token(token, n_vocab);
      }
  }
  
-void get_example_targets(int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) {
+static void get_example_targets(int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) {
      int n_tokens = tokens_input->ne[0];
      int n_vocab = targets->ne[0];
      float randomness = 0.0f;
@@ -1451,7 +1451,9 @@ void get_example_targets(int example_id, struct ggml_tensor * tokens_input, stru
      }
  }
  
-void get_example_targets_batch(struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) {
+static void get_example_targets_batch(
+    struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets
+) {
      GGML_ASSERT(tokens_input->n_dims == 2);
      GGML_ASSERT(     targets->n_dims == 3);
      int n_tokens = tokens_input->ne[0];
@@ -1474,7 +1476,7 @@ void get_example_targets_batch(struct ggml_context * ctx, int example_id, struct
      }
  }
  
-void lshift_examples(struct ggml_tensor * tokens_input, struct ggml_tensor * targets, int n_shift) {
+static void lshift_examples(struct ggml_tensor * tokens_input, struct ggml_tensor * targets, int n_shift) {
      int n_tokens = tokens_input->ne[0];
      int n_vocab = targets->ne[0];
      for (int i=0; i<n_tokens-n_shift; ++i) {
@@ -1485,12 +1487,16 @@ void lshift_examples(struct ggml_tensor * tokens_input, struct ggml_tensor * tar
      }
  }
  
-struct ggml_tensor * square_error_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
+static struct ggml_tensor * square_error_loss(
+    struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b
+) {
      // todo: instead of a-b: a[1:]-b[:-1]
      return ggml_sum(ctx, ggml_sqr(ctx, ggml_sub(ctx, a, b)));
  }
  
-struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
+static struct ggml_tensor * cross_entropy_loss(
+    struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b
+) {
      const float eps = 1e-3f;
      return
          ggml_sum(ctx,
diff --git a/examples/beam-search/beam-search.cpp b/examples/beam-search/beam-search.cpp

index 6b31aea78823ec374b8205dd4ae53586f3374f08..805170c98b20b10d488aa6dd3be1007424c42e04 100644 (file)
--- a/examples/beam-search/beam-search.cpp
+++ b/examples/beam-search/beam-search.cpp
@@ -30,7 +30,8 @@ struct ostream_beam_view {
      llama_context * ctx;
      llama_beam_view beam_view;
  };
-std::ostream& operator<<(std::ostream& os, const ostream_beam_view & obv) {
+
+static std::ostream & operator<<(std::ostream & os, const ostream_beam_view & obv) {
      os << "p(" << obv.beam_view.p << ") eob(" << std::boolalpha << obv.beam_view.eob << ") tokens(";
      for (size_t i = 0 ; i < obv.beam_view.n_tokens ; ++i) {
          os << llama_token_to_piece(obv.ctx, obv.beam_view.tokens[i]);
@@ -46,7 +47,7 @@ struct beam_search_callback_data {
  
  // In this case, end-of-beam (eob) is equivalent to end-of-sentence (eos) but this need not always be the same.
  // For example, eob can be flagged due to maximum token length, stop words, etc.
-bool is_at_eob(const beam_search_callback_data & callback_data, const llama_token * tokens, const size_t n_tokens) {
+static bool is_at_eob(const beam_search_callback_data & callback_data, const llama_token * tokens, size_t n_tokens) {
      return n_tokens && tokens[n_tokens-1] == llama_token_eos(callback_data.ctx);
  }
  
@@ -56,7 +57,7 @@ bool is_at_eob(const beam_search_callback_data & callback_data, const llama_toke
  //  * When all beams converge to a common prefix, they are made available in beams_state.beams[0].
  //    This is also called when the stop condition is met.
  //    Collect tokens into std::vector<llama_token> response which is pointed to by callback_data.
-void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_state) {
+static void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_state) {
      auto& callback_data = *static_cast<beam_search_callback_data*>(callback_data_ptr);
      // Mark beams as EOS as needed.
      for (size_t i = 0 ; i < beams_state.n_beams ; ++i) {
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

index 293b455d093c367a22bc9bc309b91bbf5908b614..c291f0adf20e18bbb09232b0388dd16f1574ce90 100644 (file)
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -115,7 +115,7 @@ struct TransformerWeights {
      }
  };
  
-void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
+static void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
      // we calloc instead of malloc to keep valgrind happy
      w->token_embedding_table = new float[p->vocab_size * p->dim]();
      printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->token_embedding_table\n",__func__,p->vocab_size , p->dim, p->vocab_size * p->dim);
@@ -158,7 +158,7 @@ void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
      }
  }
  
-int checkpoint_init_weights(TransformerWeights *w, Config* p, FILE* f, bool shared_weights) {
+static int checkpoint_init_weights(TransformerWeights *w, Config* p, FILE* f, bool shared_weights) {
      if (fread(w->token_embedding_table, sizeof(float), p->vocab_size * p->dim, f) != static_cast<size_t>(p->vocab_size * p->dim)) return 1;
      if (fread(w->rms_att_weight, sizeof(float), p->n_layers * p->dim, f) != static_cast<size_t>(p->n_layers * p->dim)) return 1;
      if (fread(w->wq, sizeof(float), p->n_layers * p->dim * p->dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->dim)) return 1;
@@ -189,7 +189,7 @@ int checkpoint_init_weights(TransformerWeights *w, Config* p, FILE* f, bool shar
      return 0;
  }
  
-void print_sample_weights(TransformerWeights *w){
+static void print_sample_weights(TransformerWeights *w){
      printf("----- Quick print of first of the weight vales of all the variables\n");
      printf("%f\n", w->token_embedding_table[0]);
      printf("%f\n", w->rms_att_weight[0]);
@@ -324,7 +324,7 @@ struct train_params {
      int mem_compute1_gb;
  };
  
-void print_params(struct my_llama_hparams * params) {
+static void print_params(struct my_llama_hparams * params) {
      printf("%s: n_vocab: %d\n", __func__, params->n_vocab);
      printf("%s: n_ctx:   %d\n", __func__, params->n_ctx);
      printf("%s: n_embd:  %d\n", __func__, params->n_embd);
@@ -335,7 +335,7 @@ void print_params(struct my_llama_hparams * params) {
      printf("%s: n_rot:   %d\n", __func__, params->n_rot);
  }
  
-void init_model(struct my_llama_model * model) {
+static void init_model(struct my_llama_model * model) {
      const auto & hparams = model->hparams;
  
      const uint32_t n_embd  = hparams.n_embd;
@@ -408,17 +408,17 @@ void init_model(struct my_llama_model * model) {
      }
  }
  
-float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
+static float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
      float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
      return *ptr;
  }
  
-int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
+static int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
      int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
      return *ptr;
  }
  
-void print_row(struct ggml_tensor * probs, int i) {
+static void print_row(struct ggml_tensor * probs, int i) {
      for (int k = 0; k < probs->ne[0]; ++k) {
          float p = get_f32_2d(probs, k, i);
          printf(" %f", p);
@@ -426,7 +426,7 @@ void print_row(struct ggml_tensor * probs, int i) {
      printf("\n");
  }
  
-void print_matrix(struct ggml_tensor * probs) {
+static void print_matrix(struct ggml_tensor * probs) {
      assert(probs->n_dims == 2);
      for (int i = 0; i < probs->ne[1]; ++i) {
          for (int k = 0; k < probs->ne[0]; ++k) {
@@ -531,7 +531,7 @@ struct llama_file {
      }
  };
  
-bool is_ggml_file(const char *filename) {
+static bool is_ggml_file(const char * filename) {
      llama_file file(filename, "rb");
      if (file.size < 4) {
          return false;
@@ -540,7 +540,7 @@ bool is_ggml_file(const char *filename) {
      return magic == GGUF_MAGIC;
  }
  
-static std::string llama_escape_whitespaces(const std::string& text) {
+static std::string llama_escape_whitespaces(const std::string & text) {
      std::ostringstream out;
      for (char c : text) {
          if (c == ' ') out << "\xe2\x96\x81";
@@ -549,7 +549,7 @@ static std::string llama_escape_whitespaces(const std::string& text) {
      return out.str();
  }
  
-void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
+static void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
      if (is_ggml_file(filename)) {
          struct ggml_context * ctx_data = NULL;
  
@@ -637,7 +637,7 @@ void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab)
      }
  }
  
-void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
+static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
      int ct;
      switch (gg_weights->n_dims){
          case 1:
@@ -673,7 +673,9 @@ void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * kar
      }
  }
  
-void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * model, TransformerWeights* w, const char * filename) {
+static void save_as_llama_model(
+    struct llama_vocab * vocab, struct my_llama_model * model, TransformerWeights* w, const char * filename
+) {
      // convert AK weights into GG weights one by one.
      // w->token_embedding_table -> model->tok_embeddings
      // float*                   -> struct ggml_tensor
@@ -785,7 +787,7 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
      gguf_free(ctx);
  }
  
-struct train_params get_default_train_params() {
+static struct train_params get_default_train_params() {
      struct train_params params;
      params.fn_vocab_model    = "models/7B/ggml-model-f16.gguf";
      params.fn_llama2c_output_model = "ak_llama_model.bin";
@@ -835,7 +837,7 @@ struct train_params get_default_train_params() {
      return params;
  }
  
-void print_usage(int /*argc*/, char ** argv, const struct train_params * params) {
+static void print_usage(int /*argc*/, char ** argv, const struct train_params * params) {
      fprintf(stderr, "usage: %s [options]\n", argv[0]);
      fprintf(stderr, "\n");
      fprintf(stderr, "options:\n");
@@ -846,7 +848,7 @@ void print_usage(int /*argc*/, char ** argv, const struct train_params * params)
      fprintf(stderr, "\n");
  }
  
-bool params_parse(int argc, char ** argv, struct train_params * params) {
+static bool params_parse(int argc, char ** argv, struct train_params * params) {
      bool invalid_param = false;
      bool reqd_param_found = false;
      std::string arg;
@@ -901,7 +903,7 @@ bool params_parse(int argc, char ** argv, struct train_params * params) {
      return true;
  }
  
-std::string basename(const std::string &path) {
+static std::string basename(const std::string &path) {
      size_t pos = path.find_last_of("/\\");
      if (pos == std::string::npos) {
          return path;
diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp

index a34010f1022a77227e17d993a68049132959062b..9ab63a29310ad99c019d01afb9fe15d15aea3ebe 100644 (file)
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -13,14 +13,14 @@
  #define MIN(a, b) ((a) < (b) ? (a) : (b))
  #define MAX(a, b) ((a) > (b) ? (a) : (b))
  
-template<typename T>
+template <typename T>
  static std::string to_string(const T & val) {
      std::stringstream ss;
      ss << val;
      return ss.str();
  }
  
-bool gguf_ex_write(const std::string & fname) {
+static bool gguf_ex_write(const std::string & fname) {
      struct gguf_context * ctx = gguf_init_empty();
  
      gguf_set_val_u8  (ctx, "some.parameter.uint8",    0x12);
@@ -85,7 +85,7 @@ bool gguf_ex_write(const std::string & fname) {
  }
  
  // just read tensor info
-bool gguf_ex_read_0(const std::string & fname) {
+static bool gguf_ex_read_0(const std::string & fname) {
      struct gguf_init_params params = {
          /*.no_alloc = */ false,
          /*.ctx      = */ NULL,
@@ -143,7 +143,7 @@ bool gguf_ex_read_0(const std::string & fname) {
  }
  
  // read and create ggml_context containing the tensors and their data
-bool gguf_ex_read_1(const std::string & fname) {
+static bool gguf_ex_read_1(const std::string & fname) {
      struct ggml_context * ctx_data = NULL;
  
      struct gguf_init_params params = {
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index a8179f1bf011fa3bfda6c43d553923b16a855356..e3cc3d39bd6b5fccf5a0b4a79cf9c2ee3b429b0a 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -41,7 +41,8 @@ static std::ostringstream       * g_output_ss;
  static std::vector<llama_token> * g_output_tokens;
  static bool is_interacting = false;
  
-void write_logfile(
+
+static void write_logfile(
      const llama_context * ctx, const gpt_params & params, const llama_model * model,
      const std::vector<llama_token> & input_tokens, const std::string & output,
      const std::vector<llama_token> & output_tokens
@@ -86,7 +87,7 @@ void write_logfile(
  }
  
  #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
-void sigint_handler(int signo) {
+static void sigint_handler(int signo) {
      if (signo == SIGINT) {
          if (!is_interacting) {
              is_interacting = true;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp

index 3a1c8c28da09b322a83a76ccaad902067090785b..4620c43ac594a7857a0c53a2cdb04d76f72b6ec2 100644 (file)
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -28,9 +28,10 @@ struct results_log_softmax {
      float  prob;
  };
  
-void write_logfile(const llama_context * ctx, const gpt_params & params,
-                   const llama_model * model, const struct results_perplexity & results) {
-
+static void write_logfile(
+    const llama_context * ctx, const gpt_params & params, const llama_model * model,
+    const struct results_perplexity & results
+) {
      if (params.logdir.empty()) {
          return;
      }
@@ -76,7 +77,7 @@ void write_logfile(const llama_context * ctx, const gpt_params & params,
      fclose(logfile);
  }
  
-std::vector<float> softmax(const std::vector<float>& logits) {
+static std::vector<float> softmax(const std::vector<float>& logits) {
      std::vector<float> probs(logits.size());
      float max_logit = logits[0];
      for (float v : logits) max_logit = std::max(max_logit, v);
@@ -92,7 +93,7 @@ std::vector<float> softmax(const std::vector<float>& logits) {
      return probs;
  }
  
-results_log_softmax log_softmax(int n_vocab, const float * logits, int tok) {
+static results_log_softmax log_softmax(int n_vocab, const float * logits, int tok) {
      float max_logit = logits[0];
      for (int i = 1; i < n_vocab; ++i) max_logit = std::max(max_logit, logits[i]);
      double sum_exp = 0.0;
@@ -100,9 +101,10 @@ results_log_softmax log_softmax(int n_vocab, const float * logits, int tok) {
      return {logits[tok] - max_logit - log(sum_exp), logits[tok], expf(logits[tok] - max_logit) / (float) sum_exp};
  }
  
-void process_logits(int n_vocab, const float * logits, const int * tokens, int n_token, std::vector<std::thread> & workers,
-        double & nll, double & nll2, float * logit_history, float * prob_history) {
-
+static void process_logits(
+    int n_vocab, const float * logits, const int * tokens, int n_token, std::vector<std::thread> & workers,
+    double & nll, double & nll2, float * logit_history, float * prob_history
+) {
      std::mutex mutex;
      int counter = 0;
      auto compute = [&mutex, &counter, &nll, &nll2, logit_history, prob_history, n_vocab, logits, tokens, n_token] () {
@@ -130,7 +132,7 @@ void process_logits(int n_vocab, const float * logits, const int * tokens, int n
  
  }
  
-results_perplexity perplexity_v2(llama_context * ctx, const gpt_params & params) {
+static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params & params) {
      // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
      // Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
      // Output: `perplexity: 13.5106 [114/114]`
@@ -260,8 +262,7 @@ results_perplexity perplexity_v2(llama_context * ctx, const gpt_params & params)
      return {tokens, std::exp(nll / count), logit_history, prob_history};
  }
  
-results_perplexity perplexity(llama_context * ctx, const gpt_params & params) {
-
+static results_perplexity perplexity(llama_context * ctx, const gpt_params & params) {
      if (params.ppl_stride > 0) {
          return perplexity_v2(ctx, params);
      }
@@ -400,8 +401,9 @@ results_perplexity perplexity(llama_context * ctx, const gpt_params & params) {
      return {tokens, ppl, logit_history, prob_history};
  }
  
-std::vector<float> hellaswag_evaluate_tokens(llama_context * ctx, const std::vector<int>& tokens, int n_past, int n_batch,
-        int n_vocab, int n_thread) {
+static std::vector<float> hellaswag_evaluate_tokens(
+    llama_context * ctx, const std::vector<int>& tokens, int n_past, int n_batch, int n_vocab, int n_thread
+) {
      std::vector<float> result;
      result.reserve(tokens.size() * n_vocab);
      size_t n_chunk = (tokens.size() + n_batch - 1)/n_batch;
@@ -421,7 +423,7 @@ std::vector<float> hellaswag_evaluate_tokens(llama_context * ctx, const std::vec
      return result;
  }
  
-void hellaswag_score(llama_context * ctx, const gpt_params & params) {
+static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
      // Calculates hellaswag score (acc_norm) from prompt
      //
      // Data extracted from the HellaSwag validation dataset (MIT license) https://github.com/rowanz/hellaswag/blob/master/data/hellaswag_val.jsonl
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp

index 6ce03ba7ba50b12e1a6ba5033fb7cedd4b36d7ac..bfe70889af6152e6aa445453d8460f0b974d2127 100644 (file)
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -34,8 +34,8 @@ struct quantize_stats_params {
      std::vector<enum ggml_type> include_types;
  };
  
-const size_t HISTOGRAM_BUCKETS = 150;
-const double HISTOGRAM_RANGE = 0.03;
+constexpr size_t HISTOGRAM_BUCKETS = 150;
+constexpr double HISTOGRAM_RANGE = 0.03;
  
  struct error_stats {
      size_t num_samples;
@@ -44,8 +44,7 @@ struct error_stats {
      uint64_t error_histogram[HISTOGRAM_BUCKETS];
  };
  
-
-void quantize_stats_print_usage(int /*argc*/, char ** argv) {
+static void quantize_stats_print_usage(int /*argc*/, char ** argv) {
      quantize_stats_params params;
      fprintf(stderr, "usage: %s [options]\n", argv[0]);
      fprintf(stderr, "\n");
@@ -71,7 +70,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
  }
  
  // Check if a layer is included/excluded by command line
-bool layer_included(const quantize_stats_params & params, const std::string & layer) {
+static bool layer_included(const quantize_stats_params & params, const std::string & layer) {
      for (const auto& excluded : params.exclude_layers) {
          if (std::regex_search(layer, std::regex(excluded))) {
              return false;
@@ -86,7 +85,7 @@ bool layer_included(const quantize_stats_params & params, const std::string & la
  }
  
  // Update error statistics given vectors with the before/after result of quantization
-void update_error_stats(int64_t nelements, const float * input, const float * output, error_stats & stats) {
+static void update_error_stats(int64_t nelements, const float * input, const float * output, error_stats & stats) {
      for (int64_t i = 0; i < nelements; i++) {
          double diff = input[i] - output[i];
          stats.total_error += diff * diff;
@@ -96,14 +95,14 @@ void update_error_stats(int64_t nelements, const float * input, const float * ou
      stats.num_samples += nelements;
  }
  
-void combine_error_stats(error_stats & into, const error_stats & from) {
+static void combine_error_stats(error_stats & into, const error_stats & from) {
      into.num_samples += from.num_samples;
      into.total_error += from.total_error;
      if (from.max_error > into.max_error) into.max_error = from.max_error;
      for (size_t i=0; i<HISTOGRAM_BUCKETS; ++i) into.error_histogram[i] += from.error_histogram[i];
  }
  
-double find_quantile(const error_stats & stats, double quantile) {
+static double find_quantile(const error_stats & stats, double quantile) {
      double sum = std::accumulate(std::begin(stats.error_histogram), std::end(stats.error_histogram), 0.0);
  
      double accum = 0;
@@ -116,7 +115,7 @@ double find_quantile(const error_stats & stats, double quantile) {
      return INFINITY;
  }
  
-void print_error_stats(const std::string & name, const error_stats & stats, bool print_histogram) {
+static void print_error_stats(const std::string & name, const error_stats & stats, bool print_histogram) {
      double rmse = sqrt(stats.total_error / (double) stats.num_samples);
      double median = find_quantile(stats, .5);
      double pct95 = find_quantile(stats, .95);
@@ -143,17 +142,10 @@ static bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
          tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
  }
  
-void test_roundtrip_on_chunk(
-        const ggml_tensor * layer,
-        int64_t offset,
-        int64_t chunk_size,
-        const ggml_type_traits_t & qfns,
-        bool use_reference,
-        float * input_scratch,
-        char * quantized_scratch,
-        float * output_scratch,
-        error_stats & stats) {
-
+static void test_roundtrip_on_chunk(
+    const ggml_tensor * layer, int64_t offset, int64_t chunk_size, const ggml_type_traits_t & qfns, bool use_reference,
+    float * input_scratch, char * quantized_scratch, float * output_scratch, error_stats & stats
+) {
      if (layer->type == GGML_TYPE_F16) {
          for (int i = 0; i < chunk_size; i++) {
              input_scratch[i] = ggml_get_f32_1d(layer, i + offset);
@@ -174,18 +166,11 @@ void test_roundtrip_on_chunk(
  
  
  // Run quantization function for a single layer and update error stats
-void test_roundtrip_on_layer(
-        std::string & name,
-        bool print_layer_stats,
-        const ggml_type_traits_t & qfns,
-        bool use_reference,
-        const ggml_tensor * layer,
-        std::vector<float> & input_scratch,
-        std::vector<char> & quantized_scratch,
-        std::vector<float> & output_scratch,
-        error_stats & total_error,
-        int max_thread = 0) {
-
+static void test_roundtrip_on_layer(
+    std::string & name, bool print_layer_stats, const ggml_type_traits_t & qfns, bool use_reference,
+    const ggml_tensor * layer, std::vector<float> & input_scratch, std::vector<char> & quantized_scratch,
+    std::vector<float> & output_scratch, error_stats & total_error, int max_thread = 0
+) {
      assert(tensor_is_contiguous(layer));
      error_stats layer_error {};
      uint64_t nelements = ggml_nelements(layer);
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp

index 1bf182482e5ae4e73b596c36297bc1d1430c7cac..300788c9194fc4bdc053b249c6c122720ec22ddb 100644 (file)
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -40,7 +40,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
  };
  
  
-bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
+static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
      std::string ftype_str;
  
      for (auto ch : ftype_str_in) {
@@ -72,7 +72,7 @@ bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std:
  // usage:
  //  ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
  //
-void usage(const char * executable) {
+static void usage(const char * executable) {
      printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
      printf("  --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
      printf("  --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
diff --git a/examples/server/server.cpp b/examples/server/server.cpp

index 3f3c646503c79f1b60dcc0eace6986a6c7cc3a4c..1bb8e92c0f95e2af08dae74e70b6b313b8365e5d 100644 (file)
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1083,8 +1083,9 @@ static json format_final_response(llama_server_context &llama, const std::string
      return res;
  }
  
-static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+static json format_partial_response(
+    llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs
+) {
      json res = json{
          {"content", content},
          {"stop", false},
@@ -1215,7 +1216,7 @@ static void log_server_request(const Request &req, const Response &res)
                             });
  }
  
-bool is_at_eob(llama_server_context & server_context, const llama_token * tokens, const size_t n_tokens) {
+static bool is_at_eob(llama_server_context &server_context, const llama_token *tokens, const size_t n_tokens) {
      return n_tokens && tokens[n_tokens-1] == llama_token_eos(server_context.ctx);
  }
  
@@ -1225,7 +1226,7 @@ bool is_at_eob(llama_server_context & server_context, const llama_token * tokens
  //  * When all beams converge to a common prefix, they are made available in beams_state.beams[0].
  //    This is also called when the stop condition is met.
  //    Collect tokens into std::vector<llama_token> response which is pointed to by callback_data.
-void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
+static void beam_search_callback(void *callback_data, llama_beams_state beams_state) {
      auto & llama = *static_cast<llama_server_context*>(callback_data);
      // Mark beams as EOS as needed.
      for (size_t i = 0 ; i < beams_state.n_beams ; ++i) {
@@ -1258,7 +1259,8 @@ struct token_translator {
      std::string operator()(const completion_token_output & cto) const { return (*this)(cto.tok); }
  };
  
-void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) {
+static void append_to_generated_text_from_generated_token_probs(llama_server_context &llama)
+{
      auto & gtps = llama.generated_token_probs;
      auto translator = token_translator{llama.ctx};
      auto add_strlen = [=](size_t sum, const completion_token_output & cto) { return sum + translator(cto).size(); };
diff --git a/llama.cpp b/llama.cpp

index 8c5b74721cbc112c68ad859c843e3cad46018d47..a6502612232f88cff1f35a405836224489a49978 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -1,3 +1,4 @@
+#define LLAMA_API_INTERNAL
  #include "llama.h"
  
  #include "ggml.h"
@@ -108,7 +109,7 @@ static size_t utf8_len(char src) {
      return lookup[highbits];
  }
  
-void replace_all(std::string & s, const std::string & search, const std::string & replace) {
+static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
      std::string result;
      for (size_t pos = 0; ; pos += search.length()) {
          auto new_pos = s.find(search, pos);
@@ -1589,7 +1590,7 @@ struct llama_model_loader {
  // load LLaMA models
  //
  
-std::string llama_model_ftype_name(enum llama_ftype ftype) {
+static std::string llama_model_ftype_name(enum llama_ftype ftype) {
      if (ftype & LLAMA_FTYPE_GUESSED) {
          return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
      }
@@ -4295,7 +4296,7 @@ struct llama_grammar_candidate {
  
  // Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
  // pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
-std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
+static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
          const char         * src,
          llama_partial_utf8   partial_start) {
      static const int      lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
@@ -5893,7 +5894,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
  }
  
  // TODO: after the GGUF PR, this likely won't work and needs to be updated
-int llama_apply_lora_from_file_internal(const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads) {
+static int llama_apply_lora_from_file_internal(
+    const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads
+) {
      LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
  
      const int64_t t_start_lora_us = ggml_time_us();
@@ -6440,7 +6443,7 @@ struct llama_context * llama_new_context_with_model(
      return ctx;
  }
  
-struct llama_context * llama_init_from_file(
+static struct llama_context * llama_init_from_file(
                               const char * path_model,
              struct llama_context_params   params) {
      struct llama_model * model = llama_load_model_from_file(path_model, params);
@@ -6645,7 +6648,7 @@ struct llama_data_file_context : llama_data_context {
   * llama_copy_state_data(ctx, &data_ctx);
   *
  */
-void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
+static void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
      // copy rng
      {
          std::stringstream rng_ss;
@@ -7183,7 +7186,9 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
  }
  
  // For internal test use
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
+const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
+    struct llama_context * ctx
+) {
      return ctx->model.tensors_by_name;
  }
  
diff --git a/llama.h b/llama.h

index 37975bebed22e239b1f4d04ac1aae7b329ec9f93..c6ee038c727d7aa6d204e1fb7b2f88a42392d0ef 100644 (file)
--- a/llama.h
+++ b/llama.h
@@ -540,7 +540,9 @@ extern "C" {
  
  struct ggml_tensor;
  
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
+const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
+    struct llama_context * ctx
+);
  
  #endif // LLAMA_API_INTERNAL
  
diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp

index 48758cda81fdfd7207cb7fa0fb787e610a60ebed..e96372c4b7107dc89198e7066de1c97b1a8060be 100644 (file)
--- a/pocs/vdot/vdot.cpp
+++ b/pocs/vdot/vdot.cpp
@@ -16,7 +16,7 @@
  
  constexpr int kVecSize = 1 << 18;
  
-float drawFromGaussianPdf(std::mt19937& rndm) {
+static float drawFromGaussianPdf(std::mt19937& rndm) {
      constexpr double kScale = 1./(1. + std::mt19937::max());
      constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale;
      static float lastX;
@@ -28,7 +28,8 @@ float drawFromGaussianPdf(std::mt19937& rndm) {
      haveX = true;
      return r*cos(phi);
  }
-void fillRandomGaussianFloats(std::vector<float>& values, std::mt19937& rndm, float mean = 0) {
+
+static void fillRandomGaussianFloats(std::vector<float>& values, std::mt19937& rndm, float mean = 0) {
      for (auto& v : values) v = mean + drawFromGaussianPdf(rndm);
  }
  
diff --git a/tests/test-opt.cpp b/tests/test-opt.cpp

index 8ab240202a5850da56f10ee468ea5b0b081a0a22..ce49768584be01ac394ef535643f35e94850d2f4 100644 (file)
--- a/tests/test-opt.cpp
+++ b/tests/test-opt.cpp
@@ -36,15 +36,15 @@
  #define GGML_PRINT(...) printf(__VA_ARGS__)
  
  
-float frand(void) {
+static float frand(void) {
      return (float)rand()/(float)RAND_MAX;
  }
  
-int irand(int n) {
+static int irand(int n) {
      return rand()%n;
  }
  
-void get_random_dims(int64_t * dims, int ndims) {
+static void get_random_dims(int64_t * dims, int ndims) {
      dims[0] = dims[1] = dims[2] = dims[3] = 1;
  
      for (int i = 0; i < ndims; i++) {
@@ -52,7 +52,7 @@ void get_random_dims(int64_t * dims, int ndims) {
      }
  }
  
-void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
+static void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
      dims[0] = dims[1] = dims[2] = dims[3] = 1;
  
      for (int i = 0; i < ndims; i++) {
@@ -61,12 +61,9 @@ void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
  }
  
  
-struct ggml_tensor * get_random_tensor(
-        struct ggml_context * ctx0,
-        int ndims,
-        int64_t ne[],
-        float fmin,
-        float fmax) {
+static struct ggml_tensor * get_random_tensor(
+    struct ggml_context * ctx0, int ndims, int64_t ne[], float fmin, float fmax
+) {
      struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);
  
      switch (ndims) {
@@ -109,11 +106,11 @@ struct ggml_tensor * get_random_tensor(
      return result;
  }
  
-float get_element(const struct ggml_tensor * t, int idx) {
+static float get_element(const struct ggml_tensor * t, int idx) {
      return ((float *)t->data)[idx];
  }
  
-void set_element(struct ggml_tensor * t, int idx, float value) {
+static void set_element(struct ggml_tensor * t, int idx, float value) {
      ((float *)t->data)[idx] = value;
  }
  
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp

index 8d3c162d2bfa04bdb29a40131368dd9efda7523d..884af40548fb7912cd2e80c3c7e503bba938c06b 100644 (file)
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -13,24 +13,24 @@
  #pragma warning(disable: 4244 4267) // possible loss of data
  #endif
  
-const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
-const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
-const float MAX_DOT_PRODUCT_ERROR = 0.02f;
+constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
+constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
  
-const char* RESULT_STR[] = {"ok", "FAILED"};
+static const char* RESULT_STR[] = {"ok", "FAILED"};
  
  
  // Generate synthetic data
-void generate_data(float offset, size_t n, float * dst) {
+static void generate_data(float offset, size_t n, float * dst) {
      for (size_t i = 0; i < n; i++) {
          dst[i] = 0.1 + 2*cosf(i + offset);
      }
  }
  
  // Calculate RMSE between two float arrays
-float array_rmse(const float * a1, const float * a2, size_t n) {
+static float array_rmse(const float * a1, const float * a2, size_t n) {
      double sum = 0;
      for (size_t i = 0; i < n; i++) {
          double diff = a1[i] - a2[i];
@@ -40,7 +40,7 @@ float array_rmse(const float * a1, const float * a2, size_t n) {
  }
  
  // Total quantization error on test data
-float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
+static float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
      std::vector<uint8_t> tmp_q(2*test_size);
      std::vector<float> tmp_out(test_size);
  
@@ -50,7 +50,7 @@ float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, cons
  }
  
  // Total quantization error on test data
-float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
+static float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
      std::vector<uint8_t> tmp_q(2*test_size);
      std::vector<float> tmp_out(test_size);
      std::vector<float> tmp_out_ref(test_size);
@@ -64,7 +64,7 @@ float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size,
      return array_rmse(tmp_out.data(), tmp_out_ref.data(), test_size);
  }
  
-float dot_product(const float * a1, const float * a2, size_t test_size) {
+static float dot_product(const float * a1, const float * a2, size_t test_size) {
      double sum = 0;
      for (size_t i = 0; i < test_size; i++) {
          sum += a1[i] * a2[i];
@@ -73,7 +73,9 @@ float dot_product(const float * a1, const float * a2, size_t test_size) {
  }
  
  // Total dot product error
-float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float *test_data2) {
+static float dot_product_error(
+    ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float *test_data2
+) {
      std::vector<uint8_t> tmp_q1(2*test_size);
      std::vector<uint8_t> tmp_q2(2*test_size);
  
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp

index cbea7d4525ca4c7479c48d73fc936aa030a2b537..01aa6987731bb11ee28d789ed2ea689eab4c7c88 100644 (file)
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -61,22 +61,22 @@ inline int64_t cpu_cycles() {
  
  
  // Generate synthetic data
-void generate_data(float offset, size_t n, float * dst) {
+static void generate_data(float offset, size_t n, float * dst) {
      for (size_t i = 0; i < n; i++) {
          dst[i] = 0.1 + 2*cosf(i + offset);
      }
  }
  
-float gigabytes_per_second(size_t bytes, int64_t usecs) {
+static float gigabytes_per_second(size_t bytes, int64_t usecs) {
      return bytes / (float) usecs * 1000000 / (1024*1024*1024);
  }
  
-void * align_with_offset(void * ptr, int offset) {
+static void * align_with_offset(void * ptr, int offset) {
      size_t dummy_size = MAX_ALIGNMENT * 4;
      return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
  }
  
-void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
+static void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
      int64_t min_time_us = INT64_MAX;
      int64_t total_time_us = 0;
      int64_t min_time_cycles = INT64_MAX;
@@ -108,7 +108,7 @@ void benchmark_function(size_t size, size_t q_size, int64_t iterations, const st
      printf("      quantized throughput : %9.2f GB/s\n",  gigabytes_per_second(q_size * iterations, total_time_us));
  }
  
-void usage(char * argv[]) {
+static void usage(char * argv[]) {
      printf("Benchmark quantization specific functions on synthetic data\n");
      printf("\n");
      printf("usage: %s [options]\n", argv[0]);
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp

index 4437c39488e7af2ab1676a3970c6383d211cccdc..019c0d46269fae1dd64bf87a59a66d92c42bf686 100644 (file)
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -12,7 +12,8 @@
  #include <vector>
  #include <algorithm>
  
-void dump(const llama_token_data_array * candidates) {
+
+static void dump(const llama_token_data_array * candidates) {
      for (size_t i = 0; i < candidates->size; i++) {
          printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);
      }
@@ -21,9 +22,7 @@ void dump(const llama_token_data_array * candidates) {
  #define DUMP(__candidates) do { printf("%s:%d (%s)\n", __FILE__, __LINE__, __func__); dump((__candidates)); printf("-\n"); } while(0)
  
  
-void test_top_k(const std::vector<float> & probs,
-                const std::vector<float> & expected_probs,
-                int k) {
+static void test_top_k(const std::vector<float> & probs, const std::vector<float> & expected_probs, int k) {
      size_t n_vocab = probs.size();
      std::vector<llama_token_data> candidates;
      candidates.reserve(n_vocab);
@@ -45,10 +44,7 @@ void test_top_k(const std::vector<float> & probs,
  }
  
  
-void test_top_p(const std::vector<float> & probs,
-                const std::vector<float> & expected_probs,
-                float p) {
-
+static void test_top_p(const std::vector<float> & probs, const std::vector<float> & expected_probs, float p) {
      size_t n_vocab = probs.size();
      std::vector<llama_token_data> candidates;
      candidates.reserve(n_vocab);
@@ -70,9 +66,7 @@ void test_top_p(const std::vector<float> & probs,
  }
  
  
-void test_tfs(const std::vector<float> & probs,
-                const std::vector<float> & expected_probs,
-                float z) {
+static void test_tfs(const std::vector<float> & probs, const std::vector<float> & expected_probs, float z) {
      size_t n_vocab = probs.size();
      std::vector<llama_token_data> candidates;
      candidates.reserve(n_vocab);
@@ -93,9 +87,7 @@ void test_tfs(const std::vector<float> & probs,
  }
  
  
-void test_typical(const std::vector<float> & probs,
-                const std::vector<float> & expected_probs,
-                float p) {
+static void test_typical(const std::vector<float> & probs, const std::vector<float> & expected_probs, float p) {
      size_t n_vocab = probs.size();
      std::vector<llama_token_data> candidates;
      candidates.reserve(n_vocab);
@@ -116,11 +108,10 @@ void test_typical(const std::vector<float> & probs,
  }
  
  
-void test_repetition_penalty(
-                const std::vector<float> & probs,
-                const std::vector<llama_token> & last_tokens,
-                const std::vector<float> & expected_probs,
-                float penalty) {
+static void test_repetition_penalty(
+    const std::vector<float> & probs, const std::vector<llama_token> & last_tokens,
+    const std::vector<float> & expected_probs, float penalty
+) {
      assert(probs.size() == expected_probs.size());
  
      size_t n_vocab = probs.size();
@@ -145,11 +136,10 @@ void test_repetition_penalty(
  }
  
  
-void test_frequency_presence_penalty(
-                const std::vector<float> & probs,
-                const std::vector<llama_token> & last_tokens,
-                const std::vector<float> & expected_probs,
-                float alpha_frequency, float alpha_presence) {
+static void test_frequency_presence_penalty(
+    const std::vector<float> & probs, const std::vector<llama_token> & last_tokens,
+    const std::vector<float> & expected_probs, float alpha_frequency, float alpha_presence
+) {
      assert(probs.size() == expected_probs.size());
  
      size_t n_vocab = probs.size();
diff --git a/tests/test-tokenizer-1-llama.cpp b/tests/test-tokenizer-1-llama.cpp

index ab3d822f2b1cd1101eb186bf07e8ce6976f9d0bb..804ea2486a67a6336d78f8bfdf819d3e664f38e6 100644 (file)
--- a/tests/test-tokenizer-1-llama.cpp
+++ b/tests/test-tokenizer-1-llama.cpp
@@ -13,7 +13,7 @@
  
  typedef int codepoint;
  
-std::string codepoint_to_utf8(codepoint cp) {
+static std::string codepoint_to_utf8(codepoint cp) {
      std::string result;
      if (0x00 <= cp && cp <= 0x7f) {
          result.push_back(cp);
author	Cebtenzzre <redacted>
	Fri, 15 Sep 2023 19:38:27 +0000 (15:38 -0400)
committer	GitHub <redacted>
	Fri, 15 Sep 2023 19:38:27 +0000 (15:38 -0400)
CMakeLists.txt		patch \| blob \| history
Makefile		patch \| blob \| history
common/common.cpp		patch \| blob \| history
common/console.cpp		patch \| blob \| history
common/grammar-parser.cpp		patch \| blob \| history
examples/baby-llama/baby-llama.cpp		patch \| blob \| history
examples/beam-search/beam-search.cpp		patch \| blob \| history
examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp		patch \| blob \| history
examples/gguf/gguf.cpp		patch \| blob \| history
examples/main/main.cpp		patch \| blob \| history
examples/perplexity/perplexity.cpp		patch \| blob \| history
examples/quantize-stats/quantize-stats.cpp		patch \| blob \| history
examples/quantize/quantize.cpp		patch \| blob \| history
examples/server/server.cpp		patch \| blob \| history
llama.cpp		patch \| blob \| history
llama.h		patch \| blob \| history
pocs/vdot/vdot.cpp		patch \| blob \| history
tests/test-opt.cpp		patch \| blob \| history
tests/test-quantize-fns.cpp		patch \| blob \| history
tests/test-quantize-perf.cpp		patch \| blob \| history
tests/test-sampling.cpp		patch \| blob \| history
tests/test-tokenizer-1-llama.cpp		patch \| blob \| history