build : enable more non-default compiler warnings (#3200)
author    Cebtenzzre <redacted>
Thu, 28 Sep 2023 21:41:44 +0000 (17:41 -0400)
committer GitHub <redacted>
Thu, 28 Sep 2023 21:41:44 +0000 (17:41 -0400)
16 files changed:
.gitignore
CMakeLists.txt
Makefile
common/common.cpp
common/log.h
examples/baby-llama/baby-llama.cpp
examples/llama-bench/llama-bench.cpp
examples/main/main.cpp
examples/quantize/quantize.cpp
examples/train-text-from-scratch/train-text-from-scratch.cpp
ggml.c
ggml.h
llama.cpp
pocs/vdot/q8dot.cpp
tests/test-grad0.cpp
tests/test-opt.cpp

diff --git a/.gitignore b/.gitignore
index 8ba3b9f4bf1b0c8f00ce78da84be35918738b33e..f98132a22093cd601875cf8d7e60fe28ebbebe91 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@ models-mnt
 /main
 /metal
 /perplexity
+/q8dot
 /quantize
 /quantize-stats
 /result
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c4a649a9762758d6d353fa04e40253f8f9551edc..d5acf8540d1c8f012c7b13e6ecbf0ee7a9672d40 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -414,37 +414,38 @@ endif()
 
 if (LLAMA_ALL_WARNINGS)
     if (NOT MSVC)
-        set(c_flags
-            -Wall
-            -Wextra
-            -Wpedantic
-            -Wcast-qual
-            -Wdouble-promotion
-            -Wshadow
-            -Wstrict-prototypes
-            -Wpointer-arith
-            -Wmissing-prototypes
-            -Werror=implicit-int
-            -Wno-unused-function
-        )
-        set(cxx_flags
-            -Wall
-            -Wextra
-            -Wpedantic
-            -Wcast-qual
-            -Wmissing-declarations
-            -Wno-unused-function
-            -Wno-multichar
-        )
-        if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-            # g++ only
-            set(cxx_flags ${cxx_flags} -Wno-format-truncation -Wno-array-bounds)
+        set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
+        set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
+            -Werror=implicit-function-declaration)
+        set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
+
+        if (CMAKE_C_COMPILER_ID MATCHES "Clang")
+            set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
+            set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)
+
+            if (
+                (CMAKE_C_COMPILER_ID STREQUAL "Clang"      AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
+                (CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
+            )
+                set(c_flags ${c_flags} -Wdouble-promotion)
+            endif()
+        elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
+            set(c_flags ${c_flags} -Wdouble-promotion)
+            set(cxx_flags ${cxx_flags} -Wno-array-bounds)
+
+            if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
+                set(cxx_flags ${cxx_flags} -Wno-format-truncation)
+            endif()
+            if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
+                set(cxx_flags ${cxx_flags} -Wextra-semi)
+            endif()
         endif()
     else()
         # todo : msvc
     endif()
 
     add_compile_options(
+            ${warning_flags}
             "$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
             "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
     )
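
The C-only flags newly enforced above (-Wmissing-prototypes, -Werror=implicit-int, -Werror=implicit-function-declaration) plus the conditionally enabled -Wdouble-promotion target patterns that older compiler defaults silently accepted. A minimal C sketch of code that stays clean under the new flags (hypothetical file, not part of this commit):

    /* warnings_demo.c - illustrative only; all names are made up */
    #include <stdio.h>

    /* -Wmissing-prototypes: a non-static function with no prior prototype is
       flagged, so internal helpers are declared static instead */
    static int helper(int x) { return x * 2; }

    int main(void) {
        float f = 0.5f;
        /* -Wdouble-promotion: passing a float to a variadic function promotes it
           to double implicitly; the explicit cast keeps the promotion visible */
        printf("%f\n", (double) f);
        /* -Werror=implicit-function-declaration: calling a function with no
           visible declaration (e.g. a typo like helpr(2)) is now a hard error */
        return helper(2) - 4;
    }
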
diff --git a/Makefile b/Makefile
index 53af3c692d105c5786cc6b6d233aed0c90ef366a..08b83ca7e30d671eb6a7eb5c2768825de99f4fbc 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative parallel finetune export-lora tests/test-c.o
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative benchmark-matmult parallel finetune export-lora tests/test-c.o
 
 # Binaries only useful for tests
 TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
@@ -19,6 +19,20 @@ ifndef UNAME_M
 UNAME_M := $(shell uname -m)
 endif
 
+ifeq '' '$(findstring clang,$(shell $(CC) --version))'
+       CC_IS_GCC=1
+       CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+else
+       CC_IS_CLANG=1
+       ifeq '' '$(findstring Apple LLVM,$(shell $(CC) --version))'
+               CC_IS_LLVM_CLANG=1
+       else
+               CC_IS_APPLE_CLANG=1
+       endif
+       CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
+                               | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+endif
+
 # Mac OS + Arm can report x86_64
 # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
 ifeq ($(UNAME_S),Darwin)
@@ -87,9 +101,6 @@ CC   := riscv64-unknown-linux-gnu-gcc
 CXX    := riscv64-unknown-linux-gnu-g++
 endif
 
-CCV := $(shell $(CC) --version | head -n 1)
-CXXV := $(shell $(CXX) --version | head -n 1)
-
 #
 # Compile flags
 #
@@ -173,20 +184,33 @@ ifdef LLAMA_DISABLE_LOGS
 endif # LLAMA_DISABLE_LOGS
 
 # warnings
-MK_CFLAGS    += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
-                               -Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
-MK_CXXFLAGS  += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
-
-# TODO(cebtenzzre): remove this once PR #2632 gets merged
-TTFS_CXXFLAGS = $(CXXFLAGS) -Wno-missing-declarations
-
-ifneq '' '$(findstring clang,$(shell $(CXX) --version))'
-       # clang++ only
-       MK_CXXFLAGS   += -Wmissing-prototypes
-       TTFS_CXXFLAGS += -Wno-missing-prototypes
+WARN_FLAGS    = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+MK_CFLAGS    += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
+                               -Werror=implicit-function-declaration
+MK_CXXFLAGS  += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
+
+ifeq ($(CC_IS_CLANG), 1)
+       # clang options
+       MK_CFLAGS        += -Wunreachable-code-break -Wunreachable-code-return
+       MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
+
+       ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
+               MK_CFLAGS += -Wdouble-promotion
+       endif
+       ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))'
+               MK_CFLAGS += -Wdouble-promotion
+       endif
 else
-       # g++ only
-       MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
+       # gcc options
+       MK_CFLAGS        += -Wdouble-promotion
+       MK_HOST_CXXFLAGS += -Wno-array-bounds
+
+       ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
+               MK_HOST_CXXFLAGS += -Wno-format-truncation
+       endif
+       ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
+               MK_HOST_CXXFLAGS += -Wextra-semi
+       endif
 endif
 
 # OS specific
@@ -382,7 +406,7 @@ ifdef LLAMA_CUDA_CCBIN
        NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
 endif
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
-       $(NVCC) $(NVCCFLAGS) -Wno-pedantic -c $< -o $@
+       $(NVCC) $(NVCCFLAGS) -c $< -o $@
 endif # LLAMA_CUBLAS
 
 ifdef LLAMA_CLBLAST
@@ -472,8 +496,8 @@ $(info I CFLAGS:    $(CFLAGS))
 $(info I CXXFLAGS:  $(CXXFLAGS))
 $(info I NVCCFLAGS: $(NVCCFLAGS))
 $(info I LDFLAGS:   $(LDFLAGS))
-$(info I CC:        $(CCV))
-$(info I CXX:       $(CXXV))
+$(info I CC:        $(shell $(CC) --version | head -n 1))
+$(info I CXX:       $(shell $(CXX) --version | head -n 1))
 $(info )
 
 #
@@ -554,7 +578,7 @@ gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
        $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
-       $(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+       $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
        $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@@ -601,11 +625,18 @@ tests: $(TEST_TARGETS)
 
 benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
        $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+run-benchmark-matmult: benchmark-matmult
        ./$@
 
+.PHONY: run-benchmark-matmult
+
 vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
        $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
+q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
+       $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
+
 tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
        $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
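Both build systems now gate version-dependent warnings on the detected compiler. The Makefile packs the compiler version into a zero-padded integer (awk "%02d%02d%02d") so it can be compared with expr, e.g. gcc 7.1.0 becomes 070100. A small C sketch of the same encoding (illustrative only, not part of the build):

    #include <stdio.h>

    /* pack MAJOR.MINOR.PATCH the way the Makefile's awk one-liner does */
    static int pack_version(int major, int minor, int patch) {
        return major * 10000 + minor * 100 + patch;     /* 7.1.0 -> 70100 */
    }

    int main(void) {
        printf("%06d\n", pack_version(7, 1, 0));        /* prints 070100 */
        printf("%d\n", pack_version(8, 1, 0) >= 80100); /* 1: gcc >= 8.1 enables -Wextra-semi */
        return 0;
    }
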
diff --git a/common/common.cpp b/common/common.cpp
index 6e8c08cb883873358ce7241db087c1d353332e34..ec181c6b3b61a81ff9a5de852c7bd09f483c7b04 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -755,10 +755,9 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
         case 7: return "He";
         case 8: return "She";
         case 9: return "They";
-        default: return "To";
     }
 
-    return "The";
+    GGML_UNREACHABLE();
 }
 
 //
diff --git a/common/log.h b/common/log.h
index 18f3b9761a7880cf7ff9b23a31d5f91dd945a3b2..b8953fdcadae4a744b20db9d602a82d35ea94c47 100644 (file)
--- a/common/log.h
+++ b/common/log.h
@@ -225,31 +225,31 @@ enum LogTriState
 //  USE LOG() INSTEAD
 //
 #ifndef _MSC_VER
-    #define LOG_IMPL(str, ...)                                                                                          \
-    {                                                                                                               \
+    #define LOG_IMPL(str, ...)                                                                                      \
+    do {                                                                                                            \
         if (LOG_TARGET != nullptr)                                                                                  \
         {                                                                                                           \
             fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
             fflush(LOG_TARGET);                                                                                     \
         }                                                                                                           \
-    }
+    } while (0)
 #else
-    #define LOG_IMPL(str, ...)                                                                                               \
-    {                                                                                                                    \
+    #define LOG_IMPL(str, ...)                                                                                           \
+    do {                                                                                                                 \
         if (LOG_TARGET != nullptr)                                                                                       \
         {                                                                                                                \
             fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
             fflush(LOG_TARGET);                                                                                          \
         }                                                                                                                \
-    }
+    } while (0)
 #endif
 
 // INTERNAL, DO NOT USE
 //  USE LOG_TEE() INSTEAD
 //
 #ifndef _MSC_VER
-    #define LOG_TEE_IMPL(str, ...)                                                                                                          \
-    {                                                                                                                                   \
+    #define LOG_TEE_IMPL(str, ...)                                                                                                      \
+    do {                                                                                                                                \
         if (LOG_TARGET != nullptr)                                                                                                      \
         {                                                                                                                               \
             fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__);                     \
@@ -260,10 +260,10 @@ enum LogTriState
             fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL, __VA_ARGS__); \
             fflush(LOG_TEE_TARGET);                                                                                                     \
         }                                                                                                                               \
-    }
+    } while (0)
 #else
-    #define LOG_TEE_IMPL(str, ...)                                                                                                               \
-    {                                                                                                                                        \
+    #define LOG_TEE_IMPL(str, ...)                                                                                                           \
+    do {                                                                                                                                     \
         if (LOG_TARGET != nullptr)                                                                                                           \
         {                                                                                                                                    \
             fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__);                     \
@@ -274,7 +274,7 @@ enum LogTriState
             fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL "", ##__VA_ARGS__); \
             fflush(LOG_TEE_TARGET);                                                                                                          \
         }                                                                                                                                    \
-    }
+    } while (0)
 #endif
 
 // The '\0' as a last argument, is a trick to bypass the silly
@@ -435,41 +435,41 @@ inline FILE *log_handler() { return log_handler1_impl(); }
 inline void log_test()
 {
     log_disable();
-    LOG("01 Hello World to nobody, because logs are disabled!\n")
+    LOG("01 Hello World to nobody, because logs are disabled!\n");
     log_enable();
-    LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET))
-    LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n")
+    LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET));
+    LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n");
     log_set_target(stderr);
-    LOG("04 Hello World to stderr!\n")
-    LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n")
+    LOG("04 Hello World to stderr!\n");
+    LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n");
     log_set_target(LOG_DEFAULT_FILE_NAME);
-    LOG("06 Hello World to default log file!\n")
+    LOG("06 Hello World to default log file!\n");
     log_set_target(stdout);
-    LOG("07 Hello World to stdout!\n")
+    LOG("07 Hello World to stdout!\n");
     log_set_target(LOG_DEFAULT_FILE_NAME);
-    LOG("08 Hello World to default log file again!\n")
+    LOG("08 Hello World to default log file again!\n");
     log_disable();
-    LOG("09 Hello World _1_ into the void!\n")
+    LOG("09 Hello World _1_ into the void!\n");
     log_enable();
-    LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n")
+    LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n");
     log_disable();
     log_set_target("llama.anotherlog.log");
-    LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n")
+    LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n");
     log_enable();
-    LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n")
+    LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n");
     log_set_target("llama.yetanotherlog.log");
-    LOG("13 Hello World this time in yet new file?\n")
+    LOG("13 Hello World this time in yet new file?\n");
     log_set_target(log_filename_generator("llama_autonamed", "log"));
-    LOG("14 Hello World in log with generated filename!\n")
+    LOG("14 Hello World in log with generated filename!\n");
 #ifdef _MSC_VER
-    LOG_TEE("15 Hello msvc TEE without arguments\n")
-    LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test")
-    LOG_TEELN("17 Hello msvc TEELN without arguments\n")
-    LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test")
-    LOG("19 Hello msvc LOG without arguments\n")
-    LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test")
-    LOGLN("21 Hello msvc LOGLN without arguments\n")
-    LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test")
+    LOG_TEE("15 Hello msvc TEE without arguments\n");
+    LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test");
+    LOG_TEELN("17 Hello msvc TEELN without arguments\n");
+    LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test");
+    LOG("19 Hello msvc LOG without arguments\n");
+    LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test");
+    LOGLN("21 Hello msvc LOGLN without arguments\n");
+    LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test");
 #endif
 }
 
@@ -542,7 +542,7 @@ inline void log_dump_cmdline_impl(int argc, char **argv)
             buf << " " << argv[i];
         }
     }
-    LOGLN("Cmd:%s", buf.str().c_str())
+    LOGLN("Cmd:%s", buf.str().c_str());
 }
 
 #define log_tostr(var) log_var_to_string_impl(var).c_str()
@@ -620,10 +620,10 @@ inline std::string log_var_to_string_impl(const std::vector<int> & var)
 #define LOGLN(...) // dummy stub
 
 #undef LOG_TEE
-#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__); // convert to normal fprintf
+#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
 
 #undef LOG_TEELN
-#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__); // convert to normal fprintf
+#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
 
 #undef LOG_DISABLE
 #define LOG_DISABLE() // dummy stub
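
The LOG_IMPL and LOG_TEE_IMPL macros above are rewrapped in do { ... } while (0) so that each call site ends with a real semicolon (the calls in log_test and elsewhere gain one in this commit) and the macro expands to exactly one statement. A minimal C sketch of why a bare brace block is fragile (hypothetical macro names, not from the codebase):

    #include <stdio.h>

    #define LOG_BRACES(msg)  { fprintf(stderr, "%s\n", msg); }           /* fragile */
    #define LOG_DOWHILE(msg) do { fprintf(stderr, "%s\n", msg); } while (0)

    static void report(int ok) {
        /* With LOG_BRACES the semicolon after the expanded block would terminate
           the if statement, so the following else would fail to parse.
           LOG_DOWHILE expands to a single statement and the else attaches normally. */
        if (ok)
            LOG_DOWHILE("ok");
        else
            LOG_DOWHILE("not ok");
    }

    int main(void) { report(1); report(0); return 0; }
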
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index fb1a15c47b6eed5033f705c74d8116085a16f083..8155101d0ab936d8dd6b0a581626523305cf279a 100644 (file)
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1,9 +1,12 @@
 #include "ggml.h"
 #include "train.h"
+
 #include <vector>
 #include <cassert>
-#include <random>
+#include <cstdlib>
 #include <cstring>
+#include <random>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -64,7 +67,7 @@ static struct ggml_tensor * randomize_tensor(
             break;
         default:
             assert(false);
-    };
+    }
 
     return tensor;
 }
@@ -389,7 +392,7 @@ static void randomize_model_lora(
     free_random_normal_distribution(rnd);
 }
 
-static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
+static void init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
     const auto & hparams = model->hparams;
 
     const uint32_t n_ctx   = hparams.n_ctx;
@@ -415,14 +418,12 @@ static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * mod
 
         if (!cache->ctx) {
             fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
-            return false;
+            exit(1);
         }
     }
 
     cache->k = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
     cache->v = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
-
-    return true;
 }
 
 static bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 93bb0c8b1916b50990cbdeb5272df0b4fdd4588b..a04115c962655ac70a3de0cd721537ecf40d3f58 100644 (file)
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -655,9 +655,9 @@ struct printer {
     virtual ~printer() {}
 
     FILE * fout;
-    virtual void print_header(const cmd_params & params) { (void) params; };
+    virtual void print_header(const cmd_params & params) { (void) params; }
     virtual void print_test(const test & t) = 0;
-    virtual void print_footer() { };
+    virtual void print_footer() { }
 };
 
 struct csv_printer : public printer {
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index fd506773f74a345c93d9b5d1dde87a46e08f5951..3a4ed3f7814f8ef5de86853f7a33cda4bfcae8dc 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -852,7 +852,7 @@ int main(int argc, char ** argv) {
     llama_backend_free();
 
 #ifndef LOG_DISABLE_LOGS
-    LOG_TEE("Log end\n")
+    LOG_TEE("Log end\n");
 #endif // LOG_DISABLE_LOGS
 
     return 0;
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 1c1d957e63d5b33430ef327ba388d7478071ecfa..c7dd0d894634cbb242a5dcdbb2fbe45cbe8f104e 100644 (file)
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -72,6 +72,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
 // usage:
 //  ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
 //
+[[noreturn]]
 static void usage(const char * executable) {
     printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
     printf("  --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index a9cf8a38139e36c79129de72c9758bf9e4f76d32..5043f32d0375d568d431778e9bed2d3b76a3b26c 100644 (file)
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -483,7 +483,7 @@ static struct ggml_tensor * llama_build_train_graphs(
 }
 
 #define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
-{ \
+do { \
     const std::string skey(key); \
     const int kid = gguf_find_key(ctx, skey.c_str()); \
     if (kid >= 0) { \
@@ -495,7 +495,7 @@ static struct ggml_tensor * llama_build_train_graphs(
     } else if (req) { \
         die_fmt("key not found in model: %s", skey.c_str()); \
     } \
-}
+} while (0)
 
 static void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model) {
     // NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read
@@ -786,7 +786,7 @@ struct train_params {
     float rope_freq_scale;
 };
 
-struct train_params get_default_train_params() {
+static struct train_params get_default_train_params() {
     struct train_params params;
     params.common = get_default_train_params_common();
     params.fn_vocab_model    = "ggml-vic7b-uncensored-q4_0.bin";
diff --git a/ggml.c b/ggml.c
index 078b2c42252b270ed39981361abb8f047b6d1bff..820fe2e74b0ae7058aa7d930c2e428623c8044a1 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -245,18 +245,18 @@ inline static void * ggml_aligned_malloc(size_t size) {
 //
 
 #define GGML_TENSOR_UNARY_OP_LOCALS \
-    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
-    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb); \
-    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne); \
-    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb);
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
 
 #define GGML_TENSOR_BINARY_OP_LOCALS \
-    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
-    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb); \
-    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); \
-    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb); \
-    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne); \
-    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb);
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
 
 #if defined(GGML_USE_ACCELERATE)
 #include <Accelerate/Accelerate.h>
@@ -1866,7 +1866,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
     #define GGML_F16x8_ADD          vaddq_f16
     #define GGML_F16x8_MUL          vmulq_f16
     #define GGML_F16x8_REDUCE(res, x)                             \
-    {                                                             \
+    do {                                                          \
         int offset = GGML_F16_ARR >> 1;                           \
         for (int i = 0; i < offset; ++i) {                        \
             x[i] = vaddq_f16(x[i], x[offset+i]);                  \
@@ -1882,7 +1882,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
         const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 (x[0])); \
         const float32x4_t t1 = vcvt_f32_f16(vget_high_f16(x[0])); \
         res = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1));         \
-    }
+    } while (0)
 
     #define GGML_F16_VEC                GGML_F16x8
     #define GGML_F16_VEC_ZERO           GGML_F16x8_ZERO
@@ -1943,7 +1943,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
 #define GGML_F32x8_ADD     _mm256_add_ps
 #define GGML_F32x8_MUL     _mm256_mul_ps
 #define GGML_F32x8_REDUCE(res, x)                                 \
-{                                                                 \
+do {                                                              \
     int offset = GGML_F32_ARR >> 1;                               \
     for (int i = 0; i < offset; ++i) {                            \
         x[i] = _mm256_add_ps(x[i], x[offset+i]);                  \
@@ -1960,7 +1960,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
                                  _mm256_extractf128_ps(x[0], 1)); \
     const __m128 t1 = _mm_hadd_ps(t0, t0);                        \
     res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1));                     \
-}
+} while (0)
 // TODO: is this optimal ?
 
 #define GGML_F32_VEC        GGML_F32x8
@@ -5154,31 +5154,31 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
                 return ((int8_t *)(tensor->data))[i];
-            } break;
+            }
         case GGML_TYPE_I16:
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
                 return ((int16_t *)(tensor->data))[i];
-            } break;
+            }
         case GGML_TYPE_I32:
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
                 return ((int32_t *)(tensor->data))[i];
-            } break;
+            }
         case GGML_TYPE_F16:
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
                 return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
-            } break;
+            }
         case GGML_TYPE_F32:
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(float));
                 return ((float *)(tensor->data))[i];
-            } break;
+            }
         default:
             {
                 GGML_ASSERT(false);
-            } break;
+            }
     }
 
     return 0.0f;
@@ -5228,29 +5228,17 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
     void * data   = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
     switch (tensor->type) {
         case GGML_TYPE_I8:
-            {
-                return ((int8_t *) data)[0];
-            } break;
+            return ((int8_t *) data)[0];
         case GGML_TYPE_I16:
-            {
-                return ((int16_t *) data)[0];
-            } break;
+            return ((int16_t *) data)[0];
         case GGML_TYPE_I32:
-            {
-                return ((int32_t *) data)[0];
-            } break;
+            return ((int32_t *) data)[0];
         case GGML_TYPE_F16:
-            {
-                return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
-            } break;
+            return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
         case GGML_TYPE_F32:
-            {
-                return ((float *) data)[0];
-            } break;
+            return ((float *) data)[0];
         default:
-            {
-                GGML_ASSERT(false);
-            } break;
+            GGML_ASSERT(false);
     }
 
     return 0.0f;
@@ -5297,31 +5285,31 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
                 return ((int8_t *)(tensor->data))[i];
-            } break;
+            }
         case GGML_TYPE_I16:
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
                 return ((int16_t *)(tensor->data))[i];
-            } break;
+            }
         case GGML_TYPE_I32:
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
                 return ((int32_t *)(tensor->data))[i];
-            } break;
+            }
         case GGML_TYPE_F16:
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
                 return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
-            } break;
+            }
         case GGML_TYPE_F32:
             {
                 GGML_ASSERT(tensor->nb[0] == sizeof(float));
                 return ((float *)(tensor->data))[i];
-            } break;
+            }
         default:
             {
                 GGML_ASSERT(false);
-            } break;
+            }
     }
 
     return 0.0f;
@@ -5371,29 +5359,17 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
     void * data   = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
     switch (tensor->type) {
         case GGML_TYPE_I8:
-            {
-                return ((int8_t *) data)[0];
-            } break;
+            return ((int8_t *) data)[0];
         case GGML_TYPE_I16:
-            {
-                return ((int16_t *) data)[0];
-            } break;
+            return ((int16_t *) data)[0];
         case GGML_TYPE_I32:
-            {
-                return ((int32_t *) data)[0];
-            } break;
+            return ((int32_t *) data)[0];
         case GGML_TYPE_F16:
-            {
-                return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
-            } break;
+            return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
         case GGML_TYPE_F32:
-            {
-                return ((float *) data)[0];
-            } break;
+            return ((float *) data)[0];
         default:
-            {
-                GGML_ASSERT(false);
-            } break;
+            GGML_ASSERT(false);
     }
 
     return 0.0f;
@@ -8542,7 +8518,7 @@ static void ggml_compute_forward_dup_f16(
         return;
     }
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads
@@ -8813,7 +8789,7 @@ static void ggml_compute_forward_dup_f32(
         return;
     }
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads
@@ -9094,7 +9070,7 @@ static void ggml_compute_forward_add_f32(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     GGML_ASSERT( nb0 == sizeof(float));
     GGML_ASSERT(nb00 == sizeof(float));
@@ -9167,7 +9143,7 @@ static void ggml_compute_forward_add_f16_f32(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -9221,7 +9197,7 @@ static void ggml_compute_forward_add_f16_f16(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F16);
@@ -9272,7 +9248,7 @@ static void ggml_compute_forward_add_q_f32(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -9398,7 +9374,7 @@ static void ggml_compute_forward_add1_f32(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     GGML_ASSERT( nb0 == sizeof(float));
     GGML_ASSERT(nb00 == sizeof(float));
@@ -9453,7 +9429,7 @@ static void ggml_compute_forward_add1_f16_f32(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -9503,7 +9479,7 @@ static void ggml_compute_forward_add1_f16_f16(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F16);
@@ -9553,7 +9529,7 @@ static void ggml_compute_forward_add1_q_f32(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     const enum ggml_type type = src0->type;
     ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
@@ -9681,8 +9657,8 @@ static void ggml_compute_forward_acc_f32(
     const int nr = ggml_nrows(src1);
     const int nc = src1->ne[0];
 
-    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
-    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb);
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb)
 
     // src0 and dst as viewed during acc
     const size_t nb0 = ggml_element_size(src0);
@@ -9771,7 +9747,7 @@ static void ggml_compute_forward_sub_f32(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     GGML_ASSERT( nb0 == sizeof(float));
     GGML_ASSERT(nb00 == sizeof(float));
@@ -9861,7 +9837,7 @@ static void ggml_compute_forward_mul_f32(
 
     const int64_t nr = ggml_nrows(src0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     GGML_ASSERT( nb0 == sizeof(float));
     GGML_ASSERT(nb00 == sizeof(float));
@@ -9952,7 +9928,7 @@ static void ggml_compute_forward_div_f32(
 
     const int nr  = ggml_nrows(src0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     GGML_ASSERT( nb0 == sizeof(float));
     GGML_ASSERT(nb00 == sizeof(float));
@@ -10161,8 +10137,8 @@ static void ggml_compute_forward_sum_f32(
     assert(ggml_is_scalar(dst));
     assert(src0->nb[0] == sizeof(float));
 
-    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
-    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb);
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
 
     ggml_float sum     = 0;
     ggml_float row_sum = 0;
@@ -10193,8 +10169,8 @@ static void ggml_compute_forward_sum_f16(
 
     assert(src0->nb[0] == sizeof(ggml_fp16_t));
 
-    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
-    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb);
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
 
     float sum = 0;
     float row_sum = 0;
@@ -10247,7 +10223,7 @@ static void ggml_compute_forward_sum_rows_f32(
     GGML_ASSERT(src0->nb[0] == sizeof(float));
     GGML_ASSERT(dst->nb[0] == sizeof(float));
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     GGML_ASSERT(ne0 == 1);
     GGML_ASSERT(ne1 == ne01);
@@ -10297,7 +10273,7 @@ static void ggml_compute_forward_mean_f32(
 
     assert(src0->nb[0] == sizeof(float));
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     assert(ne0 == 1);
     assert(ne1 == ne01);
@@ -10397,7 +10373,7 @@ static void ggml_compute_forward_repeat_f32(
         return;
     }
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     // guaranteed to be an integer due to the check in ggml_can_repeat
     const int nr0 = (int)(ne0/ne00);
@@ -10508,7 +10484,7 @@ static void ggml_compute_forward_repeat_back_f32(
         return;
     }
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     // guaranteed to be an integer due to the check in ggml_can_repeat
     const int nr0 = (int)(ne00/ne0);
@@ -10586,7 +10562,7 @@ static void ggml_compute_forward_concat_f32(
 
     const int ith = params->ith;
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     // TODO: support for transposed / permuted tensors
     GGML_ASSERT(nb0  == sizeof(float));
@@ -11188,7 +11164,7 @@ static void ggml_compute_forward_norm_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));
@@ -11257,7 +11233,7 @@ static void ggml_compute_forward_rms_norm_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));
@@ -11322,7 +11298,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     float eps;
     memcpy(&eps, dst->op_params, sizeof(float));
@@ -11497,7 +11473,7 @@ static void ggml_compute_forward_group_norm_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     const float eps = 1e-6f; // TODO: make this a parameter
 
@@ -11608,7 +11584,7 @@ static void ggml_compute_forward_mul_mat(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -11826,7 +11802,7 @@ static void ggml_compute_forward_out_prod_f32(
     // int64_t t0 = ggml_perf_time_us();
     // UNUSED(t0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -12200,8 +12176,8 @@ static void ggml_compute_forward_set_f32(
     const int nr = ggml_nrows(src1);
     const int nc = src1->ne[0];
 
-    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
-    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb);
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb)
 
     // src0 and dst as viewed during set
     const size_t nb0 = ggml_element_size(src0);
@@ -12588,7 +12564,7 @@ static void ggml_compute_forward_diag_f32(
 
     // TODO: handle transposed/permuted matrices
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     GGML_ASSERT(ne00 == ne0);
     GGML_ASSERT(ne00 == ne1);
@@ -13163,7 +13139,7 @@ static void ggml_compute_forward_rope_f32(
     memcpy(&xpos_base,  (int32_t *) dst->op_params + 6, sizeof(float));
     memcpy(&xpos_down,  (int32_t *) dst->op_params + 7, sizeof(bool));
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
     //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13295,7 +13271,7 @@ static void ggml_compute_forward_rope_f16(
     memcpy(&freq_base,  (int32_t *) dst->op_params + 4, sizeof(float));
     memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
     //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13458,7 +13434,7 @@ static void ggml_compute_forward_rope_back_f32(
     memcpy(&xpos_base,  (int32_t *) dst->op_params + 6, sizeof(float));
     memcpy(&xpos_down,  (int32_t *) dst->op_params + 7, sizeof(bool));
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
     //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13558,7 +13534,7 @@ static void ggml_compute_forward_rope_back_f16(
     const int n_dims = ((int32_t *) dst->op_params)[1];
     const int mode   = ((int32_t *) dst->op_params)[2];
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
     //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13672,7 +13648,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f16_f32(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -13763,7 +13739,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f32(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -13875,7 +13851,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f16_f32(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -13966,7 +13942,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f32(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -14084,7 +14060,7 @@ static void ggml_compute_forward_conv_1d(
         ggml_compute_forward_conv_1d_s2_ph(params, src0, src1, dst);
     } else {
         GGML_ASSERT(false); // only stride 1 and 2 supported
-    };
+    }
 }
 
 // ggml_compute_forward_conv_2d
@@ -14101,7 +14077,7 @@ static void ggml_compute_forward_conv_2d_f16_f32(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -14221,7 +14197,7 @@ static void ggml_compute_forward_conv_transpose_2d(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_BINARY_OP_LOCALS;
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -14480,7 +14456,7 @@ static void ggml_compute_forward_upscale_f32(
 
     const int ith = params->ith;
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     const int scale_factor = dst->op_params[0];
 
@@ -14532,14 +14508,14 @@ static void ggml_compute_forward_flash_attn_f32(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_LOCALS(int64_t, neq, q,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbq, q,   nb);
-    GGML_TENSOR_LOCALS(int64_t, nek, k,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbk, k,   nb);
-    GGML_TENSOR_LOCALS(int64_t, nev, v,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbv, v,   nb);
-    GGML_TENSOR_LOCALS(int64_t, ne,  dst, ne);
-    GGML_TENSOR_LOCALS(size_t,  nb,  dst, nb);
+    GGML_TENSOR_LOCALS(int64_t, neq, q,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbq, q,   nb)
+    GGML_TENSOR_LOCALS(int64_t, nek, k,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbk, k,   nb)
+    GGML_TENSOR_LOCALS(int64_t, nev, v,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbv, v,   nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst, nb)
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -14722,14 +14698,14 @@ static void ggml_compute_forward_flash_attn_f16(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_LOCALS(int64_t, neq, q,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbq, q,   nb);
-    GGML_TENSOR_LOCALS(int64_t, nek, k,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbk, k,   nb);
-    GGML_TENSOR_LOCALS(int64_t, nev, v,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbv, v,   nb);
-    GGML_TENSOR_LOCALS(int64_t, ne,  dst, ne);
-    GGML_TENSOR_LOCALS(size_t,  nb,  dst, nb);
+    GGML_TENSOR_LOCALS(int64_t, neq, q,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbq, q,   nb)
+    GGML_TENSOR_LOCALS(int64_t, nek, k,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbk, k,   nb)
+    GGML_TENSOR_LOCALS(int64_t, nev, v,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbv, v,   nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst, nb)
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -14974,18 +14950,18 @@ static void ggml_compute_forward_flash_ff_f16(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_LOCALS(int64_t, nea,  a,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nba,  a,   nb);
-    GGML_TENSOR_LOCALS(int64_t, neb0, b0,  ne);
-    GGML_TENSOR_LOCALS(size_t,  nbb0, b0,  nb);
-    GGML_TENSOR_LOCALS(int64_t, neb1, b1,  ne);
-    GGML_TENSOR_LOCALS(size_t,  nbb1, b1,  nb);
-    GGML_TENSOR_LOCALS(int64_t, nec0, c0,  ne);
-    GGML_TENSOR_LOCALS(size_t,  nbc0, c0,  nb);
-    GGML_TENSOR_LOCALS(int64_t, nec1, c1,  ne);
-    GGML_TENSOR_LOCALS(size_t,  nbc1, c1,  nb);
-    GGML_TENSOR_LOCALS(int64_t, ne,   dst, ne);
-    GGML_TENSOR_LOCALS(size_t,  nb,   dst, nb);
+    GGML_TENSOR_LOCALS(int64_t, nea,  a,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nba,  a,   nb)
+    GGML_TENSOR_LOCALS(int64_t, neb0, b0,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nbb0, b0,  nb)
+    GGML_TENSOR_LOCALS(int64_t, neb1, b1,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nbb1, b1,  nb)
+    GGML_TENSOR_LOCALS(int64_t, nec0, c0,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nbc0, c0,  nb)
+    GGML_TENSOR_LOCALS(int64_t, nec1, c1,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nbc1, c1,  nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,   dst, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,   dst, nb)
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -15133,16 +15109,16 @@ static void ggml_compute_forward_flash_attn_back_f32(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    GGML_TENSOR_LOCALS(int64_t, neq, q,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbq, q,   nb);
-    GGML_TENSOR_LOCALS(int64_t, nek, k,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbk, k,   nb);
-    GGML_TENSOR_LOCALS(int64_t, nev, v,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbv, v,   nb);
-    GGML_TENSOR_LOCALS(int64_t, ned, d,   ne);
-    GGML_TENSOR_LOCALS(size_t,  nbd, d,   nb);
-    GGML_TENSOR_LOCALS(int64_t, ne,  dst, ne);
-    GGML_TENSOR_LOCALS(size_t,  nb,  dst, nb);
+    GGML_TENSOR_LOCALS(int64_t, neq, q,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbq, q,   nb)
+    GGML_TENSOR_LOCALS(int64_t, nek, k,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbk, k,   nb)
+    GGML_TENSOR_LOCALS(int64_t, nev, v,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbv, v,   nb)
+    GGML_TENSOR_LOCALS(int64_t, ned, d,   ne)
+    GGML_TENSOR_LOCALS(size_t,  nbd, d,   nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst, nb)
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -15505,8 +15481,8 @@ static void ggml_compute_forward_win_part_f32(
         return;
     }
 
-    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
-    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne);
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
 
     const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
     const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
@@ -15567,8 +15543,8 @@ static void ggml_compute_forward_win_unpart_f32(
         return;
     }
 
-    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
-    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne);
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
 
     const int32_t w = ((const int32_t *)(dst->op_params))[0];
 
@@ -15685,7 +15661,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
 
     // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L292-L322
 
-    GGML_TENSOR_UNARY_OP_LOCALS;
+    GGML_TENSOR_UNARY_OP_LOCALS
 
     const int64_t w = ne1;
 
@@ -19637,7 +19613,7 @@ static enum ggml_opt_result linesearch_backtracking(
         (*step) *= width;
     }
 
-    return GGML_LINESEARCH_FAIL;
+    GGML_UNREACHABLE();
 }
 
 static enum ggml_opt_result ggml_opt_lbfgs(
@@ -19904,7 +19880,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         step[0] = 1.0;
     }
 
-    return GGML_OPT_DID_NOT_CONVERGE;
+    GGML_UNREACHABLE();
 }
 
 struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
@@ -20638,10 +20614,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                                 } break;
                             case GGUF_TYPE_ARRAY:
                             case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
-                        };
+                        }
                     } break;
                 case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
-            };
+            }
 
             if (!ok) {
                 break;
@@ -21369,10 +21345,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
                             } break;
                         case GGUF_TYPE_ARRAY:
                         case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
-                    };
+                    }
                 } break;
             case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
-        };
+        }
     }
 
     // write tensor infos
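
Many of the ggml.c edits above follow from the stricter warnings: break statements that sat right after a return were dead code under clang's -Wunreachable-code-break, the GGML_F16x8_REDUCE/GGML_F32x8_REDUCE macros get the same do { ... } while (0) treatment as the logging macros, the GGML_TENSOR_*_OP_LOCALS macros and their call sites drop redundant trailing semicolons, and exhaustive switches end in GGML_UNREACHABLE() instead of a dummy return. A condensed C sketch of the unreachable-break cleanup (hypothetical function, not from the file):

    /* Before: each case ended with `return ...; } break;` and the break could
       never execute. Returning directly is equivalent and warning-clean. */
    static int element_size(int type) {
        switch (type) {
            case 0:  return 1;   /* e.g. an 8-bit type  */
            case 1:  return 2;   /* e.g. a 16-bit type  */
            default: return 4;   /* e.g. 32-bit types   */
        }
    }

    int main(void) { return element_size(1) == 2 ? 0 : 1; }
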
diff --git a/ggml.h b/ggml.h
index d61c28b2cd96a185195a86d95a95c6face3bb2db..460857fa4cbd1a74eab4ad053a3d3bf0d69f4ea3 100644 (file)
--- a/ggml.h
+++ b/ggml.h
         } \
     } while (0)
 
+#ifndef NDEBUG
+#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
+#elif defined(__GNUC__)
+#define GGML_UNREACHABLE() __builtin_unreachable()
+#else
+#define GGML_UNREACHABLE() ((void) 0)
+#endif
+
 // used to copy the number of elements and stride in bytes of tensors into local variables.
 // main purpose is to reduce code duplication and improve readability.
 //
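
GGML_UNREACHABLE(), added above, asserts in debug builds, becomes __builtin_unreachable() in optimized GCC/Clang builds, and degrades to a no-op elsewhere. That is what lets gpt_random_prompt, linesearch_backtracking, and ggml_opt_lbfgs earlier in this diff drop their placeholder return values after exhaustive switches and loops. A minimal usage sketch, assuming a hypothetical helper built on the real macro from ggml.h:

    #include "ggml.h"

    /* every case returns, so the line after the switch is genuinely unreachable */
    static const char * sign_name(int v) {
        switch ((v > 0) - (v < 0)) {
            case -1: return "negative";
            case  0: return "zero";
            case  1: return "positive";
        }
        GGML_UNREACHABLE();  /* debug: assert; release (GCC/Clang): optimizer hint */
    }
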
diff --git a/llama.cpp b/llama.cpp
index 685712d172666d42e6200b166e5cd1d7836c3f76..666acc21275327c12cf201e0d562322b6ae288c7 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -449,7 +449,7 @@ struct LLM_TN {
 //
 
 #define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
-{ \
+do { \
     const std::string skey(key); \
     const int kid = gguf_find_key(ctx, skey.c_str()); \
     if (kid >= 0) { \
@@ -461,7 +461,7 @@ struct LLM_TN {
     } else if (req) { \
         throw std::runtime_error(format("key not found in model: %s", skey.c_str())); \
     } \
-}
+} while (0)
 
 //
 // ggml helpers
@@ -1913,7 +1913,7 @@ static void llm_load_hparams(
                 }
             } break;
         default: (void)0;
-    };
+    }
 
     model.ftype = ml.ftype;
 }
@@ -2438,7 +2438,7 @@ static void llm_load_tensors(
                 } break;
             default:
                 throw std::runtime_error("unknown architecture");
-        };
+        }
     }
 
     ml.done_getting_tensors();
@@ -3981,7 +3981,7 @@ static struct ggml_cgraph * llama_build_graph(
             } break;
         default:
             GGML_ASSERT(false);
-    };
+    }
 
     return result;
 }
@@ -4626,7 +4626,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                 llm_tokenizer_bpe tokenizer(vocab);
                 tokenizer.tokenize(raw_text, output);
             } break;
-    };
+    }
 
     return output;
 }
@@ -7520,7 +7520,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
             buf[2] = '\x85';
             return 3;
         } else if (llama_is_control_token(model->vocab, token)) {
-            ;
+            // do nothing
         } else if (llama_is_byte_token(model->vocab, token)) {
             if (length < 1) {
                 return -1;
diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp
index 4e0e023575322fda13c7d4a359fd3093c0534188..111770d5519cbfd08c41247e7950e629d6488a3c 100644 (file)
--- a/pocs/vdot/q8dot.cpp
+++ b/pocs/vdot/q8dot.cpp
@@ -43,7 +43,7 @@ static_assert(QK4_1 == QK8_0, "QK4_1 and QK8_0 must be the same");
 static_assert(QK4_0 == QK8_0, "QK4_0 and QK8_0 must be the same");
 
 template <typename T>
-void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
+static void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
     for (auto& b : blocks) {
         b.d = 1;
         for (int i=0; i<QK4_1/2; ++i) {
@@ -54,7 +54,7 @@ void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
     }
 }
 
-void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
+static void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
     for (auto& b : blocks) {
         b.d = 1;
         int sum = 0;
@@ -66,7 +66,7 @@ void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
     }
 }
 
-float simpleDot(const block_q4_0& x, const block_q8_0& y) {
+static float simpleDot(const block_q4_0& x, const block_q8_0& y) {
     int s1 = 0; //, s2 = 0;
     for (int i=0; i<QK4_1/2; i+=2) {
         int v1 = x.qs[i+0] & 0xf;
@@ -81,7 +81,7 @@ float simpleDot(const block_q4_0& x, const block_q8_0& y) {
     //return y.d * x.d * (s1 - 8 * s2);
 }
 
-float simpleDot(const block_q4_1& x, const block_q8_0& y) {
+static float simpleDot(const block_q4_1& x, const block_q8_0& y) {
     int s1 = 0; //, s2 = 0;
     for (int i=0; i<QK4_1/2; i+=2) {
         int v1 = x.qs[i+0] & 0xf;
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp
index 4f49dc55aa487123d03790402d9760dd2cb64fd8..c3cd73bcbed2b3ba2255f3b3c8e1dd25809b9d7b 100644 (file)
--- a/tests/test-grad0.cpp
+++ b/tests/test-grad0.cpp
@@ -107,7 +107,7 @@ static struct ggml_tensor * get_random_tensor_f32(
             break;
         default:
             assert(false);
-    };
+    }
 
     return result;
 }
@@ -155,7 +155,7 @@ static struct ggml_tensor * get_random_tensor_f16(
             break;
         default:
             assert(false);
-    };
+    }
 
     return result;
 }
@@ -203,7 +203,7 @@ static struct ggml_tensor * get_random_tensor_i32(
             break;
         default:
             assert(false);
-    };
+    }
 
     return result;
 }
diff --git a/tests/test-opt.cpp b/tests/test-opt.cpp
index ce49768584be01ac394ef535643f35e94850d2f4..fb4e0be98d4bca2cbcb81fd9e9f314396120c436 100644 (file)
--- a/tests/test-opt.cpp
+++ b/tests/test-opt.cpp
@@ -101,7 +101,7 @@ static struct ggml_tensor * get_random_tensor(
             break;
         default:
             assert(false);
-    };
+    }
 
     return result;
 }
@@ -124,7 +124,7 @@ int main(void) {
     struct ggml_context * ctx = ggml_init(params);
 
     int64_t ne1[4] = {4, 128, 1, 1};
-    int64_t ne2[4] = {4, 256, 1, 1};;
+    int64_t ne2[4] = {4, 256, 1, 1};
     int64_t ne3[4] = {128, 256, 1, 1};
 
     struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1);