From: Cebtenzzre
Date: Mon, 28 Aug 2023 11:44:18 +0000 (-0400)
Subject: cmake : enable some basic warnings globally (#482)
X-Git-Tag: upstream/0.0.1642~1259
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=3545eeb48a1a2cacd6e17e91505f42d8d6cbe55e;p=pkg%2Fggml%2Fsources%2Fggml

cmake : enable some basic warnings globally (#482)

* cmake : make -Werror=vla global

* cmake : make -Wuninitialized global (part of -Wall)

* tests : fix some -Wunused warnings

This flag is not enabled by default. There are still some warnings
remaining.

* cmake : make -Wsign-compare global (part of -Wall)

* cmake : make -Wall global (minus -Wunused)

* cmake : make -Wstrict-prototypes global

* cmake : add -Wpedantic -Wformat=2 globally
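Most of the example changes below are mechanical fixes for -Wsign-compare
(part of -Wall): comparing a signed int loop index against an unsigned
size_t bound triggers the warning. A minimal sketch of the pattern and the
fix, illustrative only and not part of this patch:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        const char * s = "warnings";
        // `for (int i = 0; i < strlen(s); i++)` warns under -Wsign-compare,
        // because strlen() returns the unsigned size_t. Using size_t for
        // the index, as done throughout the examples, is the clean fix:
        for (size_t i = 0; i < strlen(s); i++) {
            printf("%zu: %c\n", i, s[i]);
        }
        return 0;
    }

The frand() -> frand(void) changes in the tests are the analogous fix for
-Wstrict-prototypes: in C, an empty parameter list declares a function
taking unspecified arguments, not a function taking none.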
---------

Co-authored-by: Georgi Gerganov
---

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 155e3d39..6589e78b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,6 +69,20 @@ endif()
 
 # warning flags
 
+if (GGML_ALL_WARNINGS)
+    if (NOT MSVC)
+        set(c_flags   -Wall -Wpedantic -Wformat=2 -Wno-unused -Wstrict-prototypes)
+        set(cxx_flags -Wall -Wpedantic -Wformat=2)
+    else()
+        # todo : windows
+    endif()
+
+    add_compile_options(
+        "$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
+        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
+    )
+endif()
+
 if (NOT MSVC)
     add_compile_options(-Werror=vla)
 endif()
diff --git a/examples/dolly-v2/main.cpp b/examples/dolly-v2/main.cpp
index a09cad61..18ad1ad8 100644
--- a/examples/dolly-v2/main.cpp
+++ b/examples/dolly-v2/main.cpp
@@ -705,8 +705,8 @@ std::string execute_prompt(
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int)embd_inp.size());
 
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
     }
     printf("\n");
@@ -716,7 +716,7 @@ std::string execute_prompt(
 
     const int32_t end_token = vocab.token_to_id["### End"];
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -754,9 +754,9 @@ std::string execute_prompt(
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() > params.n_batch) {
+                if (int32_t(embd.size()) > params.n_batch) {
                     break;
                 }
             }
@@ -875,7 +875,7 @@ int main(int argc, char ** argv) {
     }
 
 #if defined(DOLLY_INTERACTIVE_PORT)
-    int sockfd;
+    int sockfd = -1;
     if (params.interactive_port != -1) {
         sockfd = setup_port(params.interactive_port);
         if (sockfd == -1) {
@@ -890,7 +890,7 @@ int main(int argc, char ** argv) {
     while (true) {
         std::string prompt_input;
 #if defined(DOLLY_INTERACTIVE_PORT)
-        int clientfd;
+        int clientfd = -1;
         if (params.interactive_port != -1) {
             sockaddr_in clientaddr;
             socklen_t clientaddrlen = sizeof(clientaddr);
diff --git a/examples/gpt-2/main.cpp b/examples/gpt-2/main.cpp
index ed405002..14caf2cc 100644
--- a/examples/gpt-2/main.cpp
+++ b/examples/gpt-2/main.cpp
@@ -816,7 +816,7 @@ int main(int argc, char ** argv) {
     // this reduces the memory usage during inference, at the cost of a bit of speed at the beginning
     std::vector<gpt_vocab::id> embd;
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -854,9 +854,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() >= params.n_batch) {
+                if (int32_t(embd.size()) >= params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/gpt-j/main.cpp b/examples/gpt-j/main.cpp
index b23ad3d2..d5fca51b 100644
--- a/examples/gpt-j/main.cpp
+++ b/examples/gpt-j/main.cpp
@@ -671,7 +671,7 @@ int main(int argc, char ** argv) {
     size_t mem_per_token = 0;
     gptj_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -709,9 +709,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() > params.n_batch) {
+                if (int32_t(embd.size()) > params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/gpt-neox/main.cpp b/examples/gpt-neox/main.cpp
index 80ee6643..68ba723d 100644
--- a/examples/gpt-neox/main.cpp
+++ b/examples/gpt-neox/main.cpp
@@ -726,8 +726,8 @@ int main(int argc, char ** argv) {
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
 
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
     }
     printf("\n");
@@ -737,7 +737,7 @@ int main(int argc, char ** argv) {
     size_t mem_per_token = 0;
     gpt_neox_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -775,9 +775,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() > params.n_batch) {
+                if (int32_t(embd.size()) > params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/mnist/main-cpu.cpp b/examples/mnist/main-cpu.cpp
index ba0c3136..6e1e3980 100644
--- a/examples/mnist/main-cpu.cpp
+++ b/examples/mnist/main-cpu.cpp
@@ -42,7 +42,7 @@ int mnist_eval(
     struct ggml_cgraph gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
 
     // param export/import test
-    GGML_ASSERT(ggml_graph_get_tensor(&gfi, "fc1_bias")->op_params[0] == 0xdeadbeef);
+    GGML_ASSERT(ggml_graph_get_tensor(&gfi, "fc1_bias")->op_params[0] == int(0xdeadbeef));
 
     // allocate work context
     // needed during ggml_graph_compute() to allocate a work tensor
diff --git a/examples/mpt/main.cpp b/examples/mpt/main.cpp
index 2fda67cc..5fec3c12 100644
--- a/examples/mpt/main.cpp
+++ b/examples/mpt/main.cpp
@@ -243,7 +243,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
             // Convert token from utf-8
             std::wstring word_multibytes = convert_to_wstring(word);
             word.resize(word_multibytes.size());
-            for (int w = 0; w < word_multibytes.size(); w++) {
+            for (size_t w = 0; w < word_multibytes.size(); w++) {
                 word[w] = uint8_t(word_multibytes[w]);
             }
diff --git a/examples/replit/main.cpp b/examples/replit/main.cpp
index 3fb664d8..bd614399 100644
--- a/examples/replit/main.cpp
+++ b/examples/replit/main.cpp
@@ -52,9 +52,9 @@ std::pair<std::vector<std::size_t>, float> encode_word(const std::string & word,
     std::vector<float> best_segmentations_scores(word.length() + 1, -std::numeric_limits<float>::infinity());
     best_segmentations_scores[0] = 1.0;
 
-    for (int start_idx = 0; start_idx < word.length(); ++start_idx) {
+    for (size_t start_idx = 0; start_idx < word.length(); ++start_idx) {
         float best_score_at_start = best_segmentations_scores[start_idx];
-        for (int end_idx = start_idx + 1; end_idx <= word.length(); ++end_idx) {
+        for (size_t end_idx = start_idx + 1; end_idx <= word.length(); ++end_idx) {
             std::string token = word.substr(start_idx, end_idx - start_idx);
             if (model.count(token) && best_score_at_start != -std::numeric_limits<float>::infinity()) {
                 float token_score = model.at(token).second;
@@ -92,7 +92,7 @@ bool replit_tokenizer_load(replit_tokenizer & tokenizer, std::istream & fin, int
     std::string word;
     std::vector<char> buf(128);
 
-    for (std::size_t i = 0; i < max_vocab_size; i++) {
+    for (int i = 0; i < max_vocab_size; i++) {
         uint32_t len;
 
         fin.read((char *)&len, sizeof(len));
@@ -702,8 +702,8 @@ int main(int argc, char ** argv) {
 
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6zu\n", __func__, i, embd_inp[i]);
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6zu\n", __func__, i, embd_inp[i]);
         // vocab.id_to_token.at(embd_inp[i]).c_str()
     }
     printf("\n");
@@ -716,7 +716,7 @@ int main(int argc, char ** argv) {
     size_t mem_per_token = 0;
     replit_eval(model, params.n_threads, 0, {0, 1, 2, 3}, logits, false, mem_per_token);
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -755,9 +755,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() > params.n_batch) {
+                if (int32_t(embd.size()) > params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/sam/main.cpp b/examples/sam/main.cpp
index c1fff545..f5715691 100644
--- a/examples/sam/main.cpp
+++ b/examples/sam/main.cpp
@@ -11,6 +11,7 @@
 #include
 #include
+#include <cstddef>
 #include
 #include
 #include
@@ -1027,7 +1028,7 @@ bool sam_model_load(const std::string & fname, sam_model & model) {
         }
     }
 
-    if (n_tensors != model.tensors.size()) {
+    if (n_tensors != ptrdiff_t(model.tensors.size())) {
         fprintf(stderr, "%s: model file has %d tensors, but %d tensors were expected\n", __func__, n_tensors, (int) model.tensors.size());
         return false;
     }
diff --git a/examples/starcoder/main.cpp b/examples/starcoder/main.cpp
index 56576a66..548ebed7 100644
--- a/examples/starcoder/main.cpp
+++ b/examples/starcoder/main.cpp
@@ -146,7 +146,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp
     }
 
     // Add StarChat special tokens.
-    for (const std::string & token : {
+    for (std::string token : {
             "<|system|>",
             "<|user|>",
             "<|assistant|>",
@@ -809,8 +809,8 @@ int main(int argc, char ** argv) {
     printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
     }
     printf("\n\n");
@@ -836,7 +836,7 @@ int main(int argc, char ** argv) {
     size_t mem_per_token = 0;
     starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -876,13 +876,13 @@ int main(int argc, char ** argv) {
             last_n_tokens.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
 
                 last_n_tokens.erase(last_n_tokens.begin());
                 last_n_tokens.push_back(embd_inp[k]);
 
-                if (embd.size() >= params.n_batch) {
+                if (int32_t(embd.size()) >= params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/starcoder/starcoder-mmap.cpp b/examples/starcoder/starcoder-mmap.cpp
index b7d26f47..b8692694 100644
--- a/examples/starcoder/starcoder-mmap.cpp
+++ b/examples/starcoder/starcoder-mmap.cpp
@@ -264,7 +264,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp
     }
 
     // Add StarChat special tokens.
-    for (const std::string & token : {
+    for (std::string token : {
             "<|system|>",
             "<|user|>",
             "<|assistant|>",
@@ -1009,8 +1009,8 @@ int main(int argc, char ** argv) {
     printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
     }
     printf("\n\n");
@@ -1032,7 +1032,7 @@ int main(int argc, char ** argv) {
     printf("Calling starcoder_eval\n");
     starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
-    for (int i = int(embd.size()); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -1073,9 +1073,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() >= params.n_batch) {
+                if (int32_t(embd.size()) >= params.n_batch) {
                     break;
                 }
             }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b329c08e..81d04151 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,14 +1,10 @@
 if (GGML_ALL_WARNINGS)
-    if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
-        #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra")
+    if (NOT MSVC)
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
-            -Wall \
+            -Wunused \
             -Wextra \
-            -Wpedantic \
             -Wshadow \
             -Wcast-qual \
-            -Wstrict-prototypes \
-            -Wpointer-arith \
             -Wdouble-promotion \
             -Wno-unused-function \
             -Wmissing-prototypes \
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp
index 75a698d7..8b912bae 100644
--- a/tests/test-grad0.cpp
+++ b/tests/test-grad0.cpp
@@ -208,26 +208,6 @@ static struct ggml_tensor * get_random_tensor_i32(
     return result;
 }
 
-static void print_elements(const char* label, const struct ggml_tensor * t) {
-    if (!t) {
-        printf("%s: %s = null\n", __func__, label);
-        return;
-    }
-    const int nelements = ggml_nelements(t);
-    printf("%s: %s = [", __func__, label);
-    for (int k = 0; k < nelements; ++k) {
-        if (k > 0) { printf(", "); }
-        printf("%.5f", ggml_get_f32_1d(t, k));
-    }
-    printf("] shape: [");
-    for (int k = 0; k < t->n_dims; ++k) {
-        if (k > 0) { printf(", "); }
-        printf("%d", (int)t->ne[k]);
-    }
-    printf("]\n");
-
-}
-
 static bool check_gradient(
     const char * op_name,
     struct ggml_context * ctx0,
diff --git a/tests/test-mul-mat0.c b/tests/test-mul-mat0.c
index 1bd6e140..6212da41 100644
--- a/tests/test-mul-mat0.c
+++ b/tests/test-mul-mat0.c
@@ -13,7 +13,7 @@
 
 #define MAX_NARGS 2
 
-float frand() {
+float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
 
@@ -163,10 +163,6 @@ bool check_mat_mul(
     const struct ggml_tensor * y,
     const struct ggml_tensor * x0,
     const struct ggml_tensor * x1) {
-    float * dst  = (float *) y->data;
-    float * src0 = (float *) x0->data;
-    float * src1 = (float *) x1->data;
-
     const int64_t n00 = x0->ne[0];
     const int64_t n10 = x0->ne[1];
     const int64_t n20 = x0->ne[2];
diff --git a/tests/test-mul-mat2.c b/tests/test-mul-mat2.c
index 944c48e9..89af2863 100644
--- a/tests/test-mul-mat2.c
+++ b/tests/test-mul-mat2.c
@@ -54,7 +54,7 @@ const int K = 1280;
 #define gq_t_bits 64
 #define gq_quant_t uint64_t
 
-float frand() {
+float frand(void) {
     return (float) rand() / (float) RAND_MAX;
 }
 
@@ -127,7 +127,7 @@ static inline int quantize_1_blocks_per_row(int k) {
     return k/QK;
 }
 
-static inline int quantize_1_quants_per_block() {
+static inline int quantize_1_quants_per_block(void) {
     return QK/gq_t_bits;
 }
 
@@ -286,7 +286,7 @@ static inline int quantize_2_blocks_per_row(int k) {
     return k/QK;
 }
 
-static inline int quantize_2_quants_per_block() {
+static inline int quantize_2_quants_per_block(void) {
     return QK/gq_t_bits;
 }
 
@@ -662,9 +662,6 @@ void mul_mat_gq_2(
     int m, int n, int k) {
     assert(k % QK == 0);
 
-    const int nb = quantize_2_blocks_per_row(k);
-    const int nq = quantize_2_quants_per_block();
-
     for (int ir0 = 0; ir0 < m; ir0++) {
         for (int ir1 = 0; ir1 < n; ir1++) {
             vec_dot_gq_2(k, dst + ir1, src0, src1);
@@ -686,7 +683,7 @@ static inline int quantize_3_blocks_per_row(int k) {
     return k/QK;
 }
 
-static inline int quantize_3_quants_per_block() {
+static inline int quantize_3_quants_per_block(void) {
     return QK/gq_t_bits;
 }
 
@@ -2355,8 +2352,6 @@ void mul_mat_gq_6(
     int m, int n, int k) {
     assert(k % 32 == 0);
 
-    const int nb = quantize_6_blocks_per_row(k);
-
    for (int ir0 = 0; ir0 < m; ir0++) {
         for (int ir1 = 0; ir1 < n; ir1++) {
             vec_dot_gq_6(k, dst + ir1, src0, src1);
diff --git a/tests/test-vec1.c b/tests/test-vec1.c
index fefcd68f..567cb061 100644
--- a/tests/test-vec1.c
+++ b/tests/test-vec1.c
@@ -460,7 +460,7 @@ void mul_mat_vec_f16_3(
     }
 }
 
-uint64_t get_time_us() {
+uint64_t get_time_us(void) {
     struct timeval tv;
     gettimeofday(&tv, NULL);
     return tv.tv_sec * 1000000 + tv.tv_usec;