From: Cebtenzzre
Date: Mon, 28 Aug 2023 11:44:18 +0000 (-0400)
Subject: cmake : enable some basic warnings globally (#482)
X-Git-Tag: upstream/0.0.1642~1259
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=3545eeb48a1a2cacd6e17e91505f42d8d6cbe55e;p=pkg%2Fggml%2Fsources%2Fggml

cmake : enable some basic warnings globally (#482)

* cmake : make -Werror=vla global

* cmake : make -Wuninitialized global (part of -Wall)

* tests : fix some -Wunused warnings

This flag is not enabled by default. There are still some warnings
remaining.

* cmake : make -Wsign-compare global (part of -Wall)

* cmake : make -Wall global (minus -Wunused)

* cmake : make -Wstrict-prototypes global

* cmake : add -Wpedantic -Wformat=2 globally
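Most of the example changes below are mechanical fixes for -Wsign-compare
(part of -Wall): comparing a signed int loop index against an unsigned
size_t bound triggers the warning. A minimal sketch of the pattern and the
fix, illustrative only and not part of this patch:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        const char * s = "warnings";
        // `for (int i = 0; i < strlen(s); i++)` warns under -Wsign-compare,
        // because strlen() returns the unsigned size_t. Using size_t for
        // the index, as done throughout the examples, is the clean fix:
        for (size_t i = 0; i < strlen(s); i++) {
            printf("%zu: %c\n", i, s[i]);
        }
        return 0;
    }

The frand() -> frand(void) changes in the tests are the analogous fix for
-Wstrict-prototypes: in C, an empty parameter list declares a function
taking unspecified arguments, not a function taking none.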
---------

Co-authored-by: Georgi Gerganov
---

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 155e3d39..6589e78b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,6 +69,20 @@ endif()
 
 # warning flags
 
+if (GGML_ALL_WARNINGS)
+    if (NOT MSVC)
+        set(c_flags   -Wall -Wpedantic -Wformat=2 -Wno-unused -Wstrict-prototypes)
+        set(cxx_flags -Wall -Wpedantic -Wformat=2)
+    else()
+        # todo : windows
+    endif()
+
+    add_compile_options(
+        "$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
+        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
+    )
+endif()
+
 if (NOT MSVC)
     add_compile_options(-Werror=vla)
 endif()
diff --git a/examples/dolly-v2/main.cpp b/examples/dolly-v2/main.cpp
index a09cad61..18ad1ad8 100644
--- a/examples/dolly-v2/main.cpp
+++ b/examples/dolly-v2/main.cpp
@@ -705,8 +705,8 @@ std::string execute_prompt(
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int)embd_inp.size());
 
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
     }
     printf("\n");
@@ -716,7 +716,7 @@ std::string execute_prompt(
 
     const int32_t end_token = vocab.token_to_id["### End"];
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -754,9 +754,9 @@ std::string execute_prompt(
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() > params.n_batch) {
+                if (int32_t(embd.size()) > params.n_batch) {
                     break;
                 }
             }
@@ -875,7 +875,7 @@ int main(int argc, char ** argv) {
     }
 
 #if defined(DOLLY_INTERACTIVE_PORT)
-    int sockfd;
+    int sockfd = -1;
     if (params.interactive_port != -1) {
         sockfd = setup_port(params.interactive_port);
         if (sockfd == -1) {
@@ -890,7 +890,7 @@ int main(int argc, char ** argv) {
     while (true) {
         std::string prompt_input;
 #if defined(DOLLY_INTERACTIVE_PORT)
-        int clientfd;
+        int clientfd = -1;
         if (params.interactive_port != -1) {
             sockaddr_in clientaddr;
             socklen_t clientaddrlen = sizeof(clientaddr);
diff --git a/examples/gpt-2/main.cpp b/examples/gpt-2/main.cpp
index ed405002..14caf2cc 100644
--- a/examples/gpt-2/main.cpp
+++ b/examples/gpt-2/main.cpp
@@ -816,7 +816,7 @@ int main(int argc, char ** argv) {
     // this reduces the memory usage during inference, at the cost of a bit of speed at the beginning
     std::vector<gpt_vocab::id> embd;
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -854,9 +854,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() >= params.n_batch) {
+                if (int32_t(embd.size()) >= params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/gpt-j/main.cpp b/examples/gpt-j/main.cpp
index b23ad3d2..d5fca51b 100644
--- a/examples/gpt-j/main.cpp
+++ b/examples/gpt-j/main.cpp
@@ -671,7 +671,7 @@ int main(int argc, char ** argv) {
     size_t mem_per_token = 0;
     gptj_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -709,9 +709,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() > params.n_batch) {
+                if (int32_t(embd.size()) > params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/gpt-neox/main.cpp b/examples/gpt-neox/main.cpp
index 80ee6643..68ba723d 100644
--- a/examples/gpt-neox/main.cpp
+++ b/examples/gpt-neox/main.cpp
@@ -726,8 +726,8 @@ int main(int argc, char ** argv) {
     params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
 
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
     }
     printf("\n");
@@ -737,7 +737,7 @@ int main(int argc, char ** argv) {
     size_t mem_per_token = 0;
     gpt_neox_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -775,9 +775,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() > params.n_batch) {
+                if (int32_t(embd.size()) > params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/mnist/main-cpu.cpp b/examples/mnist/main-cpu.cpp
index ba0c3136..6e1e3980 100644
--- a/examples/mnist/main-cpu.cpp
+++ b/examples/mnist/main-cpu.cpp
@@ -42,7 +42,7 @@ int mnist_eval(
     struct ggml_cgraph gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
 
     // param export/import test
-    GGML_ASSERT(ggml_graph_get_tensor(&gfi, "fc1_bias")->op_params[0] == 0xdeadbeef);
+    GGML_ASSERT(ggml_graph_get_tensor(&gfi, "fc1_bias")->op_params[0] == int(0xdeadbeef));
 
     // allocate work context
     // needed during ggml_graph_compute() to allocate a work tensor
diff --git a/examples/mpt/main.cpp b/examples/mpt/main.cpp
index 2fda67cc..5fec3c12 100644
--- a/examples/mpt/main.cpp
+++ b/examples/mpt/main.cpp
@@ -243,7 +243,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
             // Convert token from utf-8
             std::wstring word_multibytes = convert_to_wstring(word);
             word.resize(word_multibytes.size());
-            for (int w = 0; w < word_multibytes.size(); w++) {
+            for (size_t w = 0; w < word_multibytes.size(); w++) {
                 word[w] = uint8_t(word_multibytes[w]);
             }
diff --git a/examples/replit/main.cpp b/examples/replit/main.cpp
index 3fb664d8..bd614399 100644
--- a/examples/replit/main.cpp
+++ b/examples/replit/main.cpp
@@ -52,9 +52,9 @@ std::pair<std::vector<std::size_t>, float> encode_word(const std::string & word,
     std::vector<float> best_segmentations_scores(word.length() + 1, -std::numeric_limits<float>::infinity());
     best_segmentations_scores[0] = 1.0;
 
-    for (int start_idx = 0; start_idx < word.length(); ++start_idx) {
+    for (size_t start_idx = 0; start_idx < word.length(); ++start_idx) {
         float best_score_at_start = best_segmentations_scores[start_idx];
-        for (int end_idx = start_idx + 1; end_idx <= word.length(); ++end_idx) {
+        for (size_t end_idx = start_idx + 1; end_idx <= word.length(); ++end_idx) {
             std::string token = word.substr(start_idx, end_idx - start_idx);
             if (model.count(token) && best_score_at_start != -std::numeric_limits<float>::infinity()) {
                 float token_score = model.at(token).second;
@@ -92,7 +92,7 @@ bool replit_tokenizer_load(replit_tokenizer & tokenizer, std::istream & fin, int
     std::string word;
     std::vector<char> buf(128);
 
-    for (std::size_t i = 0; i < max_vocab_size; i++) {
+    for (int i = 0; i < max_vocab_size; i++) {
         uint32_t len;
 
         fin.read((char *)&len, sizeof(len));
@@ -702,8 +702,8 @@ int main(int argc, char ** argv) {
 
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6zu\n", __func__, i, embd_inp[i]);
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6zu\n", __func__, i, embd_inp[i]);
         // vocab.id_to_token.at(embd_inp[i]).c_str()
     }
     printf("\n");
@@ -716,7 +716,7 @@ int main(int argc, char ** argv) {
     size_t mem_per_token = 0;
     replit_eval(model, params.n_threads, 0, {0, 1, 2, 3}, logits, false, mem_per_token);
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -755,9 +755,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() > params.n_batch) {
+                if (int32_t(embd.size()) > params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/sam/main.cpp b/examples/sam/main.cpp
index c1fff545..f5715691 100644
--- a/examples/sam/main.cpp
+++ b/examples/sam/main.cpp
@@ -11,6 +11,7 @@
 #include
 #include
+#include <cstddef>
 #include
 #include
 #include
@@ -1027,7 +1028,7 @@ bool sam_model_load(const std::string & fname, sam_model & model) {
         }
     }
 
-    if (n_tensors != model.tensors.size()) {
+    if (n_tensors != ptrdiff_t(model.tensors.size())) {
         fprintf(stderr, "%s: model file has %d tensors, but %d tensors were expected\n", __func__, n_tensors, (int) model.tensors.size());
         return false;
     }
diff --git a/examples/starcoder/main.cpp b/examples/starcoder/main.cpp
index 56576a66..548ebed7 100644
--- a/examples/starcoder/main.cpp
+++ b/examples/starcoder/main.cpp
@@ -146,7 +146,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp
     }
 
     // Add StarChat special tokens.
-    for (const std::string & token : {
+    for (std::string token : {
             "<|system|>",
             "<|user|>",
             "<|assistant|>",
@@ -809,8 +809,8 @@ int main(int argc, char ** argv) {
     printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
     }
     printf("\n\n");
@@ -836,7 +836,7 @@ int main(int argc, char ** argv) {
     size_t mem_per_token = 0;
     starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
-    for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -876,13 +876,13 @@ int main(int argc, char ** argv) {
             last_n_tokens.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
 
                 last_n_tokens.erase(last_n_tokens.begin());
                 last_n_tokens.push_back(embd_inp[k]);
 
-                if (embd.size() >= params.n_batch) {
+                if (int32_t(embd.size()) >= params.n_batch) {
                     break;
                 }
             }
diff --git a/examples/starcoder/starcoder-mmap.cpp b/examples/starcoder/starcoder-mmap.cpp
index b7d26f47..b8692694 100644
--- a/examples/starcoder/starcoder-mmap.cpp
+++ b/examples/starcoder/starcoder-mmap.cpp
@@ -264,7 +264,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp
     }
 
     // Add StarChat special tokens.
-    for (const std::string & token : {
+    for (std::string token : {
             "<|system|>",
             "<|user|>",
             "<|assistant|>",
@@ -1009,8 +1009,8 @@ int main(int argc, char ** argv) {
     printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < embd_inp.size(); i++) {
-        printf("%s: token[%d] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+    for (size_t i = 0; i < embd_inp.size(); i++) {
+        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
     }
     printf("\n\n");
@@ -1032,7 +1032,7 @@ int main(int argc, char ** argv) {
     printf("Calling starcoder_eval\n");
     starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
-    for (int i = int(embd.size()); i < embd_inp.size() + params.n_predict; i++) {
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
@@ -1073,9 +1073,9 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
         } else {
             // if here, it means we are still processing the input prompt
-            for (int k = i; k < embd_inp.size(); k++) {
+            for (size_t k = i; k < embd_inp.size(); k++) {
                 embd.push_back(embd_inp[k]);
-                if (embd.size() >= params.n_batch) {
+                if (int32_t(embd.size()) >= params.n_batch) {
                     break;
                 }
             }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b329c08e..81d04151 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,14 +1,10 @@
 if (GGML_ALL_WARNINGS)
-    if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
-        #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra")
+    if (NOT MSVC)
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
-            -Wall \
+            -Wunused \
             -Wextra \
-            -Wpedantic \
             -Wshadow \
             -Wcast-qual \
-            -Wstrict-prototypes \
-            -Wpointer-arith \
             -Wdouble-promotion \
             -Wno-unused-function \
             -Wmissing-prototypes \
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp
index 75a698d7..8b912bae 100644
--- a/tests/test-grad0.cpp
+++ b/tests/test-grad0.cpp
@@ -208,26 +208,6 @@ static struct ggml_tensor * get_random_tensor_i32(
     return result;
 }
 
-static void print_elements(const char* label, const struct ggml_tensor * t) {
-    if (!t) {
-        printf("%s: %s = null\n", __func__, label);
-        return;
-    }
-    const int nelements = ggml_nelements(t);
-    printf("%s: %s = [", __func__, label);
-    for (int k = 0; k < nelements; ++k) {
-        if (k > 0) { printf(", "); }
-        printf("%.5f", ggml_get_f32_1d(t, k));
-    }
-    printf("] shape: [");
-    for (int k = 0; k < t->n_dims; ++k) {
-        if (k > 0) { printf(", "); }
-        printf("%d", (int)t->ne[k]);
-    }
-    printf("]\n");
-
-}
-
 static bool check_gradient(
     const char * op_name,
     struct ggml_context * ctx0,
diff --git a/tests/test-mul-mat0.c b/tests/test-mul-mat0.c
index 1bd6e140..6212da41 100644
--- a/tests/test-mul-mat0.c
+++ b/tests/test-mul-mat0.c
@@ -13,7 +13,7 @@
 
 #define MAX_NARGS 2
 
-float frand() {
+float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
 
@@ -163,10 +163,6 @@ bool check_mat_mul(
     const struct ggml_tensor * y,
     const struct ggml_tensor * x0,
     const struct ggml_tensor * x1) {
-    float * dst  = (float *) y->data;
-    float * src0 = (float *) x0->data;
-    float * src1 = (float *) x1->data;
-
     const int64_t n00 = x0->ne[0];
     const int64_t n10 = x0->ne[1];
     const int64_t n20 = x0->ne[2];
diff --git a/tests/test-mul-mat2.c b/tests/test-mul-mat2.c
index 944c48e9..89af2863 100644
--- a/tests/test-mul-mat2.c
+++ b/tests/test-mul-mat2.c
@@ -54,7 +54,7 @@ const int K = 1280;
 #define gq_t_bits 64
 #define gq_quant_t uint64_t
 
-float frand() {
+float frand(void) {
     return (float) rand() / (float) RAND_MAX;
 }
 
@@ -127,7 +127,7 @@ static inline int quantize_1_blocks_per_row(int k) {
     return k/QK;
 }
 
-static inline int quantize_1_quants_per_block() {
+static inline int quantize_1_quants_per_block(void) {
     return QK/gq_t_bits;
 }
 
@@ -286,7 +286,7 @@ static inline int quantize_2_blocks_per_row(int k) {
     return k/QK;
 }
 
-static inline int quantize_2_quants_per_block() {
+static inline int quantize_2_quants_per_block(void) {
     return QK/gq_t_bits;
 }
 
@@ -662,9 +662,6 @@ void mul_mat_gq_2(
     int m, int n, int k) {
     assert(k % QK == 0);
 
-    const int nb = quantize_2_blocks_per_row(k);
-    const int nq = quantize_2_quants_per_block();
-
     for (int ir0 = 0; ir0 < m; ir0++) {
         for (int ir1 = 0; ir1 < n; ir1++) {
             vec_dot_gq_2(k, dst + ir1, src0, src1);
@@ -686,7 +683,7 @@ static inline int quantize_3_blocks_per_row(int k) {
     return k/QK;
 }
 
-static inline int quantize_3_quants_per_block() {
+static inline int quantize_3_quants_per_block(void) {
     return QK/gq_t_bits;
 }
 
@@ -2355,8 +2352,6 @@ void mul_mat_gq_6(
     int m, int n, int k) {
     assert(k % 32 == 0);
 
-    const int nb = quantize_6_blocks_per_row(k);
-
    for (int ir0 = 0; ir0 < m; ir0++) {
         for (int ir1 = 0; ir1 < n; ir1++) {
             vec_dot_gq_6(k, dst + ir1, src0, src1);
diff --git a/tests/test-vec1.c b/tests/test-vec1.c
index fefcd68f..567cb061 100644
--- a/tests/test-vec1.c
+++ b/tests/test-vec1.c
@@ -460,7 +460,7 @@ void mul_mat_vec_f16_3(
     }
 }
 
-uint64_t get_time_us() {
+uint64_t get_time_us(void) {
     struct timeval tv;
     gettimeofday(&tv, NULL);
     return tv.tv_sec * 1000000 + tv.tv_usec;