commandset_list.push_back(cs);
return json{{"index",index}};
}
-json seek(struct whisper_context * ctx, audio_async &audio, json params) {
+json seek(struct whisper_context * /*ctx*/, audio_async & /*audio*/, json /*params*/) {
    // whisper_state has the pertinent offsets, but there also seems to be a large
    // number of scratch buffers that would prevent rewinding the context in a manner similar to llama.
    // I'll give this another pass once everything else is implemented,
    // but for now it's unsupported
- throw json{
+ throw json {
{"code", -32601},
{"message", "Seeking is not yet supported."}
};
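// Illustrative sketch of how a dispatcher could turn the json thrown above into a
// JSON-RPC 2.0 error reply; make_error_reply and the id parameter are assumptions
// for illustration, not names taken from this file.
static json make_error_reply(const json & id, const json & err) {
    return json{
        {"jsonrpc", "2.0"},
        {"id", id},
        {"error", err} // e.g. {"code": -32601, "message": "Seeking is not yet supported."}
    };
}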
jobqueue.pop_front();
// send response
std::string data = resp.dump(-1, ' ', false, json::error_handler_t::replace);
- fprintf(stdout, "Content-Length: %d\r\n\r\n%s\n", data.length()+1, data.c_str());
+ fprintf(stdout, "Content-Length: %d\r\n\r\n%s\n", (int)data.length()+1, data.c_str());
std::cout.flush();
}
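// Illustrative counterpart to the framing written above ("Content-Length: <n>\r\n\r\n<json>"):
// a client reads the header line, skips the blank line, then reads <n> bytes of body.
// read_framed_message is an assumed helper name; it needs <istream> and <string>.
static std::string read_framed_message(std::istream & in) {
    std::string header;
    std::getline(in, header);                                       // "Content-Length: <n>\r"
    const size_t n = std::stoul(header.substr(header.find(':') + 1));
    in.ignore(2);                                                   // consume the "\r\n" blank line
    std::string body(n, '\0');
    in.read(&body[0], (std::streamsize) n);
    return body;
}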
return speaker;
}
-void whisper_print_progress_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int progress, void * user_data) {
+void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
int progress_step = ((whisper_print_user_data *) user_data)->params->progress_step;
int * progress_prev = &(((whisper_print_user_data *) user_data)->progress_prev);
if (progress >= *progress_prev + progress_step) {
return true;
}
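// Sketch of the throttling used by the callback above: only report when progress has
// advanced by at least progress_step since the last report. print_progress_step is an
// illustrative helper, not a name from this file; the excerpt elides the original if-body.
static void print_progress_step(int progress, int progress_step, int * progress_prev) {
    if (progress >= *progress_prev + progress_step) {
        *progress_prev += progress_step;
        fprintf(stderr, "progress = %3d%%\n", progress);
    }
}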
-bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
+bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & /*params*/, std::vector<std::vector<float>> /*pcmf32s*/) {
std::ofstream fout(fname);
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
const llama_token * tokens,
const int n_tokens,
const int n_past,
- const int n_threads) {
+ int n_threads) {
// enforce that the first token is BOS
if (n_past == 0 && tokens[0] != llama_token_bos()) {
const int n_vocab = hparams.n_vocab;
const int n_rot = hparams.n_embd/hparams.n_head;
+ const float eps = 5e-6f; // TODO: take from hparams
+
auto & mem_per_token = lctx.mem_per_token;
auto & buf_compute = lctx.buf_compute;
// for big prompts, if BLAS is enabled, it is better to use only one thread
// otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
ggml_cgraph gf = {};
- gf.n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
+ n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
ggml_set_name(embd, "embd");
// norm
{
- cur = ggml_rms_norm(ctx0, inpL);
+ cur = ggml_rms_norm(ctx0, inpL, eps);
// cur = cur*attention_norm(broadcasted)
cur = ggml_mul(ctx0, cur, model.layers[il].attention_norm);
{
// norm
{
- cur = ggml_rms_norm(ctx0, inpFF);
+ cur = ggml_rms_norm(ctx0, inpFF, eps);
// cur = cur*ffn_norm(broadcasted)
cur = ggml_mul(ctx0, cur, model.layers[il].ffn_norm);
// norm
{
- inpL = ggml_rms_norm(ctx0, inpL);
+ inpL = ggml_rms_norm(ctx0, inpL, eps);
// inpL = inpL*norm(broadcasted)
inpL = ggml_mul(ctx0, inpL, model.norm);
//inpL = ggml_soft_max_inplace(ctx0, inpL);
// run the computation
- ggml_build_forward_expand(&gf, inpL);
- ggml_graph_compute (ctx0, &gf);
+ ggml_build_forward_expand (&gf, inpL);
+ ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
#ifdef GGML_PERF
// print timing information per ggml operation (for debugging purposes)
}
struct ggml_cgraph gf = ggml_build_forward(r);
- gf.n_threads = n_threads;
- ggml_graph_compute(lora_ctx, &gf);
+ ggml_graph_compute_with_ctx(lora_ctx, &gf, n_threads);
// we won't need these tensors again, reset the context to save memory
ggml_free(lora_ctx);
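// The pattern shared by these hunks: the thread count no longer lives on the graph
// (gf.n_threads was removed); it is passed when the graph is executed, and
// ggml_graph_compute_with_ctx() takes its work buffer from the given context.
// Minimal illustrative helper (the compute_graph name is an assumption):
static void compute_graph(struct ggml_context * ctx, struct ggml_tensor * result, int n_threads) {
    struct ggml_cgraph gf = ggml_build_forward(result);
    ggml_graph_compute_with_ctx(ctx, &gf, n_threads);
}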
ggml_context * cpy_ctx = ggml_init({ sizeof(buffer), buffer, /* no_alloc */ true });
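    // no_alloc == true: tensors created in cpy_ctx only describe shape/type and point at
    // caller-provided memory (kout3d->data below); executing the ggml_cpy graph then
    // performs the actual copy out of the kv cache views.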
ggml_cgraph gf{};
- gf.n_threads = 1;
ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_ntok, n_layer);
kout3d->data = out;
ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d));
ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
- ggml_graph_compute(cpy_ctx, &gf);
+ ggml_graph_compute_with_ctx(cpy_ctx, &gf, 1);
ggml_free(cpy_ctx);
}
ggml_context * cpy_ctx = ggml_init({ sizeof(buffer), buffer, /* no_alloc */ true });
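    // mirror of the save path above: kin3d/vin3d wrap the serialized input buffer and the
    // ggml_cpy graph writes its contents back into the kv cache views when executed.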
ggml_cgraph gf{};
- gf.n_threads = 1;
ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_ntok, n_layer);
kin3d->data = (void *) inp;
ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d));
ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
- ggml_graph_compute(cpy_ctx, &gf);
+ ggml_graph_compute_with_ctx(cpy_ctx, &gf, 1);
ggml_free(cpy_ctx);
}