assert(timings.n_eval == llama.num_tokens_predicted);
return json{
- {"prompt_n", timings.n_eval},
+ {"prompt_n", timings.n_p_eval},
{"prompt_ms", timings.t_p_eval_ms},
{"prompt_per_token_ms", timings.t_p_eval_ms / timings.n_p_eval},
{"prompt_per_second", 1e3 / timings.t_p_eval_ms * timings.n_p_eval},
{"stopped_limit", llama.stopped_limit},
{"stopping_word", llama.stopping_word},
{"tokens_cached", llama.n_past},
- {"tokens_predicted", llama.num_tokens_predicted},
{"timings", format_timings(llama)},
};
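
The fix swaps `timings.n_eval` (tokens generated) for `timings.n_p_eval` (prompt tokens processed) so that `prompt_n` reports the prompt side of the run, matching the other `prompt_*` fields. Below is a minimal standalone sketch, not the server code itself, showing the arithmetic with the corrected counter; the `Timings` struct and the sample numbers are hypothetical stand-ins for the `llama_timings` fields used above.

```cpp
// Hypothetical stand-in for the timings fields referenced in the diff.
#include <cstdio>

struct Timings {
    double t_p_eval_ms; // wall time spent evaluating the prompt
    double t_eval_ms;   // wall time spent generating tokens
    int    n_p_eval;    // number of prompt tokens processed
    int    n_eval;      // number of tokens generated (predicted)
};

int main() {
    const Timings timings = { 250.0, 1200.0, 32, 64 }; // made-up sample values

    // The prompt metrics must use the prompt counters; dividing t_p_eval_ms by
    // n_eval (as the removed line implied for prompt_n) would mix prompt time
    // with the generation token count and skew both derived figures.
    const double prompt_per_token_ms = timings.t_p_eval_ms / timings.n_p_eval;
    const double prompt_per_second   = 1e3 / timings.t_p_eval_ms * timings.n_p_eval;

    std::printf("prompt_n            = %d\n",   timings.n_p_eval);
    std::printf("prompt_per_token_ms = %.2f\n", prompt_per_token_ms);
    std::printf("prompt_per_second   = %.2f\n", prompt_per_second);
    return 0;
}
```

The sketch assumes `n_p_eval` is non-zero; if the prompt evaluation count could be zero (for example, a fully cached prompt), the per-token and per-second divisions would need a guard in real code.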