             const std::string token_str = llama_token_to_piece(ctx_tgt, id);
-            printf("%s", token_str.c_str());
-            fflush(stdout);
+            if (!params.use_color) {
+                printf("%s", token_str.c_str());
+            }

             if (id == llama_token_eos(model_tgt)) {
                 has_eos = true;
@@ ... @@
                     ++n_past_tgt;
                     ++n_past_dft;
                     ++i_dft;
-
+                    if (params.use_color) {
+                        // Color token according to its origin sequence
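+                        // 36 - s_keep % 6 maps the kept sequence to one of the ANSI
+                        // foreground codes 36..31; \u001b[37m then switches back to white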
+ printf("\u001b[%dm%s\u001b[37m", (36 - s_keep % 6), token_str.c_str());
+ fflush(stdout);
+ }
continue;
}
}
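+            // the token was rejected (or no drafted tokens were left): with --color it
+            // has not been printed yet, so emit it here without sequence coloring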
+            if (params.use_color) {
+                printf("%s", token_str.c_str());
+            }
+            fflush(stdout);

             LOG("the sampled target token (%d, '%s') did not match, or we ran out of drafted tokens\n", id, token_str.c_str());