Fix issue where interactive mode crashes when input exceeds ctx size (#1789)

author Kerfuffle <redacted>

Sun, 11 Jun 2023 14:19:17 +0000 (08:19 -0600)

committer GitHub <redacted>

Sun, 11 Jun 2023 14:19:17 +0000 (08:19 -0600)
author Kerfuffle <redacted>
Sun, 11 Jun 2023 14:19:17 +0000 (08:19 -0600)
committer GitHub <redacted>
Sun, 11 Jun 2023 14:19:17 +0000 (08:19 -0600)
diff --git a/examples/common.cpp b/examples/common.cpp

index f5d886acf653931290e35b13d8d313a85c2dcff3..df69f2736406abe46699801adfcb948e29bfb1bc 100644 (file)
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -632,6 +632,9 @@ void console_set_color(console_state & con_st, console_color_t color) {
              case CONSOLE_COLOR_USER_INPUT:
                  fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN);
                  break;
+            case CONSOLE_COLOR_ERROR:
+                fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED);
+                break;
          }
          con_st.color = color;
          fflush(con_st.out);
diff --git a/examples/common.h b/examples/common.h

index 826e2ae59cec1ad93b3b2e28692415b5e8f5c850..6fedb414a7659f6d48833348654f14f0baa2409c 100644 (file)
--- a/examples/common.h
+++ b/examples/common.h
@@ -112,7 +112,8 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params);
  enum console_color_t {
      CONSOLE_COLOR_DEFAULT=0,
      CONSOLE_COLOR_PROMPT,
-    CONSOLE_COLOR_USER_INPUT
+    CONSOLE_COLOR_USER_INPUT,
+    CONSOLE_COLOR_ERROR
  };
  
  struct console_state {
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index de63faa3eea767d1e9ffa7f6580877e93768ddc0..66d563143a5c624c7a2222f4fccd865148e0ae76 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -81,6 +81,9 @@ int main(int argc, char ** argv) {
      if (params.n_ctx > 2048) {
          fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
                  "expect poor results\n", __func__, params.n_ctx);
+    } else if (params.n_ctx < 8) {
+        fprintf(stderr, "%s: warning: minimum context size is 8, using minimum size.\n", __func__);
+        params.n_ctx = 8;
      }
  
      fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
@@ -331,6 +334,19 @@ int main(int argc, char ** argv) {
      while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
          // predict
          if (embd.size() > 0) {
+            // Note: n_ctx - 4 here is to match the logic for commandline prompt handling via
+            // --prompt or --file which uses the same value.
+            auto max_embd_size = n_ctx - 4;
+            // Ensure the input doesn't exceed the context size by truncating embd if necessary.
+            if ((int)embd.size() > max_embd_size) {
+                auto skipped_tokens = embd.size() - max_embd_size;
+                console_set_color(con_st, CONSOLE_COLOR_ERROR);
+                printf("<<input too long: skipped %ld token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
+                console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
+                fflush(stdout);
+                embd.resize(max_embd_size);
+            }
+
              // infinite text generation via context swapping
              // if we run out of context:
              // - take the n_keep first tokens from the original prompt (via n_past)
author	Kerfuffle <redacted>
	Sun, 11 Jun 2023 14:19:17 +0000 (08:19 -0600)
committer	GitHub <redacted>
	Sun, 11 Jun 2023 14:19:17 +0000 (08:19 -0600)
examples/common.cpp		patch \| blob \| history
examples/common.h		patch \| blob \| history
examples/main/main.cpp		patch \| blob \| history