llama : have n_batch default to 512 (#1091)

author eiery <redacted>

Sat, 22 Apr 2023 08:27:05 +0000 (04:27 -0400)

committer GitHub <redacted>

Sat, 22 Apr 2023 08:27:05 +0000 (11:27 +0300)
author eiery <redacted>
Sat, 22 Apr 2023 08:27:05 +0000 (04:27 -0400)
committer GitHub <redacted>
Sat, 22 Apr 2023 08:27:05 +0000 (11:27 +0300)
diff --git a/examples/common.h b/examples/common.h

index cbbc2dfab16de11969f67e0fcfd8cd15b7f5e62b..0470368d58acbc0004478f3cd5132dabd4590204 100644 (file)
--- a/examples/common.h
+++ b/examples/common.h
@@ -20,7 +20,7 @@ struct gpt_params {
      int32_t repeat_last_n = 64;   // last n tokens to penalize
      int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
      int32_t n_ctx         = 512;  // context size
-    int32_t n_batch       = 8;    // batch size for prompt processing
+    int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
      int32_t n_keep        = 0;    // number of tokens to keep from initial prompt
  
      // sampling parameters