From: eiery
Date: Sat, 22 Apr 2023 08:27:05 +0000 (-0400)
Subject: llama : have n_batch default to 512 (#1091)
X-Git-Tag: gguf-v0.4.0~893
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=10f19c1121068ce3dab9bece03a8b9caaea2db36;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : have n_batch default to 512 (#1091)

* set default n_batch to 512 when using BLAS

* spacing

* alternate implementation of setting different n_batch for BLAS

* set n_batch to 512 for all cases
---

diff --git a/examples/common.h b/examples/common.h
index cbbc2dfa..0470368d 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -20,7 +20,7 @@ struct gpt_params {
     int32_t repeat_last_n = 64;   // last n tokens to penalize
     int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx         = 512;  // context size
-    int32_t n_batch       = 8;    // batch size for prompt processing
+    int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep        = 0;    // number of tokens to keep from initial prompt
 
     // sampling parameters
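
For context on the ">=32" note in the new comment: ggml's matrix-multiply code
only dispatches to BLAS when the operand dimensions are large enough to
amortize the overhead of the library call, and with the old default of
n_batch = 8 the activations produced during prompt processing never cleared
that bar. Below is a minimal C sketch of such a size gate; the threshold of 32
is taken from the diff's comment, and the function and parameter names are
illustrative, not the actual ggml internals.

// Sketch of a BLAS dispatch gate, modeled on ggml's mul_mat heuristic.
// Hypothetical names; threshold of 32 taken from the comment in the diff.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool use_blas_for_mul_mat(int64_t n_rows, int64_t n_cols, int64_t n_batch) {
    // Tiny matrices stay on the hand-written SIMD path; a BLAS sgemm
    // only pays off once every dimension is "large enough".
    return n_rows >= 32 && n_cols >= 32 && n_batch >= 32;
}

int main(void) {
    // Old default: 8 prompt tokens per batch -> the gate never fires.
    printf("n_batch=8:   BLAS %s\n", use_blas_for_mul_mat(4096, 4096, 8)   ? "yes" : "no");
    // New default: 512 tokens per batch -> BLAS builds take the fast path.
    printf("n_batch=512: BLAS %s\n", use_blas_for_mul_mat(4096, 4096, 512) ? "yes" : "no");
    return 0;
}

This also explains the commit history above: an earlier revision raised the
default only for BLAS builds, but the final version settles on 512
unconditionally, which keeps the defaulting logic simple while still letting
BLAS-enabled builds reach the fast path.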