From: eiery
Date: Sat, 22 Apr 2023 08:27:05 +0000 (-0400)
Subject: llama : have n_batch default to 512 (#1091)
X-Git-Tag: gguf-v0.4.0~893
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=10f19c1121068ce3dab9bece03a8b9caaea2db36;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : have n_batch default to 512 (#1091)

* set default n_batch to 512 when using BLAS

* spacing

* alternate implementation of setting different n_batch for BLAS

* set n_batch to 512 for all cases
---

diff --git a/examples/common.h b/examples/common.h
index cbbc2dfa..0470368d 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -20,7 +20,7 @@ struct gpt_params {
     int32_t repeat_last_n = 64;   // last n tokens to penalize
     int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx         = 512;  // context size
-    int32_t n_batch       = 8;    // batch size for prompt processing
+    int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep        = 0;    // number of tokens to keep from initial prompt
 
     // sampling parameters
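
For context on the ">=32" note in the new comment: ggml's matrix-multiply code
only dispatches to BLAS when the operand dimensions are large enough to
amortize the overhead of the library call, and with the old default of
n_batch = 8 the activations produced during prompt processing never cleared
that bar. Below is a minimal C sketch of such a size gate; the threshold of 32
is taken from the diff's comment, and the function and parameter names are
illustrative, not the actual ggml internals.

// Sketch of a BLAS dispatch gate, modeled on ggml's mul_mat heuristic.
// Hypothetical names; threshold of 32 taken from the comment in the diff.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool use_blas_for_mul_mat(int64_t n_rows, int64_t n_cols, int64_t n_batch) {
    // Tiny matrices stay on the hand-written SIMD path; a BLAS sgemm
    // only pays off once every dimension is "large enough".
    return n_rows >= 32 && n_cols >= 32 && n_batch >= 32;
}

int main(void) {
    // Old default: 8 prompt tokens per batch -> the gate never fires.
    printf("n_batch=8:   BLAS %s\n", use_blas_for_mul_mat(4096, 4096, 8)   ? "yes" : "no");
    // New default: 512 tokens per batch -> BLAS builds take the fast path.
    printf("n_batch=512: BLAS %s\n", use_blas_for_mul_mat(4096, 4096, 512) ? "yes" : "no");
    return 0;
}

This also explains the commit history above: an earlier revision raised the
default only for BLAS builds, but the final version settles on 512
unconditionally, which keeps the defaulting logic simple while still letting
BLAS-enabled builds reach the fast path.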