* set default n_batch to 512 when using BLAS
* spacing
* alternate implementation of setting different n_batch for BLAS
* set n_batch to 512 for all cases
int32_t repeat_last_n = 64; // last n tokens to penalize
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
int32_t n_ctx = 512; // context size
- int32_t n_batch = 8; // batch size for prompt processing
+ int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
// sampling parameters