From: Kawrakow
Date: Tue, 25 Jul 2023 15:35:53 +0000 (+0300)
Subject: Add LLAMA_DEFAULT_RMS_EPS so we can change the default (#2384)
X-Git-Tag: gguf-v0.4.0~393
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=eb542d39324574a6778fad9ba9e34ba7a14a82a3;p=pkg%2Fggml%2Fsources%2Fllama.cpp

Add LLAMA_DEFAULT_RMS_EPS so we can change the default (#2384)

Co-authored-by: Iwan Kawrakow
---

diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index f9dc0aaa..6fa55b31 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -8,7 +8,11 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static const float rms_norm_eps = 1e-6f;
+#ifdef LLAMA_DEFAULT_RMS_EPS
+static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
+#else
+static const float rms_norm_eps = 5e-6f;
+#endif
 
 float frand() {
     return (float)rand()/(float)RAND_MAX;
diff --git a/examples/common.h b/examples/common.h
index 2d87c923..672dcf77 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -34,7 +34,7 @@ struct gpt_params {
     int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
     float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
     int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
-    float rms_norm_eps = 1e-6; // rms norm epsilon
+    float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS; // rms norm epsilon
     float rope_freq_base = 10000.0f; // RoPE base frequency
     float rope_freq_scale = 1.0f; // RoPE frequency scaling factor
 
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 4bbf6b78..54dc2bee 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -16,7 +16,7 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static const float rms_norm_eps = 1e-6f;
+static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
 
 struct random_normal_distribution {
     std::mt19937 gen;
diff --git a/llama.cpp b/llama.cpp
index febefbac..30d4b0a6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -186,7 +186,7 @@ struct llama_hparams {
     // LLaMAv2
     // TODO: load from model data hparams
     float f_ffn_mult = 1.0f;
-    float f_rms_norm_eps = 1e-6f;
+    float f_rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
 
     float rope_freq_base = 10000.0f;
     float rope_freq_scale = 1.0f;
@@ -870,7 +870,7 @@ struct llama_context_params llama_context_default_params() {
         /*.n_ctx =*/ 512,
         /*.n_batch =*/ 512,
         /*.n_gqa =*/ 1,
-        /*.rms_norm_eps =*/ 1e-6f,
+        /*.rms_norm_eps =*/ LLAMA_DEFAULT_RMS_EPS,
         /*.gpu_layers =*/ 0,
         /*.main_gpu =*/ 0,
         /*.tensor_split =*/ nullptr,
diff --git a/llama.h b/llama.h
index 843b0bf5..df46f9b9 100644
--- a/llama.h
+++ b/llama.h
@@ -53,6 +53,10 @@
 #define LLAMA_SUPPORTS_GPU_OFFLOAD
 #endif
 
+#ifndef LLAMA_DEFAULT_RMS_EPS
+#define LLAMA_DEFAULT_RMS_EPS 5e-6f
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif