n_head_kv: int
f_norm_eps: float
+ f_rope_freq_base: Optional[float] = None
+
ftype: Optional[GGMLFileType] = None
# path to the directory containing the model files
def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
config = json.load(open(config_path))
- n_vocab = config["vocab_size"] if "vocab_size" in config else -1
- n_embd = config["dim"]
- n_layer = config["n_layers"]
- n_mult = config["multiple_of"]
- n_ctx = 2048 if config["norm_eps"] == 1e-06 else 4096 # hack to determine LLaMA v1 vs v2
- n_ff = -1
- n_head = config["n_heads"]
- n_head_kv = config["n_kv_heads"] if "n_kv_heads" in config else n_head
- f_norm_eps = config["norm_eps"]
+ n_vocab = config["vocab_size"] if "vocab_size" in config else -1
+ n_embd = config["dim"]
+ n_layer = config["n_layers"]
+ n_mult = config["multiple_of"]
+ n_ctx = 2048 if config["norm_eps"] == 1e-06 else 4096 # hack to determine LLaMA v1 vs v2
+ n_ff = -1
+ n_head = config["n_heads"]
+ n_head_kv = config["n_kv_heads"] if "n_kv_heads" in config else n_head
+ f_norm_eps = config["norm_eps"]
+ f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None
if n_vocab == -1:
n_vocab = model["tok_embeddings.weight"].shape[0]
n_ff = model["layers.0.feed_forward.w1.weight"].shape[0]
return Params(
- n_vocab = n_vocab,
- n_embd = n_embd,
- n_mult = n_mult,
- n_layer = n_layer,
- n_ctx = n_ctx,
- n_ff = n_ff,
- n_head = n_head,
- n_head_kv = n_head_kv,
- f_norm_eps = f_norm_eps,
+ n_vocab = n_vocab,
+ n_embd = n_embd,
+ n_mult = n_mult,
+ n_layer = n_layer,
+ n_ctx = n_ctx,
+ n_ff = n_ff,
+ n_head = n_head,
+ n_head_kv = n_head_kv,
+ f_norm_eps = f_norm_eps,
+ f_rope_freq_base = f_rope_freq_base,
)
@staticmethod
self.gguf.add_head_count_kv (params.n_head_kv)
self.gguf.add_layer_norm_rms_eps (params.f_norm_eps)
+        if params.f_rope_freq_base is not None:
+ self.gguf.add_rope_freq_base(params.f_rope_freq_base)
+
if params.ftype:
self.gguf.add_file_type(params.ftype)
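Taken together, the convert.py changes above read `rope_theta` from the checkpoint's `params.json` (when present) and forward it to the GGUF writer. A minimal standalone sketch of that path, with illustrative `params.json` contents (newer checkpoints such as Code Llama ship `rope_theta`; older LLaMA checkpoints do not):

```python
import json

# Illustrative params.json contents; "rope_theta" is only present on newer
# checkpoints, older ones fall back to the runtime default of 10000.0.
config = json.loads(
    '{"dim": 4096, "n_layers": 32, "n_heads": 32,'
    ' "norm_eps": 1e-05, "multiple_of": 256, "rope_theta": 1000000.0}'
)

# Same extraction rule as loadOriginalParamsJson above.
f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None

# Only emit the GGUF key when the value was actually present in the model.
if f_rope_freq_base is not None:
    print(f"llama.rope.freq_base = {f_rope_freq_base}")   # -> 1000000.0
else:
    print("key not written; the loader's default of 10000.0 applies")
```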
# RoPE
KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
+KEY_ROPE_FREQ_BASE = "{arch}.rope.freq_base"
KEY_ROPE_SCALE_LINEAR = "{arch}.rope.scale_linear"
# tokenization
self.add_uint32(
KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)
- def add_rope_scale_linear(self, value: float):
+ def add_rope_freq_base(self, value: float):
+ self.add_float32(KEY_ROPE_FREQ_BASE.format(arch=self.arch), value)
+
+ def add_rope_scale_linear(self, value: float):
self.add_float32(KEY_ROPE_SCALE_LINEAR.format(arch=self.arch), value)
def add_tokenizer_model(self, model: str):
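A quick sanity check (standalone sketch, `arch` value illustrative) that the new Python key template resolves to the same string the C++ mapping below registers for the `llama` architecture:

```python
KEY_ROPE_FREQ_BASE = "{arch}.rope.freq_base"

# Must match the C++ entry { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" }
# after substituting the architecture name.
assert KEY_ROPE_FREQ_BASE.format(arch="llama") == "llama.rope.freq_base"
```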
LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
LLM_KV_ROPE_DIMENSION_COUNT,
+ LLM_KV_ROPE_FREQ_BASE,
LLM_KV_ROPE_SCALE_LINEAR,
LLM_KV_TOKENIZER_MODEL,
{ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
+ { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
{ LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
{ LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
hparams.n_head_kv = hparams.n_head;
GGUF_GET_KEY(ctx, hparams.n_head_kv, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_ATTENTION_HEAD_COUNT_KV));
- // TODO: manually setting rope scale should override this
+ // TODO: manually setting rope freq base and scale should override this
+ // FIXME: only a partial fix - it works when the specified param differs from
+ // the default, but it cannot override the model value back to the params default
+
+ llama_context_params defaults = llama_context_default_params();
+
+ // rope_freq_base
+ {
+ float ropebase = 10000.0f;
+ GGUF_GET_KEY(ctx, ropebase, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE));
+ if (ropebase != 10000.0f && rope_freq_base == defaults.rope_freq_base) {
+ rope_freq_base = ropebase;
+ }
+ }
+
// rope_freq_scale (inverse of the kv) is optional
{
float ropescale = 1.0f;
GGUF_GET_KEY(ctx, ropescale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALE_LINEAR));
- if (ropescale != 1.0f) {
+ if (ropescale != 1.0f && rope_freq_scale == defaults.rope_freq_scale) {
rope_freq_scale = 1.0f/ropescale;
}
}
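For clarity, the precedence rule the loader now applies can be summarized as a small behavioral sketch (plain Python, hypothetical helper name, not the loader code itself): the value stored in the GGUF file wins only when it differs from the built-in default and the user has left the context parameter at its default.

```python
DEFAULT_ROPE_FREQ_BASE = 10000.0  # llama_context_default_params().rope_freq_base

def effective_rope_freq_base(gguf_value: float, user_value: float) -> float:
    """Hypothetical helper mirroring the loader's precedence rule above."""
    # Use the model's value only if it differs from the built-in default AND
    # the user left the parameter untouched (i.e. still equal to the default).
    if gguf_value != DEFAULT_ROPE_FREQ_BASE and user_value == DEFAULT_ROPE_FREQ_BASE:
        return gguf_value
    return user_value

# Model ships rope_theta = 1e6, user did not touch the flag -> model value wins.
assert effective_rope_freq_base(1_000_000.0, 10_000.0) == 1_000_000.0
# User explicitly asked for 500000 -> the user value wins over the metadata.
assert effective_rope_freq_base(1_000_000.0, 500_000.0) == 500_000.0
# Known limitation (the FIXME above): explicitly requesting the default 10000.0
# cannot override a non-default model value.
assert effective_rope_freq_base(1_000_000.0, 10_000.0) != 10_000.0
```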