git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
convert: fix Mistral3/Gemma3 model hparams init (#12571)
author: Sigbjørn Skjæret <redacted>
Tue, 25 Mar 2025 22:03:10 +0000 (23:03 +0100)
committer: GitHub <redacted>
Tue, 25 Mar 2025 22:03:10 +0000 (23:03 +0100)
* Fix Mistral3/Gemma3 model hparams init

* set positional args correctly

* use existing hparams if passed

convert_hf_to_gguf.py

index d9fa57027b771d7da7171245377ac11aacf397d2..76ab4233ef2c1638706f4c2bdff543f077649083 100755 (executable)
@@ -1752,7 +1752,7 @@ class Mistral3Model(LlamaModel):
 
     # we need to merge the text_config into the root level of hparams
     def __init__(self, *args, **kwargs):
-        hparams = Model.load_hparams(kwargs["dir_model"])
+        hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
         if "text_config" in hparams:
             hparams = {**hparams, **hparams["text_config"]}
             kwargs["hparams"] = hparams
@@ -3385,7 +3385,7 @@ class Gemma3Model(Model):
 
     # we need to merge the text_config into the root level of hparams
     def __init__(self, *args, **kwargs):
-        hparams = Model.load_hparams(kwargs["dir_model"])
+        hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
         if "text_config" in hparams:
             hparams = {**hparams, **hparams["text_config"]}
             kwargs["hparams"] = hparams
@@ -5358,7 +5358,7 @@ def main() -> None:
             logger.error(f"Model {model_architecture} is not supported")
             sys.exit(1)
 
-        model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out,
+        model_instance = model_class(dir_model, output_type, fname_out,
                                      is_big_endian=args.bigendian, use_temp_file=args.use_temp_file,
                                      eager=args.no_lazy,
                                      metadata_override=args.metadata, model_name=args.model_name,