From: Daniel Bevenius Date: Mon, 8 Sep 2025 07:44:34 +0000 (+0200) Subject: convert : force setting sliding_window from original config (#15867) X-Git-Tag: upstream/0.0.6527~114 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=233d773d02c37982badddf3994e9953a175f34da;p=pkg%2Fggml%2Fsources%2Fllama.cpp convert : force setting sliding_window from original config (#15867) * convert : force setting sliding_window from original config This commit modifies the set_gguf_parameters method for EmbeddingGemma so that it reads the sliding_window parameter from the original model config.json and uses that value. The motivation for this change is that the Gemma3TextConfig constructor adjusts the sliding_window value, which can lead to inconsistencies when converting models as we expect this value to match the original model's configuration. Refs: https://github.com/huggingface/transformers/blob/bb45d3631ec7026db04a77d33a52b31766372160/src/transformers/models/gemma3/configuration_gemma3.py#L230 * fix flake8 error * add link to huggingface PR --- diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 717b8a65..62a546ee 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -5128,6 +5128,20 @@ class EmbeddingGemma(Gemma3Model): def set_gguf_parameters(self): super().set_gguf_parameters() + + # Override the sliding window size as it gets adjusted by the Gemma3TextConfig + # constructor. We want to use the value from the original model's config.json. 
+ # ref: https://github.com/huggingface/transformers/pull/40700 + with open(self.dir_model / "config.json", "r", encoding="utf-8") as f: + config = json.load(f) + orig_sliding_window = config.get("sliding_window") + if orig_sliding_window is None: + raise ValueError("sliding_window not found in model config - this is required for the model") + + logger.info(f"Using original sliding_window from config: {orig_sliding_window} " + f"instead of {self.hparams['sliding_window']}") + self.gguf_writer.add_sliding_window(orig_sliding_window) + self._try_set_pooling_type()