]> git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
convert : fix Norway problem when parsing YAML (#12114)
authorXuan-Son Nguyen <redacted>
Fri, 28 Feb 2025 16:44:46 +0000 (17:44 +0100)
committerGitHub <redacted>
Fri, 28 Feb 2025 16:44:46 +0000 (17:44 +0100)
* convert : fix Norway problem when parsing YAML

* Update gguf-py/gguf/metadata.py

* add newline at correct place

gguf-py/gguf/metadata.py

index 962c27b2044642eb169cc7791c25db8bcded907f..e807f434689de669816504921f54809cac15fc71 100644 (file)
@@ -121,19 +121,39 @@ class Metadata:
         if not model_card_path.is_file():
             return {}
 
-        # The model card metadata is assumed to always be in YAML
+        # The model card metadata is assumed to always be in YAML (frontmatter)
         # ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473
+        yaml_content: str = ""
         with open(model_card_path, "r", encoding="utf-8") as f:
-            if f.readline() == "---\n":
-                raw = f.read().partition("---\n")[0]
-                data = yaml.safe_load(raw)
-                if isinstance(data, dict):
-                    return data
+            content = f.read()
+            lines = content.splitlines()
+            lines_yaml = []
+            if len(lines) == 0:
+                # Empty file
+                return {}
+            if len(lines) > 0 and lines[0] != "---":
+                # No frontmatter
+                return {}
+            for line in lines[1:]:
+                if line == "---":
+                    break # End of frontmatter
                 else:
-                    logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
-                    return {}
+                    lines_yaml.append(line)
+            yaml_content = "\n".join(lines_yaml) + "\n"
+
+        # Quick hack to fix the Norway problem
+        # https://hitchdev.com/strictyaml/why/implicit-typing-removed/
+        yaml_content = yaml_content.replace("- no\n", "- \"no\"\n")
+
+        if yaml_content:
+            data = yaml.safe_load(yaml_content)
+            if isinstance(data, dict):
+                return data
             else:
+                logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
                 return {}
+        else:
+            return {}
 
     @staticmethod
     def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]: