git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
gguf-py : fix tensor groups for encoder-decoder models in gguf-dump.py (#8090)
author fairydreaming <redacted>
Mon, 24 Jun 2024 12:13:39 +0000 (14:13 +0200)
committer GitHub <redacted>
Mon, 24 Jun 2024 12:13:39 +0000 (14:13 +0200)
Co-authored-by: Stanisław Szymczyk <redacted>
Co-authored-by: Brian <redacted>
gguf-py/scripts/gguf-dump.py

index 92d14d6cd0a69d1a1775bc1a661b34b08ce02ca8..508ca8f0a5b7b9b2d56ce5a0a1bd9cfa6afc892d 100755 (executable)
@@ -208,7 +208,9 @@ def translate_tensor_name(name):
         'ssm_d': 'State space model skip connection',
         'ssm_dt': 'State space model time step',
         'ssm_out': 'State space model output projection',
-        'blk': 'Block'
+        'blk': 'Block',
+        'enc': 'Encoder',
+        'dec': 'Decoder',
     }
 
     expanded_words = []
@@ -291,6 +293,10 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
             tensor_group_name = "base"
             if tensor_components[0] == 'blk':
                 tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}"
+            elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk':
+                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}"
+            elif tensor_components[0] in ['enc', 'dec']:
+                tensor_group_name = f"{tensor_components[0]}"
 
             # Check if new Tensor Group
             if tensor_group_name not in tensor_groups: