[examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file (a minimal sketch follows this list). Note that this file cannot be used as a model.
-[scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console.
+[gguf/scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console.
-[scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key.
+[gguf/scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key.
-[scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files.
+[gguf/scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files.
-[scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values.
+[gguf/scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values.
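
For reference, a minimal sketch along the lines of what `examples/writer.py` demonstrates (the file name, metadata key and tensor name below are arbitrary placeholders):

```python
# Write a tiny GGUF file with one metadata key and one tensor, then finalize it.
import numpy as np
from gguf import GGUFWriter

writer = GGUFWriter("example.gguf", "llama")
writer.add_uint32("answer", 42)                                  # simple scalar metadata
writer.add_tensor("tensor1", np.ones((32,), dtype=np.float32))   # one small F32 tensor

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()
```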
## Development
Maintainers who participate in development of this package are advised to install it in editable mode:
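
```sh
cd /path/to/llama.cpp/gguf-py

pip install --editable .
```

Here `/path/to/llama.cpp` stands for wherever the repository is checked out.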
--- /dev/null
+++ b/gguf-py/gguf/scripts/__init__.py
+# pyright: reportUnusedImport=false
+
+from .gguf_convert_endian import main as gguf_convert_endian_entrypoint
+from .gguf_dump import main as gguf_dump_entrypoint
+from .gguf_set_metadata import main as gguf_set_metadata_entrypoint
+from .gguf_new_metadata import main as gguf_new_metadata_entrypoint
--- /dev/null
+++ b/gguf-py/gguf/scripts/gguf_convert_endian.py
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import logging
+import argparse
+import os
+import sys
+from tqdm import tqdm
+from pathlib import Path
+
+import numpy as np
+
+# Necessary to load the local gguf package
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+import gguf
+
+logger = logging.getLogger("gguf-convert-endian")
+
+
+def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
+ if np.uint32(1) == np.uint32(1).newbyteorder("<"):
+ # Host is little endian
+ host_endian = "little"
+ swapped_endian = "big"
+ else:
+ # Sorry PDP or other weird systems that don't use BE or LE.
+ host_endian = "big"
+ swapped_endian = "little"
+ if reader.byte_order == "S":
+ file_endian = swapped_endian
+ else:
+ file_endian = host_endian
+ order = host_endian if args.order == "native" else args.order
+ logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
+ if file_endian == order:
+ logger.info(f"* File is already {order.upper()} endian. Nothing to do.")
+ sys.exit(0)
+ logger.info("* Checking tensors for conversion compatibility")
+ for tensor in reader.tensors:
+ if tensor.tensor_type not in (
+ gguf.GGMLQuantizationType.F32,
+ gguf.GGMLQuantizationType.F16,
+ gguf.GGMLQuantizationType.Q8_0,
+ ):
+ raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
+ logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
+ if args.dry_run:
+ return
+    logger.warning("*** Warning *** Warning *** Warning ***")
+ logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
+ if order != host_endian:
+ logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
+ logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
+ logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
+ response = input("YES, I am sure> ")
+ if response != "YES":
+ logger.warning("You didn't enter YES. Okay then, see ya!")
+ sys.exit(0)
+ logger.info(f"* Converting fields ({len(reader.fields)})")
+ for idx, field in enumerate(reader.fields.values()):
+ logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
+ for part in field.parts:
+ part.byteswap(inplace=True)
+ logger.info(f"* Converting tensors ({len(reader.tensors)})")
+
+ for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
+ log_message = (
+ f"Converting tensor {repr(tensor.name)}, "
+ f"type={tensor.tensor_type.name}, "
+ f"elements={tensor.n_elements} "
+ )
+
+ # Byte-swap each part of the tensor's field
+ for part in tensor.field.parts:
+ part.byteswap(inplace=True)
+
+ # Byte-swap tensor data if necessary
+ if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
+ # Handle Q8_0 tensor blocks (block_q8_0)
+ # Specific handling of block_q8_0 is required.
+ # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
+
+ block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
+
+ n_blocks = len(tensor.data) // block_size
+ for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
+ block_offs = block_num * block_size
+
+ # Byte-Swap f16 sized delta field
+ delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
+ delta.byteswap(inplace=True)
+
+                # The int8 quants that follow the delta are single bytes, so they need no byte swapping.
+                # Periodically refresh the progress bar description with the percentage of blocks done.
+                if block_num % 100000 == 0:
+                    inner_pbar.set_description(f"Byte-swapping Blocks [{100 * block_num // n_blocks}%]")
+
+ else:
+ # Handle other tensor types
+ tensor.data.byteswap(inplace=True)
+
+ pbar.set_description(log_message)
+
+ logger.info("* Completion")
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
+ parser.add_argument(
+ "model", type=str,
+ help="GGUF format model filename",
+ )
+ parser.add_argument(
+ "order", type=str, choices=['big', 'little', 'native'],
+ help="Requested byte order",
+ )
+ parser.add_argument(
+ "--dry-run", action="store_true",
+ help="Don't actually change anything",
+ )
+ parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+
+ args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
+
+ logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+ logger.info(f'* Loading: {args.model}')
+ reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
+ convert_byteorder(reader, args)
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+++ b/gguf-py/gguf/scripts/gguf_dump.py
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import logging
+import argparse
+import os
+import re
+import sys
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+# Necessary to load the local gguf package
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from gguf import GGUFReader, GGUFValueType, ReaderTensor # noqa: E402
+
+logger = logging.getLogger("gguf-dump")
+
+
+def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
+ host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
+ if reader.byte_order == 'S':
+ file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
+ else:
+ file_endian = host_endian
+ return (host_endian, file_endian)
+
+
+# For more information about what field.parts and field.data represent,
+# please see the comments in the modify_gguf.py example.
+def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
+ host_endian, file_endian = get_file_host_endian(reader)
+ print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.') # noqa: NP100
+ print(f'* Dumping {len(reader.fields)} key/value pair(s)') # noqa: NP100
+ for n, field in enumerate(reader.fields.values(), 1):
+ if not field.types:
+ pretty_type = 'N/A'
+ elif field.types[0] == GGUFValueType.ARRAY:
+ nest_count = len(field.types) - 1
+ pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
+ else:
+ pretty_type = str(field.types[-1].name)
+
+ log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
+ if len(field.types) == 1:
+ curr_type = field.types[0]
+ if curr_type == GGUFValueType.STRING:
+ log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
+ elif field.types[0] in reader.gguf_scalar_to_np:
+ log_message += ' = {0}'.format(field.parts[-1][0])
+ print(log_message) # noqa: NP100
+ if args.no_tensors:
+ return
+ print(f'* Dumping {len(reader.tensors)} tensor(s)') # noqa: NP100
+ for n, tensor in enumerate(reader.tensors, 1):
+ prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
+ print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}') # noqa: NP100
+
+
+def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
+ import json
+ host_endian, file_endian = get_file_host_endian(reader)
+ metadata: dict[str, Any] = {}
+ tensors: dict[str, Any] = {}
+ result = {
+ "filename": args.model,
+ "endian": file_endian,
+ "metadata": metadata,
+ "tensors": tensors,
+ }
+ for idx, field in enumerate(reader.fields.values()):
+ curr: dict[str, Any] = {
+ "index": idx,
+ "type": field.types[0].name if field.types else 'UNKNOWN',
+ "offset": field.offset,
+ }
+ metadata[field.name] = curr
+ if field.types[:1] == [GGUFValueType.ARRAY]:
+ curr["array_types"] = [t.name for t in field.types][1:]
+ if not args.json_array:
+ continue
+ itype = field.types[-1]
+ if itype == GGUFValueType.STRING:
+ curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
+ else:
+ curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
+ elif field.types[0] == GGUFValueType.STRING:
+ curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
+ else:
+ curr["value"] = field.parts[-1].tolist()[0]
+ if not args.no_tensors:
+ for idx, tensor in enumerate(reader.tensors):
+ tensors[tensor.name] = {
+ "index": idx,
+ "shape": tensor.shape.tolist(),
+ "type": tensor.tensor_type.name,
+ "offset": tensor.field.offset,
+ }
+ json.dump(result, sys.stdout)
+
+
+def markdown_table_with_alignment_support(header_map: list[dict[str, str]], data: list[dict[str, Any]]):
+ # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957
+
+ # Alignment Utility Function
+ def strAlign(padding: int, alignMode: str | None, strVal: str):
+ if alignMode == 'center':
+ return strVal.center(padding)
+ elif alignMode == 'right':
+ return strVal.rjust(padding - 1) + ' '
+ elif alignMode == 'left':
+ return ' ' + strVal.ljust(padding - 1)
+ else: # default left
+ return ' ' + strVal.ljust(padding - 1)
+
+ def dashAlign(padding: int, alignMode: str | None):
+ if alignMode == 'center':
+ return ':' + '-' * (padding - 2) + ':'
+ elif alignMode == 'right':
+ return '-' * (padding - 1) + ':'
+ elif alignMode == 'left':
+ return ':' + '-' * (padding - 1)
+ else: # default left
+ return '-' * (padding)
+
+ # Calculate Padding For Each Column Based On Header and Data Length
+ rowsPadding = {}
+ for index, columnEntry in enumerate(header_map):
+ padCount = max([len(str(v)) for d in data for k, v in d.items() if k == columnEntry['key_name']], default=0) + 2
+ headerPadCount = len(columnEntry['header_name']) + 2
+ rowsPadding[index] = headerPadCount if padCount <= headerPadCount else padCount
+
+ # Render Markdown Header
+ rows = []
+ rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(columnEntry['header_name'])) for index, columnEntry in enumerate(header_map)))
+ rows.append('|'.join(dashAlign(rowsPadding[index], columnEntry.get('align')) for index, columnEntry in enumerate(header_map)))
+
+ # Render Tabular Data
+ for item in data:
+ rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(item[columnEntry['key_name']])) for index, columnEntry in enumerate(header_map)))
+
+ # Convert Tabular String Rows Into String
+ tableString = ""
+ for row in rows:
+ tableString += f'|{row}|\n'
+
+ return tableString
+
+
+def element_count_rounded_notation(count: int) -> str:
+ if count > 1e15 :
+ # Quadrillion
+ scaled_amount = count * 1e-15
+ scale_suffix = "Q"
+ elif count > 1e12 :
+ # Trillions
+ scaled_amount = count * 1e-12
+ scale_suffix = "T"
+ elif count > 1e9 :
+ # Billions
+ scaled_amount = count * 1e-9
+ scale_suffix = "B"
+ elif count > 1e6 :
+ # Millions
+ scaled_amount = count * 1e-6
+ scale_suffix = "M"
+ elif count > 1e3 :
+ # Thousands
+ scaled_amount = count * 1e-3
+ scale_suffix = "K"
+ else:
+ # Under Thousands
+ scaled_amount = count
+ scale_suffix = ""
+ return f"{'~' if count > 1e3 else ''}{round(scaled_amount)}{scale_suffix}"
+
+
+def translate_tensor_name(name):
+ words = name.split(".")
+
+ # Source: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-tensor-names
+ abbreviation_dictionary = {
+ 'token_embd': 'Token embedding',
+ 'pos_embd': 'Position embedding',
+ 'output_norm': 'Output normalization',
+ 'output': 'Output',
+ 'attn_norm': 'Attention normalization',
+ 'attn_norm_2': 'Attention normalization',
+ 'attn_qkv': 'Attention query-key-value',
+ 'attn_q': 'Attention query',
+ 'attn_k': 'Attention key',
+ 'attn_v': 'Attention value',
+ 'attn_output': 'Attention output',
+ 'ffn_norm': 'Feed-forward network normalization',
+ 'ffn_up': 'Feed-forward network "up"',
+ 'ffn_gate': 'Feed-forward network "gate"',
+ 'ffn_down': 'Feed-forward network "down"',
+ 'ffn_gate_inp': 'Expert-routing layer for the Feed-forward network in Mixture of Expert models',
+ 'ffn_gate_exp': 'Feed-forward network "gate" layer per expert in Mixture of Expert models',
+ 'ffn_down_exp': 'Feed-forward network "down" layer per expert in Mixture of Expert models',
+ 'ffn_up_exp': 'Feed-forward network "up" layer per expert in Mixture of Expert models',
+ 'ssm_in': 'State space model input projections',
+ 'ssm_conv1d': 'State space model rolling/shift',
+ 'ssm_x': 'State space model selective parametrization',
+ 'ssm_a': 'State space model state compression',
+ 'ssm_d': 'State space model skip connection',
+ 'ssm_dt': 'State space model time step',
+ 'ssm_out': 'State space model output projection',
+ 'blk': 'Block',
+ 'enc': 'Encoder',
+ 'dec': 'Decoder',
+ }
+
+ expanded_words = []
+ for word in words:
+ word_norm = word.strip().lower()
+ if word_norm in abbreviation_dictionary:
+ expanded_words.append(abbreviation_dictionary[word_norm].title())
+ else:
+ expanded_words.append(word.title())
+
+ return ' '.join(expanded_words)
+
+
+def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
+ host_endian, file_endian = get_file_host_endian(reader)
+ markdown_content = ""
+ markdown_content += f'# {args.model} - GGUF Internal File Dump\n\n'
+ markdown_content += f'- Endian: {file_endian} endian\n'
+ markdown_content += '\n'
+ markdown_content += '## Key Value Metadata Store\n\n'
+ markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n'
+ markdown_content += '\n'
+
+ kv_dump_table: list[dict[str, str | int]] = []
+ for n, field in enumerate(reader.fields.values(), 1):
+ if not field.types:
+ pretty_type = 'N/A'
+ elif field.types[0] == GGUFValueType.ARRAY:
+ nest_count = len(field.types) - 1
+ pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
+ else:
+ pretty_type = str(field.types[-1].name)
+
+ def escape_markdown_inline_code(value_string):
+ # Find the longest contiguous sequence of backticks in the string then
+ # wrap string with appropriate number of backticks required to escape it
+ max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0)
+ inline_code_marker = '`' * (max_backticks + 1)
+
+ # If the string starts or ends with a backtick, add a space at the beginning and end
+ if value_string.startswith('`') or value_string.endswith('`'):
+ value_string = f" {value_string} "
+
+ return f"{inline_code_marker}{value_string}{inline_code_marker}"
+
+ total_elements = len(field.data)
+ value = ""
+ if len(field.types) == 1:
+ curr_type = field.types[0]
+ if curr_type == GGUFValueType.STRING:
+ truncate_length = 60
+ value_string = str(bytes(field.parts[-1]), encoding='utf-8')
+ if len(value_string) > truncate_length:
+ head = escape_markdown_inline_code(value_string[:truncate_length // 2])
+ tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
+ value = "{head}...{tail}".format(head=head, tail=tail)
+ else:
+ value = escape_markdown_inline_code(value_string)
+ elif curr_type in reader.gguf_scalar_to_np:
+ value = str(field.parts[-1][0])
+ else:
+ if field.types[0] == GGUFValueType.ARRAY:
+ curr_type = field.types[1]
+ array_elements = []
+
+ if curr_type == GGUFValueType.STRING:
+ render_element = min(5, total_elements)
+ for element_pos in range(render_element):
+ truncate_length = 30
+ value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
+ if len(value_string) > truncate_length:
+ head = escape_markdown_inline_code(value_string[:truncate_length // 2])
+ tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
+ value = "{head}...{tail}".format(head=head, tail=tail)
+ else:
+ value = escape_markdown_inline_code(value_string)
+ array_elements.append(value)
+
+ elif curr_type in reader.gguf_scalar_to_np:
+ render_element = min(7, total_elements)
+ for element_pos in range(render_element):
+ array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0]))
+
+ value = f'[ {", ".join(array_elements).strip()}{", ..." if total_elements > len(array_elements) else ""} ]'
+
+ kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})
+
+ kv_dump_table_header_map = [
+ {'key_name':'n', 'header_name':'POS', 'align':'right'},
+ {'key_name':'pretty_type', 'header_name':'TYPE', 'align':'left'},
+ {'key_name':'total_elements', 'header_name':'Count', 'align':'right'},
+ {'key_name':'field_name', 'header_name':'Key', 'align':'left'},
+ {'key_name':'value', 'header_name':'Value', 'align':'left'},
+ ]
+
+ markdown_content += markdown_table_with_alignment_support(kv_dump_table_header_map, kv_dump_table)
+
+ markdown_content += "\n"
+
+ if not args.no_tensors:
+ # Group tensors by their prefix and maintain order
+ tensor_prefix_order: list[str] = []
+ tensor_name_to_key: dict[str, int] = {}
+ tensor_groups: dict[str, list[ReaderTensor]] = {}
+ total_elements = sum(tensor.n_elements for tensor in reader.tensors)
+
+ # Parsing Tensors Record
+ for key, tensor in enumerate(reader.tensors):
+ tensor_components = tensor.name.split('.')
+
+ # Classify Tensor Group
+ tensor_group_name = "base"
+ if tensor_components[0] == 'blk':
+ tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}"
+ elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk':
+ tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}"
+ elif tensor_components[0] in ['enc', 'dec']:
+ tensor_group_name = f"{tensor_components[0]}"
+
+ # Check if new Tensor Group
+ if tensor_group_name not in tensor_groups:
+ tensor_groups[tensor_group_name] = []
+ tensor_prefix_order.append(tensor_group_name)
+
+ # Record Tensor and Tensor Position
+ tensor_groups[tensor_group_name].append(tensor)
+ tensor_name_to_key[tensor.name] = key
+
+ # Tensors Mapping Dump
+ markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n\n'
+ markdown_content += f'Total number of elements in all tensors: {total_elements} Elements\n'
+ markdown_content += '\n'
+
+ for group in tensor_prefix_order:
+ tensors = tensor_groups[group]
+ group_elements = sum(tensor.n_elements for tensor in tensors)
+ markdown_content += f"- [{translate_tensor_name(group)} Tensor Group - {element_count_rounded_notation(group_elements)} Elements](#{group.replace('.', '_')})\n"
+
+ markdown_content += "\n"
+
+ markdown_content += "### Tensor Data Offset\n"
+ markdown_content += '\n'
+ markdown_content += 'This table contains the offset and data segment relative to start of file\n'
+ markdown_content += '\n'
+
+ tensor_mapping_table: list[dict[str, str | int]] = []
+ for key, tensor in enumerate(reader.tensors):
+ data_offset_pretty = '{0:#16x}'.format(tensor.data_offset)
+ data_size_pretty = '{0:#16x}'.format(tensor.n_bytes)
+ tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty})
+
+ tensors_mapping_table_header_map = [
+ {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
+ {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
+ {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'},
+ {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'},
+ ]
+
+ markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table)
+ markdown_content += "\n"
+
+ for group in tensor_prefix_order:
+ tensors = tensor_groups[group]
+ group_elements = sum(tensor.n_elements for tensor in tensors)
+ group_percentage = group_elements / total_elements * 100
+ markdown_content += f"### <a name=\"{group.replace('.', '_')}\">{translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements</a>\n\n"
+
+ # Precalculate column sizing for visual consistency
+ prettify_element_est_count_size: int = 1
+ prettify_element_count_size: int = 1
+ prettify_dimension_max_widths: dict[int, int] = {}
+ for tensor in tensors:
+ prettify_element_est_count_size = max(prettify_element_est_count_size, len(str(element_count_rounded_notation(tensor.n_elements))))
+ prettify_element_count_size = max(prettify_element_count_size, len(str(tensor.n_elements)))
+ for i, dimension_size in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))):
+ prettify_dimension_max_widths[i] = max(prettify_dimension_max_widths.get(i,1), len(str(dimension_size)))
+
+ # Generate Tensor Layer Table Content
+ tensor_dump_table: list[dict[str, str | int]] = []
+ for tensor in tensors:
+ human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)"))
+ pretty_dimension = ' x '.join(f'{str(d):>{prettify_dimension_max_widths[i]}}' for i, d in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))))
+ element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})"
+ element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}"
+ type_name_string = f"{tensor.tensor_type.name}"
+ tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string})
+
+ tensor_dump_table_header_map = [
+ {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
+ {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
+ {'key_name':'human_layer_name', 'header_name':'Human Friendly Tensor Layer Name', 'align':'left'},
+ {'key_name':'element_count', 'header_name':'Elements', 'align':'left'},
+ {'key_name':'pretty_dimension', 'header_name':'Shape', 'align':'left'},
+ {'key_name':'tensor_type', 'header_name':'Type', 'align':'left'},
+ ]
+
+ markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table)
+
+ markdown_content += "\n"
+ markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n"
+ markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
+ markdown_content += "\n\n"
+
+ print(markdown_content) # noqa: NP100
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
+ parser.add_argument("model", type=str, help="GGUF format model filename")
+ parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
+ parser.add_argument("--json", action="store_true", help="Produce JSON output")
+ parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
+ parser.add_argument("--data-offset", action="store_true", help="Start of data offset")
+ parser.add_argument("--data-alignment", action="store_true", help="Data alignment applied globally to data field")
+ parser.add_argument("--markdown", action="store_true", help="Produce markdown output")
+ parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+
+ args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
+
+ logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+ if not args.json and not args.markdown and not args.data_offset and not args.data_alignment:
+ logger.info(f'* Loading: {args.model}')
+
+ reader = GGUFReader(args.model, 'r')
+
+ if args.json:
+ dump_metadata_json(reader, args)
+ elif args.markdown:
+ dump_markdown_metadata(reader, args)
+ elif args.data_offset:
+ print(reader.data_offset) # noqa: NP100
+ elif args.data_alignment:
+ print(reader.alignment) # noqa: NP100
+ else:
+ dump_metadata(reader, args)
+
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+++ b/gguf-py/gguf/scripts/gguf_hash.py
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import uuid
+import hashlib
+
+import logging
+import argparse
+import os
+import sys
+from pathlib import Path
+
+from tqdm import tqdm
+
+# Necessary to load the local gguf package
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from gguf import GGUFReader # noqa: E402
+
+
+logger = logging.getLogger("gguf-hash")
+
+# UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
+UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')
+
+
+# For more information about what field.parts and field.data represent,
+# please see the comments in the modify_gguf.py example.
+def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None:
+ sha1 = hashlib.sha1()
+ sha256 = hashlib.sha256()
+ uuidv5_sha1 = hashlib.sha1()
+ uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes)
+
+ # Total Weight Calculation For Progress Bar
+ total_weights = 0
+ for n, tensor in enumerate(reader.tensors, 1):
+
+ # We don't need these
+ if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+ continue
+
+ # Calculate Tensor Volume
+ sum_weights_in_tensor = 1
+ for dim in tensor.shape:
+ sum_weights_in_tensor *= dim
+ total_weights += sum_weights_in_tensor
+
+ # Hash Progress Bar
+ bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar)
+
+ # Hashing Process
+ for tensor in reader.tensors:
+
+ # We don't need these
+ if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+ continue
+
+ # Progressbar
+ sum_weights_in_tensor = 1
+ for dim in tensor.shape:
+ sum_weights_in_tensor *= dim
+ bar.update(sum_weights_in_tensor)
+
+ if not no_layer:
+
+ sha1_layer = hashlib.sha1()
+ sha1_layer.update(tensor.data.data)
+ print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
+
+ sha256_layer = hashlib.sha256()
+ sha256_layer.update(tensor.data.data)
+ print("sha256 {0} {1}:{2}".format(sha256_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
+
+ sha1.update(tensor.data.data)
+ sha256.update(tensor.data.data)
+ uuidv5_sha1.update(tensor.data.data)
+
+ # Flush Hash Progress Bar
+ bar.close()
+
+ # Display Hash Output
+ print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100
+ print("sha256 {0} {1}".format(sha256.hexdigest(), filename)) # noqa: NP100
+ print("uuid {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: NP100
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Calculate SHA-1, SHA-256 and UUIDv5 hashes of a GGUF file's tensor data")
+ parser.add_argument("model", type=str, help="GGUF format model filename")
+ parser.add_argument("--no-layer", action="store_true", help="exclude per layer hash")
+ parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+ parser.add_argument("--progressbar", action="store_true", help="enable progressbar")
+ args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
+ logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+ reader = GGUFReader(args.model, 'r')
+ gguf_hash(reader, args.model, not args.progressbar, args.no_layer)
+
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+++ b/gguf-py/gguf/scripts/gguf_new_metadata.py
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import logging
+import argparse
+import os
+import sys
+import json
+from pathlib import Path
+
+import numpy as np
+from tqdm import tqdm
+from typing import Any, Sequence, NamedTuple
+
+# Necessary to load the local gguf package
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+import gguf
+
+logger = logging.getLogger("gguf-new-metadata")
+
+
+class MetadataDetails(NamedTuple):
+ type: gguf.GGUFValueType
+ value: Any
+ description: str = ''
+
+
+def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
+ if np.uint32(1) == np.uint32(1).newbyteorder("<"):
+ # Host is little endian
+ host_endian = gguf.GGUFEndian.LITTLE
+ swapped_endian = gguf.GGUFEndian.BIG
+ else:
+ # Sorry PDP or other weird systems that don't use BE or LE.
+ host_endian = gguf.GGUFEndian.BIG
+ swapped_endian = gguf.GGUFEndian.LITTLE
+
+ if reader.byte_order == "S":
+ return swapped_endian
+ else:
+ return host_endian
+
+
+def decode_field(field: gguf.ReaderField | None) -> Any:
+ if field and field.types:
+ main_type = field.types[0]
+
+ if main_type == gguf.GGUFValueType.ARRAY:
+ sub_type = field.types[-1]
+
+ if sub_type == gguf.GGUFValueType.STRING:
+ return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
+ else:
+ return [pv for idx in field.data for pv in field.parts[idx].tolist()]
+ if main_type == gguf.GGUFValueType.STRING:
+ return str(bytes(field.parts[-1]), encoding='utf-8')
+ else:
+ return field.parts[-1][0]
+
+ return None
+
+
+def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
+ field = reader.get_field(key)
+
+ return decode_field(field)
+
+
+def find_token(token_list: Sequence[int], token: str) -> Sequence[int]:
+ token_ids = [index for index, value in enumerate(token_list) if value == token]
+
+ if len(token_ids) == 0:
+ raise LookupError(f'Unable to find "{token}" in token list!')
+
+ return token_ids
+
+
+def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, MetadataDetails], remove_metadata: Sequence[str]) -> None:
+ for field in reader.fields.values():
+ # Suppress virtual fields and fields written by GGUFWriter
+ if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
+ logger.debug(f'Suppressing {field.name}')
+ continue
+
+ # Skip old chat templates if we have new ones
+ if field.name.startswith(gguf.Keys.Tokenizer.CHAT_TEMPLATE) and gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
+ logger.debug(f'Skipping {field.name}')
+ continue
+
+ if field.name in remove_metadata:
+ logger.debug(f'Removing {field.name}')
+ continue
+
+ old_val = MetadataDetails(field.types[0], decode_field(field))
+ val = new_metadata.get(field.name, old_val)
+
+ if field.name in new_metadata:
+ logger.debug(f'Modifying {field.name}: "{old_val.value}" -> "{val.value}" {val.description}')
+ del new_metadata[field.name]
+ elif val.value is not None:
+ logger.debug(f'Copying {field.name}')
+
+ if val.value is not None:
+ writer.add_key_value(field.name, val.value, val.type)
+
+ if gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
+ logger.debug('Adding chat template(s)')
+ writer.add_chat_template(new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE].value)
+ del new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE]
+
+ for key, val in new_metadata.items():
+ logger.debug(f'Adding {key}: "{val.value}" {val.description}')
+ writer.add_key_value(key, val.value, val.type)
+
+ total_bytes = 0
+
+ for tensor in reader.tensors:
+ total_bytes += tensor.n_bytes
+ writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)
+
+ bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
+
+ writer.write_header_to_file()
+ writer.write_kv_data_to_file()
+ writer.write_ti_data_to_file()
+
+ for tensor in reader.tensors:
+ writer.write_tensor_data(tensor.data)
+ bar.update(tensor.n_bytes)
+
+ writer.close()
+
+
+def main() -> None:
+ tokenizer_metadata = (getattr(gguf.Keys.Tokenizer, n) for n in gguf.Keys.Tokenizer.__dict__.keys() if not n.startswith('_'))
+ token_names = dict((n.split('.')[-1][:-len('_token_id')], n) for n in tokenizer_metadata if n.endswith('_token_id'))
+
+ parser = argparse.ArgumentParser(description="Make a copy of a GGUF file with new metadata")
+ parser.add_argument("input", type=Path, help="GGUF format model input filename")
+ parser.add_argument("output", type=Path, help="GGUF format model output filename")
+    parser.add_argument("--general-name", type=str, help="The model's general.name", metavar='"name"')
+    parser.add_argument("--general-description", type=str, help="The model's general.description", metavar='"Description ..."')
+ parser.add_argument("--chat-template", type=str, help="Chat template string (or JSON string containing templates)", metavar='"{% ... %} ..."')
+ parser.add_argument("--chat-template-config", type=Path, help="Config file containing chat template(s)", metavar='tokenizer_config.json')
+    parser.add_argument("--pre-tokenizer", type=str, help="The model's tokenizer.ggml.pre", metavar='"pre tokenizer"')
+ parser.add_argument("--remove-metadata", action="append", type=str, help="Remove metadata (by key name) from output model", metavar='general.url')
+ parser.add_argument("--special-token", action="append", type=str, help="Special token by value", nargs=2, metavar=(' | '.join(token_names.keys()), '"<token>"'))
+ parser.add_argument("--special-token-by-id", action="append", type=str, help="Special token by id", nargs=2, metavar=(' | '.join(token_names.keys()), '0'))
+ parser.add_argument("--force", action="store_true", help="Bypass warnings without confirmation")
+ parser.add_argument("--verbose", action="store_true", help="Increase output verbosity")
+ args = parser.parse_args(None if len(sys.argv) > 2 else ["--help"])
+
+ logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+ new_metadata = {}
+ remove_metadata = args.remove_metadata or []
+
+ if args.general_name:
+ new_metadata[gguf.Keys.General.NAME] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_name)
+
+ if args.general_description:
+ new_metadata[gguf.Keys.General.DESCRIPTION] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_description)
+
+ if args.chat_template:
+ new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template)
+
+ if args.chat_template_config:
+ with open(args.chat_template_config, 'r') as fp:
+ config = json.load(fp)
+ template = config.get('chat_template')
+ if template:
+ new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)
+
+ if args.pre_tokenizer:
+ new_metadata[gguf.Keys.Tokenizer.PRE] = MetadataDetails(gguf.GGUFValueType.STRING, args.pre_tokenizer)
+
+ if remove_metadata:
+        logger.warning('*** Warning *** Warning *** Warning ***')
+ logger.warning('* Most metadata is required for a fully functional GGUF file,')
+ logger.warning('* removing crucial metadata may result in a corrupt output file!')
+
+ if not args.force:
+ logger.warning('* Enter exactly YES if you are positive you want to proceed:')
+ response = input('YES, I am sure> ')
+ if response != 'YES':
+ logger.info("You didn't enter YES. Okay then, see ya!")
+ sys.exit(0)
+
+ logger.info(f'* Loading: {args.input}')
+ reader = gguf.GGUFReader(args.input, 'r')
+
+ arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE)
+ endianess = get_byteorder(reader)
+
+ token_list = get_field_data(reader, gguf.Keys.Tokenizer.LIST) or []
+
+ for name, token in args.special_token or []:
+ if name not in token_names:
+ logger.warning(f'Unknown special token "{name}", ignoring...')
+ else:
+ ids = find_token(token_list, token)
+ new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, ids[0], f'= {token}')
+
+ if len(ids) > 1:
+ logger.warning(f'Multiple "{token}" tokens found, choosing ID {ids[0]}, use --special-token-by-id if you want another:')
+ logger.warning(', '.join(str(i) for i in ids))
+
+ for name, id_string in args.special_token_by_id or []:
+ if name not in token_names:
+ logger.warning(f'Unknown special token "{name}", ignoring...')
+ elif not id_string.isdecimal():
+ raise LookupError(f'Token ID "{id_string}" is not a valid ID!')
+ else:
+ id_int = int(id_string)
+
+ if id_int >= 0 and id_int < len(token_list):
+ new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, id_int, f'= {token_list[id_int]}')
+ else:
+ raise LookupError(f'Token ID {id_int} is not within token list!')
+
+ if os.path.isfile(args.output) and not args.force:
+        logger.warning('*** Warning *** Warning *** Warning ***')
+ logger.warning(f'* The "{args.output}" GGUF file already exists, it will be overwritten!')
+ logger.warning('* Enter exactly YES if you are positive you want to proceed:')
+ response = input('YES, I am sure> ')
+ if response != 'YES':
+ logger.info("You didn't enter YES. Okay then, see ya!")
+ sys.exit(0)
+
+ logger.info(f'* Writing: {args.output}')
+ writer = gguf.GGUFWriter(args.output, arch=arch, endianess=endianess)
+
+ alignment = get_field_data(reader, gguf.Keys.General.ALIGNMENT)
+ if alignment is not None:
+ logger.debug(f'Setting custom alignment: {alignment}')
+ writer.data_alignment = alignment
+
+ copy_with_new_metadata(reader, writer, new_metadata, remove_metadata)
+
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+++ b/gguf-py/gguf/scripts/gguf_set_metadata.py
+#!/usr/bin/env python3
+import logging
+import argparse
+import os
+import sys
+from pathlib import Path
+
+# Necessary to load the local gguf package
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from gguf import GGUFReader # noqa: E402
+
+logger = logging.getLogger("gguf-set-metadata")
+
+
+def minimal_example(filename: str) -> None:
+ reader = GGUFReader(filename, 'r+')
+    field = reader.get_field('tokenizer.ggml.bos_token_id')
+ if field is None:
+ return
+ part_index = field.data[0]
+ field.parts[part_index][0] = 2 # Set tokenizer.ggml.bos_token_id to 2
+ #
+ # So what's this field.data thing? It's helpful because field.parts contains
+ # _every_ part of the GGUF field. For example, tokenizer.ggml.bos_token_id consists
+ # of:
+ #
+ # Part index 0: Key length (27)
+ # Part index 1: Key data ("tokenizer.ggml.bos_token_id")
+ # Part index 2: Field type (4, the id for GGUFValueType.UINT32)
+ # Part index 3: Field value
+ #
+ # Note also that each part is an NDArray slice, so even a part that
+ # is only a single value like the key length will be a NDArray of
+ # the key length type (numpy.uint32).
+ #
+ # The .data attribute in the Field is a list of relevant part indexes
+ # and doesn't contain internal GGUF details like the key length part.
+ # In this case, .data will be [3] - just the part index of the
+ # field value itself.
+
+
+def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
+ field = reader.get_field(args.key)
+ if field is None:
+ logger.error(f'! Field {repr(args.key)} not found')
+ sys.exit(1)
+ # Note that field.types is a list of types. This is because the GGUF
+ # format supports arrays. For example, an array of UINT32 would
+ # look like [GGUFValueType.ARRAY, GGUFValueType.UINT32]
+ handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None
+ if handler is None:
+ logger.error(f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}')
+ sys.exit(1)
+ current_value = field.parts[field.data[0]][0]
+ new_value = handler(args.value)
+ logger.info(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}')
+ if current_value == new_value:
+ logger.info(f'- Key {repr(args.key)} already set to requested value {current_value}')
+ sys.exit(0)
+ if args.dry_run:
+ sys.exit(0)
+ if not args.force:
+        logger.warning('*** Warning *** Warning *** Warning ***')
+ logger.warning('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.')
+ logger.warning('* Enter exactly YES if you are positive you want to proceed:')
+ response = input('YES, I am sure> ')
+ if response != 'YES':
+ logger.info("You didn't enter YES. Okay then, see ya!")
+ sys.exit(0)
+ field.parts[field.data[0]][0] = new_value
+ logger.info('* Field changed. Successful completion.')
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(description="Set a simple value in GGUF file metadata")
+ parser.add_argument("model", type=str, help="GGUF format model filename")
+ parser.add_argument("key", type=str, help="Metadata key to set")
+ parser.add_argument("value", type=str, help="Metadata value to set")
+ parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything")
+ parser.add_argument("--force", action="store_true", help="Change the field without confirmation")
+ parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+
+ args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
+
+ logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+ logger.info(f'* Loading: {args.model}')
+ reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+')
+ set_metadata(reader, args)
+
+
+if __name__ == '__main__':
+ main()
[tool.poetry]
name = "gguf"
-version = "0.13.0"
+version = "0.14.0"
description = "Read and write ML models in GGUF for GGML"
authors = ["GGML <ggml@ggml.ai>"]
packages = [
{include = "gguf"},
{include = "gguf/py.typed"},
- {include = "scripts"},
]
readme = "README.md"
homepage = "https://ggml.ai"
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
-gguf-convert-endian = "scripts:gguf_convert_endian_entrypoint"
-gguf-dump = "scripts:gguf_dump_entrypoint"
-gguf-set-metadata = "scripts:gguf_set_metadata_entrypoint"
-gguf-new-metadata = "scripts:gguf_new_metadata_entrypoint"
+gguf-convert-endian = "gguf.scripts:gguf_convert_endian_entrypoint"
+gguf-dump = "gguf.scripts:gguf_dump_entrypoint"
+gguf-set-metadata = "gguf.scripts:gguf_set_metadata_entrypoint"
+gguf-new-metadata = "gguf.scripts:gguf_new_metadata_entrypoint"
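
With the package installed, each of these console scripts resolves to the corresponding `main` function re-exported from `gguf/scripts/__init__.py`. As a rough illustration (the model path below is a placeholder), the same entrypoint can also be driven from Python:

```python
import sys

from gguf.scripts import gguf_dump_entrypoint

# Equivalent to running `gguf-dump --markdown model.gguf` in a shell;
# the entrypoint parses its arguments from sys.argv.
sys.argv = ["gguf-dump", "--markdown", "model.gguf"]
gguf_dump_entrypoint()
```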
--- a/gguf-py/scripts/__init__.py
+++ /dev/null
-# pyright: reportUnusedImport=false
-
-from .gguf_convert_endian import main as gguf_convert_endian_entrypoint
-from .gguf_dump import main as gguf_dump_entrypoint
-from .gguf_set_metadata import main as gguf_set_metadata_entrypoint
-from .gguf_new_metadata import main as gguf_new_metadata_entrypoint
--- a/gguf-py/scripts/gguf_convert_endian.py
+++ /dev/null
-#!/usr/bin/env python3
-from __future__ import annotations
-
-import logging
-import argparse
-import os
-import sys
-from tqdm import tqdm
-from pathlib import Path
-
-import numpy as np
-
-# Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
- sys.path.insert(0, str(Path(__file__).parent.parent))
-
-import gguf
-
-logger = logging.getLogger("gguf-convert-endian")
-
-
-def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
- if np.uint32(1) == np.uint32(1).newbyteorder("<"):
- # Host is little endian
- host_endian = "little"
- swapped_endian = "big"
- else:
- # Sorry PDP or other weird systems that don't use BE or LE.
- host_endian = "big"
- swapped_endian = "little"
- if reader.byte_order == "S":
- file_endian = swapped_endian
- else:
- file_endian = host_endian
- order = host_endian if args.order == "native" else args.order
- logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
- if file_endian == order:
- logger.info(f"* File is already {order.upper()} endian. Nothing to do.")
- sys.exit(0)
- logger.info("* Checking tensors for conversion compatibility")
- for tensor in reader.tensors:
- if tensor.tensor_type not in (
- gguf.GGMLQuantizationType.F32,
- gguf.GGMLQuantizationType.F16,
- gguf.GGMLQuantizationType.Q8_0,
- ):
- raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
- logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
- if args.dry_run:
- return
- logger.warning("*** Warning *** Warning *** Warning **")
- logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
- if order != host_endian:
- logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
- logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
- logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
- response = input("YES, I am sure> ")
- if response != "YES":
- logger.warning("You didn't enter YES. Okay then, see ya!")
- sys.exit(0)
- logger.info(f"* Converting fields ({len(reader.fields)})")
- for idx, field in enumerate(reader.fields.values()):
- logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
- for part in field.parts:
- part.byteswap(inplace=True)
- logger.info(f"* Converting tensors ({len(reader.tensors)})")
-
- for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
- log_message = (
- f"Converting tensor {repr(tensor.name)}, "
- f"type={tensor.tensor_type.name}, "
- f"elements={tensor.n_elements} "
- )
-
- # Byte-swap each part of the tensor's field
- for part in tensor.field.parts:
- part.byteswap(inplace=True)
-
- # Byte-swap tensor data if necessary
- if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
- # Handle Q8_0 tensor blocks (block_q8_0)
- # Specific handling of block_q8_0 is required.
- # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
-
- block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
-
- n_blocks = len(tensor.data) // block_size
- for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
- block_offs = block_num * block_size
-
- # Byte-Swap f16 sized delta field
- delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
- delta.byteswap(inplace=True)
-
- # Byte-Swap Q8 weights
- if block_num % 100000 == 0:
- inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
-
- else:
- # Handle other tensor types
- tensor.data.byteswap(inplace=True)
-
- pbar.set_description(log_message)
-
- logger.info("* Completion")
-
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
- parser.add_argument(
- "model", type=str,
- help="GGUF format model filename",
- )
- parser.add_argument(
- "order", type=str, choices=['big', 'little', 'native'],
- help="Requested byte order",
- )
- parser.add_argument(
- "--dry-run", action="store_true",
- help="Don't actually change anything",
- )
- parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
-
- args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
-
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
-
- logger.info(f'* Loading: {args.model}')
- reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
- convert_byteorder(reader, args)
-
-
-if __name__ == "__main__":
- main()
--- a/gguf-py/scripts/gguf_dump.py
+++ /dev/null
-#!/usr/bin/env python3
-from __future__ import annotations
-
-import logging
-import argparse
-import os
-import re
-import sys
-from pathlib import Path
-from typing import Any
-
-import numpy as np
-
-# Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
- sys.path.insert(0, str(Path(__file__).parent.parent))
-
-from gguf import GGUFReader, GGUFValueType, ReaderTensor # noqa: E402
-
-logger = logging.getLogger("gguf-dump")
-
-
-def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
- host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
- if reader.byte_order == 'S':
- file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
- else:
- file_endian = host_endian
- return (host_endian, file_endian)
-
-
-# For more information about what field.parts and field.data represent,
-# please see the comments in the modify_gguf.py example.
-def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
- host_endian, file_endian = get_file_host_endian(reader)
- print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.') # noqa: NP100
- print(f'* Dumping {len(reader.fields)} key/value pair(s)') # noqa: NP100
- for n, field in enumerate(reader.fields.values(), 1):
- if not field.types:
- pretty_type = 'N/A'
- elif field.types[0] == GGUFValueType.ARRAY:
- nest_count = len(field.types) - 1
- pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
- else:
- pretty_type = str(field.types[-1].name)
-
- log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
- if len(field.types) == 1:
- curr_type = field.types[0]
- if curr_type == GGUFValueType.STRING:
- log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
- elif field.types[0] in reader.gguf_scalar_to_np:
- log_message += ' = {0}'.format(field.parts[-1][0])
- print(log_message) # noqa: NP100
- if args.no_tensors:
- return
- print(f'* Dumping {len(reader.tensors)} tensor(s)') # noqa: NP100
- for n, tensor in enumerate(reader.tensors, 1):
- prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
- print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}') # noqa: NP100
-
-
-def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
- import json
- host_endian, file_endian = get_file_host_endian(reader)
- metadata: dict[str, Any] = {}
- tensors: dict[str, Any] = {}
- result = {
- "filename": args.model,
- "endian": file_endian,
- "metadata": metadata,
- "tensors": tensors,
- }
- for idx, field in enumerate(reader.fields.values()):
- curr: dict[str, Any] = {
- "index": idx,
- "type": field.types[0].name if field.types else 'UNKNOWN',
- "offset": field.offset,
- }
- metadata[field.name] = curr
- if field.types[:1] == [GGUFValueType.ARRAY]:
- curr["array_types"] = [t.name for t in field.types][1:]
- if not args.json_array:
- continue
- itype = field.types[-1]
- if itype == GGUFValueType.STRING:
- curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
- else:
- curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
- elif field.types[0] == GGUFValueType.STRING:
- curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
- else:
- curr["value"] = field.parts[-1].tolist()[0]
- if not args.no_tensors:
- for idx, tensor in enumerate(reader.tensors):
- tensors[tensor.name] = {
- "index": idx,
- "shape": tensor.shape.tolist(),
- "type": tensor.tensor_type.name,
- "offset": tensor.field.offset,
- }
- json.dump(result, sys.stdout)
-
-
-def markdown_table_with_alignment_support(header_map: list[dict[str, str]], data: list[dict[str, Any]]):
- # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957
-
- # Alignment Utility Function
- def strAlign(padding: int, alignMode: str | None, strVal: str):
- if alignMode == 'center':
- return strVal.center(padding)
- elif alignMode == 'right':
- return strVal.rjust(padding - 1) + ' '
- elif alignMode == 'left':
- return ' ' + strVal.ljust(padding - 1)
- else: # default left
- return ' ' + strVal.ljust(padding - 1)
-
- def dashAlign(padding: int, alignMode: str | None):
- if alignMode == 'center':
- return ':' + '-' * (padding - 2) + ':'
- elif alignMode == 'right':
- return '-' * (padding - 1) + ':'
- elif alignMode == 'left':
- return ':' + '-' * (padding - 1)
- else: # default left
- return '-' * (padding)
-
- # Calculate Padding For Each Column Based On Header and Data Length
- rowsPadding = {}
- for index, columnEntry in enumerate(header_map):
- padCount = max([len(str(v)) for d in data for k, v in d.items() if k == columnEntry['key_name']], default=0) + 2
- headerPadCount = len(columnEntry['header_name']) + 2
- rowsPadding[index] = headerPadCount if padCount <= headerPadCount else padCount
-
- # Render Markdown Header
- rows = []
- rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(columnEntry['header_name'])) for index, columnEntry in enumerate(header_map)))
- rows.append('|'.join(dashAlign(rowsPadding[index], columnEntry.get('align')) for index, columnEntry in enumerate(header_map)))
-
- # Render Tabular Data
- for item in data:
- rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(item[columnEntry['key_name']])) for index, columnEntry in enumerate(header_map)))
-
- # Convert Tabular String Rows Into String
- tableString = ""
- for row in rows:
- tableString += f'|{row}|\n'
-
- return tableString
-
-
-def element_count_rounded_notation(count: int) -> str:
- if count > 1e15 :
- # Quadrillion
- scaled_amount = count * 1e-15
- scale_suffix = "Q"
- elif count > 1e12 :
- # Trillions
- scaled_amount = count * 1e-12
- scale_suffix = "T"
- elif count > 1e9 :
- # Billions
- scaled_amount = count * 1e-9
- scale_suffix = "B"
- elif count > 1e6 :
- # Millions
- scaled_amount = count * 1e-6
- scale_suffix = "M"
- elif count > 1e3 :
- # Thousands
- scaled_amount = count * 1e-3
- scale_suffix = "K"
- else:
- # Under Thousands
- scaled_amount = count
- scale_suffix = ""
- return f"{'~' if count > 1e3 else ''}{round(scaled_amount)}{scale_suffix}"
-
-
-def translate_tensor_name(name):
- words = name.split(".")
-
- # Source: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-tensor-names
- abbreviation_dictionary = {
- 'token_embd': 'Token embedding',
- 'pos_embd': 'Position embedding',
- 'output_norm': 'Output normalization',
- 'output': 'Output',
- 'attn_norm': 'Attention normalization',
- 'attn_norm_2': 'Attention normalization',
- 'attn_qkv': 'Attention query-key-value',
- 'attn_q': 'Attention query',
- 'attn_k': 'Attention key',
- 'attn_v': 'Attention value',
- 'attn_output': 'Attention output',
- 'ffn_norm': 'Feed-forward network normalization',
- 'ffn_up': 'Feed-forward network "up"',
- 'ffn_gate': 'Feed-forward network "gate"',
- 'ffn_down': 'Feed-forward network "down"',
- 'ffn_gate_inp': 'Expert-routing layer for the Feed-forward network in Mixture of Expert models',
- 'ffn_gate_exp': 'Feed-forward network "gate" layer per expert in Mixture of Expert models',
- 'ffn_down_exp': 'Feed-forward network "down" layer per expert in Mixture of Expert models',
- 'ffn_up_exp': 'Feed-forward network "up" layer per expert in Mixture of Expert models',
- 'ssm_in': 'State space model input projections',
- 'ssm_conv1d': 'State space model rolling/shift',
- 'ssm_x': 'State space model selective parametrization',
- 'ssm_a': 'State space model state compression',
- 'ssm_d': 'State space model skip connection',
- 'ssm_dt': 'State space model time step',
- 'ssm_out': 'State space model output projection',
- 'blk': 'Block',
- 'enc': 'Encoder',
- 'dec': 'Decoder',
- }
-
- expanded_words = []
- for word in words:
- word_norm = word.strip().lower()
- if word_norm in abbreviation_dictionary:
- expanded_words.append(abbreviation_dictionary[word_norm].title())
- else:
- expanded_words.append(word.title())
-
- return ' '.join(expanded_words)
-
-
-def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
- host_endian, file_endian = get_file_host_endian(reader)
- markdown_content = ""
- markdown_content += f'# {args.model} - GGUF Internal File Dump\n\n'
- markdown_content += f'- Endian: {file_endian} endian\n'
- markdown_content += '\n'
- markdown_content += '## Key Value Metadata Store\n\n'
- markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n'
- markdown_content += '\n'
-
- kv_dump_table: list[dict[str, str | int]] = []
- for n, field in enumerate(reader.fields.values(), 1):
- if not field.types:
- pretty_type = 'N/A'
- elif field.types[0] == GGUFValueType.ARRAY:
- nest_count = len(field.types) - 1
- pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
- else:
- pretty_type = str(field.types[-1].name)
-
- def escape_markdown_inline_code(value_string):
- # Find the longest contiguous sequence of backticks in the string then
- # wrap string with appropriate number of backticks required to escape it
- max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0)
- inline_code_marker = '`' * (max_backticks + 1)
-
- # If the string starts or ends with a backtick, add a space at the beginning and end
- if value_string.startswith('`') or value_string.endswith('`'):
- value_string = f" {value_string} "
-
- return f"{inline_code_marker}{value_string}{inline_code_marker}"
-
- total_elements = len(field.data)
- value = ""
- if len(field.types) == 1:
- curr_type = field.types[0]
- if curr_type == GGUFValueType.STRING:
- truncate_length = 60
- value_string = str(bytes(field.parts[-1]), encoding='utf-8')
- if len(value_string) > truncate_length:
- head = escape_markdown_inline_code(value_string[:truncate_length // 2])
- tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
- value = "{head}...{tail}".format(head=head, tail=tail)
- else:
- value = escape_markdown_inline_code(value_string)
- elif curr_type in reader.gguf_scalar_to_np:
- value = str(field.parts[-1][0])
- else:
- if field.types[0] == GGUFValueType.ARRAY:
- curr_type = field.types[1]
- array_elements = []
-
- if curr_type == GGUFValueType.STRING:
- render_element = min(5, total_elements)
- for element_pos in range(render_element):
- truncate_length = 30
- value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
- if len(value_string) > truncate_length:
- head = escape_markdown_inline_code(value_string[:truncate_length // 2])
- tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
- value = "{head}...{tail}".format(head=head, tail=tail)
- else:
- value = escape_markdown_inline_code(value_string)
- array_elements.append(value)
-
- elif curr_type in reader.gguf_scalar_to_np:
- render_element = min(7, total_elements)
- for element_pos in range(render_element):
- array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0]))
-
- value = f'[ {", ".join(array_elements).strip()}{", ..." if total_elements > len(array_elements) else ""} ]'
-
- kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})
-
- kv_dump_table_header_map = [
- {'key_name':'n', 'header_name':'POS', 'align':'right'},
- {'key_name':'pretty_type', 'header_name':'TYPE', 'align':'left'},
- {'key_name':'total_elements', 'header_name':'Count', 'align':'right'},
- {'key_name':'field_name', 'header_name':'Key', 'align':'left'},
- {'key_name':'value', 'header_name':'Value', 'align':'left'},
- ]
-
- markdown_content += markdown_table_with_alignment_support(kv_dump_table_header_map, kv_dump_table)
-
- markdown_content += "\n"
-
- if not args.no_tensors:
- # Group tensors by their prefix and maintain order
- tensor_prefix_order: list[str] = []
- tensor_name_to_key: dict[str, int] = {}
- tensor_groups: dict[str, list[ReaderTensor]] = {}
- total_elements = sum(tensor.n_elements for tensor in reader.tensors)
-
- # Parsing Tensors Record
- for key, tensor in enumerate(reader.tensors):
- tensor_components = tensor.name.split('.')
-
- # Classify Tensor Group
- tensor_group_name = "base"
- if tensor_components[0] == 'blk':
- tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}"
- elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk':
- tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}"
- elif tensor_components[0] in ['enc', 'dec']:
- tensor_group_name = f"{tensor_components[0]}"
-
- # Check if new Tensor Group
- if tensor_group_name not in tensor_groups:
- tensor_groups[tensor_group_name] = []
- tensor_prefix_order.append(tensor_group_name)
-
- # Record Tensor and Tensor Position
- tensor_groups[tensor_group_name].append(tensor)
- tensor_name_to_key[tensor.name] = key
-
- # Tensors Mapping Dump
- markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n\n'
- markdown_content += f'Total number of elements in all tensors: {total_elements} Elements\n'
- markdown_content += '\n'
-
- for group in tensor_prefix_order:
- tensors = tensor_groups[group]
- group_elements = sum(tensor.n_elements for tensor in tensors)
- markdown_content += f"- [{translate_tensor_name(group)} Tensor Group - {element_count_rounded_notation(group_elements)} Elements](#{group.replace('.', '_')})\n"
-
- markdown_content += "\n"
-
- markdown_content += "### Tensor Data Offset\n"
- markdown_content += '\n'
- markdown_content += 'This table contains the offset and data segment relative to start of file\n'
- markdown_content += '\n'
-
- tensor_mapping_table: list[dict[str, str | int]] = []
- for key, tensor in enumerate(reader.tensors):
- data_offset_pretty = '{0:#16x}'.format(tensor.data_offset)
- data_size_pretty = '{0:#16x}'.format(tensor.n_bytes)
- tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty})
-
- tensors_mapping_table_header_map = [
- {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
- {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
- {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'},
- {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'},
- ]
-
- markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table)
- markdown_content += "\n"
-
- for group in tensor_prefix_order:
- tensors = tensor_groups[group]
- group_elements = sum(tensor.n_elements for tensor in tensors)
- group_percentage = group_elements / total_elements * 100
- markdown_content += f"### <a name=\"{group.replace('.', '_')}\">{translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements</a>\n\n"
-
- # Precalculate column sizing for visual consistency
- prettify_element_est_count_size: int = 1
- prettify_element_count_size: int = 1
- prettify_dimension_max_widths: dict[int, int] = {}
- for tensor in tensors:
- prettify_element_est_count_size = max(prettify_element_est_count_size, len(str(element_count_rounded_notation(tensor.n_elements))))
- prettify_element_count_size = max(prettify_element_count_size, len(str(tensor.n_elements)))
- for i, dimension_size in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))):
- prettify_dimension_max_widths[i] = max(prettify_dimension_max_widths.get(i,1), len(str(dimension_size)))
-
- # Generate Tensor Layer Table Content
- tensor_dump_table: list[dict[str, str | int]] = []
- for tensor in tensors:
- human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)"))
- pretty_dimension = ' x '.join(f'{str(d):>{prettify_dimension_max_widths[i]}}' for i, d in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))))
- element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})"
- element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}"
- type_name_string = f"{tensor.tensor_type.name}"
- tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string})
-
- tensor_dump_table_header_map = [
- {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'},
- {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'},
- {'key_name':'human_layer_name', 'header_name':'Human Friendly Tensor Layer Name', 'align':'left'},
- {'key_name':'element_count', 'header_name':'Elements', 'align':'left'},
- {'key_name':'pretty_dimension', 'header_name':'Shape', 'align':'left'},
- {'key_name':'tensor_type', 'header_name':'Type', 'align':'left'},
- ]
-
- markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table)
-
- markdown_content += "\n"
- markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n"
- markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
- markdown_content += "\n\n"
-
- print(markdown_content) # noqa: NP100
-
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
- parser.add_argument("model", type=str, help="GGUF format model filename")
- parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
- parser.add_argument("--json", action="store_true", help="Produce JSON output")
- parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
- parser.add_argument("--data-offset", action="store_true", help="Print only the start offset of the tensor data")
- parser.add_argument("--data-alignment", action="store_true", help="Print only the data alignment applied globally to the data field")
- parser.add_argument("--markdown", action="store_true", help="Produce markdown output")
- parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
-
- args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
-
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
-
- if not args.json and not args.markdown and not args.data_offset and not args.data_alignment:
- logger.info(f'* Loading: {args.model}')
-
- reader = GGUFReader(args.model, 'r')
-
- if args.json:
- dump_metadata_json(reader, args)
- elif args.markdown:
- dump_markdown_metadata(reader, args)
- elif args.data_offset:
- print(reader.data_offset) # noqa: NP100
- elif args.data_alignment:
- print(reader.alignment) # noqa: NP100
- else:
- dump_metadata(reader, args)
-
-
-if __name__ == '__main__':
- main()
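For quick reference, everything the dump script above renders comes straight from `GGUFReader`; the following is an illustrative sketch of the same reads done programmatically (the `model.gguf` filename is a placeholder):

```python
# Illustrative sketch: iterate the key-value metadata and tensor records the
# way gguf_dump.py does, using only GGUFReader attributes shown above.
from gguf import GGUFReader

reader = GGUFReader("model.gguf", 'r')  # placeholder filename

# Key-value metadata: field.types describes the value type(s);
# an array of strings, for example, reports [ARRAY, STRING].
for field in reader.fields.values():
    pretty_type = field.types[0].name if field.types else 'N/A'
    print(f"{field.name}: {pretty_type}")

# Tensor records expose name, shape, element count, type and byte size.
for tensor in reader.tensors:
    print(tensor.name, list(tensor.shape), tensor.n_elements,
          tensor.tensor_type.name, tensor.n_bytes)
```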
+++ /dev/null
-#!/usr/bin/env python3
-from __future__ import annotations
-
-import uuid
-import hashlib
-
-import logging
-import argparse
-import os
-import sys
-from pathlib import Path
-
-from tqdm import tqdm
-
-# Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
- sys.path.insert(0, str(Path(__file__).parent.parent))
-
-from gguf import GGUFReader # noqa: E402
-
-
-logger = logging.getLogger("gguf-hash")
-
-# UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
-UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')
-
-
-# For more information about what field.parts and field.data represent,
-# please see the comments in the modify_gguf.py example.
-def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None:
- sha1 = hashlib.sha1()
- sha256 = hashlib.sha256()
- uuidv5_sha1 = hashlib.sha1()
- uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes)
-
- # Total Weight Calculation For Progress Bar
- total_weights = 0
- for n, tensor in enumerate(reader.tensors, 1):
-
- # We don't need these
- if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
- continue
-
- # Calculate Tensor Volume
- sum_weights_in_tensor = 1
- for dim in tensor.shape:
- sum_weights_in_tensor *= dim
- total_weights += sum_weights_in_tensor
-
- # Hash Progress Bar
- bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar)
-
- # Hashing Process
- for tensor in reader.tensors:
-
- # We don't need these
- if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
- continue
-
- # Progressbar
- sum_weights_in_tensor = 1
- for dim in tensor.shape:
- sum_weights_in_tensor *= dim
- bar.update(sum_weights_in_tensor)
-
- if not no_layer:
-
- sha1_layer = hashlib.sha1()
- sha1_layer.update(tensor.data.data)
- print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
-
- sha256_layer = hashlib.sha256()
- sha256_layer.update(tensor.data.data)
- print("sha256 {0} {1}:{2}".format(sha256_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
-
- sha1.update(tensor.data.data)
- sha256.update(tensor.data.data)
- uuidv5_sha1.update(tensor.data.data)
-
- # Flush Hash Progress Bar
- bar.close()
-
- # Display Hash Output
- print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100
- print("sha256 {0} {1}".format(sha256.hexdigest(), filename)) # noqa: NP100
- print("uuid {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: NP100
-
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Hash the tensor data in a GGUF model file")
- parser.add_argument("model", type=str, help="GGUF format model filename")
- parser.add_argument("--no-layer", action="store_true", help="exclude per layer hash")
- parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
- parser.add_argument("--progressbar", action="store_true", help="enable progressbar")
- args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
- reader = GGUFReader(args.model, 'r')
- gguf_hash(reader, args.model, not args.progressbar, args.no_layer)
-
-
-if __name__ == '__main__':
- main()
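As the commented-out `uuid.uuid5(...)` line in the hash script notes, the hard-coded `UUID_NAMESPACE_LLAMA_CPP` is just the UUIDv5 of the llama.cpp Wikipedia URL; a small sketch to reproduce it (illustrative only):

```python
# Recomputes the namespace constant embedded in gguf_hash.py, following the
# commented-out uuid.uuid5(...) line kept next to it above.
import uuid

namespace = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
print(namespace)  # expected: ef001206-dadc-5f6d-a15f-3359e577d4e5
```

The per-model UUID the script prints is derived the same way: a SHA-1 over this namespace plus all tensor data, truncated to 16 bytes and stamped as version 5.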
+++ /dev/null
-#!/usr/bin/env python3
-from __future__ import annotations
-
-import logging
-import argparse
-import os
-import sys
-import json
-from pathlib import Path
-
-import numpy as np
-from tqdm import tqdm
-from typing import Any, Sequence, NamedTuple
-
-# Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
- sys.path.insert(0, str(Path(__file__).parent.parent))
-
-import gguf
-
-logger = logging.getLogger("gguf-new-metadata")
-
-
-class MetadataDetails(NamedTuple):
- type: gguf.GGUFValueType
- value: Any
- description: str = ''
-
-
-def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
- if np.uint32(1) == np.uint32(1).newbyteorder("<"):
- # Host is little endian
- host_endian = gguf.GGUFEndian.LITTLE
- swapped_endian = gguf.GGUFEndian.BIG
- else:
- # Sorry PDP or other weird systems that don't use BE or LE.
- host_endian = gguf.GGUFEndian.BIG
- swapped_endian = gguf.GGUFEndian.LITTLE
-
- if reader.byte_order == "S":
- return swapped_endian
- else:
- return host_endian
-
-
-def decode_field(field: gguf.ReaderField | None) -> Any:
- if field and field.types:
- main_type = field.types[0]
-
- if main_type == gguf.GGUFValueType.ARRAY:
- sub_type = field.types[-1]
-
- if sub_type == gguf.GGUFValueType.STRING:
- return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
- else:
- return [pv for idx in field.data for pv in field.parts[idx].tolist()]
- if main_type == gguf.GGUFValueType.STRING:
- return str(bytes(field.parts[-1]), encoding='utf-8')
- else:
- return field.parts[-1][0]
-
- return None
-
-
-def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
- field = reader.get_field(key)
-
- return decode_field(field)
-
-
-def find_token(token_list: Sequence[int], token: str) -> Sequence[int]:
- token_ids = [index for index, value in enumerate(token_list) if value == token]
-
- if len(token_ids) == 0:
- raise LookupError(f'Unable to find "{token}" in token list!')
-
- return token_ids
-
-
-def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, MetadataDetails], remove_metadata: Sequence[str]) -> None:
- for field in reader.fields.values():
- # Suppress virtual fields and fields written by GGUFWriter
- if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
- logger.debug(f'Suppressing {field.name}')
- continue
-
- # Skip old chat templates if we have new ones
- if field.name.startswith(gguf.Keys.Tokenizer.CHAT_TEMPLATE) and gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
- logger.debug(f'Skipping {field.name}')
- continue
-
- if field.name in remove_metadata:
- logger.debug(f'Removing {field.name}')
- continue
-
- old_val = MetadataDetails(field.types[0], decode_field(field))
- val = new_metadata.get(field.name, old_val)
-
- if field.name in new_metadata:
- logger.debug(f'Modifying {field.name}: "{old_val.value}" -> "{val.value}" {val.description}')
- del new_metadata[field.name]
- elif val.value is not None:
- logger.debug(f'Copying {field.name}')
-
- if val.value is not None:
- writer.add_key_value(field.name, val.value, val.type)
-
- if gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
- logger.debug('Adding chat template(s)')
- writer.add_chat_template(new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE].value)
- del new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE]
-
- for key, val in new_metadata.items():
- logger.debug(f'Adding {key}: "{val.value}" {val.description}')
- writer.add_key_value(key, val.value, val.type)
-
- total_bytes = 0
-
- for tensor in reader.tensors:
- total_bytes += tensor.n_bytes
- writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)
-
- bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
-
- writer.write_header_to_file()
- writer.write_kv_data_to_file()
- writer.write_ti_data_to_file()
-
- for tensor in reader.tensors:
- writer.write_tensor_data(tensor.data)
- bar.update(tensor.n_bytes)
-
- writer.close()
-
-
-def main() -> None:
- tokenizer_metadata = (getattr(gguf.Keys.Tokenizer, n) for n in gguf.Keys.Tokenizer.__dict__.keys() if not n.startswith('_'))
- token_names = dict((n.split('.')[-1][:-len('_token_id')], n) for n in tokenizer_metadata if n.endswith('_token_id'))
-
- parser = argparse.ArgumentParser(description="Make a copy of a GGUF file with new metadata")
- parser.add_argument("input", type=Path, help="GGUF format model input filename")
- parser.add_argument("output", type=Path, help="GGUF format model output filename")
- parser.add_argument("--general-name", type=str, help="The model's general.name", metavar='"name"')
- parser.add_argument("--general-description", type=str, help="The model's general.description", metavar='"Description ..."')
- parser.add_argument("--chat-template", type=str, help="Chat template string (or JSON string containing templates)", metavar='"{% ... %} ..."')
- parser.add_argument("--chat-template-config", type=Path, help="Config file containing chat template(s)", metavar='tokenizer_config.json')
- parser.add_argument("--pre-tokenizer", type=str, help="The model's tokenizer.ggml.pre", metavar='"pre tokenizer"')
- parser.add_argument("--remove-metadata", action="append", type=str, help="Remove metadata (by key name) from output model", metavar='general.url')
- parser.add_argument("--special-token", action="append", type=str, help="Special token by value", nargs=2, metavar=(' | '.join(token_names.keys()), '"<token>"'))
- parser.add_argument("--special-token-by-id", action="append", type=str, help="Special token by id", nargs=2, metavar=(' | '.join(token_names.keys()), '0'))
- parser.add_argument("--force", action="store_true", help="Bypass warnings without confirmation")
- parser.add_argument("--verbose", action="store_true", help="Increase output verbosity")
- args = parser.parse_args(None if len(sys.argv) > 2 else ["--help"])
-
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
-
- new_metadata = {}
- remove_metadata = args.remove_metadata or []
-
- if args.general_name:
- new_metadata[gguf.Keys.General.NAME] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_name)
-
- if args.general_description:
- new_metadata[gguf.Keys.General.DESCRIPTION] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_description)
-
- if args.chat_template:
- new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template)
-
- if args.chat_template_config:
- with open(args.chat_template_config, 'r') as fp:
- config = json.load(fp)
- template = config.get('chat_template')
- if template:
- new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)
-
- if args.pre_tokenizer:
- new_metadata[gguf.Keys.Tokenizer.PRE] = MetadataDetails(gguf.GGUFValueType.STRING, args.pre_tokenizer)
-
- if remove_metadata:
- logger.warning('*** Warning *** Warning *** Warning **')
- logger.warning('* Most metadata is required for a fully functional GGUF file,')
- logger.warning('* removing crucial metadata may result in a corrupt output file!')
-
- if not args.force:
- logger.warning('* Enter exactly YES if you are positive you want to proceed:')
- response = input('YES, I am sure> ')
- if response != 'YES':
- logger.info("You didn't enter YES. Okay then, see ya!")
- sys.exit(0)
-
- logger.info(f'* Loading: {args.input}')
- reader = gguf.GGUFReader(args.input, 'r')
-
- arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE)
- endianess = get_byteorder(reader)
-
- token_list = get_field_data(reader, gguf.Keys.Tokenizer.LIST) or []
-
- for name, token in args.special_token or []:
- if name not in token_names:
- logger.warning(f'Unknown special token "{name}", ignoring...')
- else:
- ids = find_token(token_list, token)
- new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, ids[0], f'= {token}')
-
- if len(ids) > 1:
- logger.warning(f'Multiple "{token}" tokens found, choosing ID {ids[0]}, use --special-token-by-id if you want another:')
- logger.warning(', '.join(str(i) for i in ids))
-
- for name, id_string in args.special_token_by_id or []:
- if name not in token_names:
- logger.warning(f'Unknown special token "{name}", ignoring...')
- elif not id_string.isdecimal():
- raise LookupError(f'Token ID "{id_string}" is not a valid ID!')
- else:
- id_int = int(id_string)
-
- if id_int >= 0 and id_int < len(token_list):
- new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, id_int, f'= {token_list[id_int]}')
- else:
- raise LookupError(f'Token ID {id_int} is not within token list!')
-
- if os.path.isfile(args.output) and not args.force:
- logger.warning('*** Warning *** Warning *** Warning **')
- logger.warning(f'* The "{args.output}" GGUF file already exists, it will be overwritten!')
- logger.warning('* Enter exactly YES if you are positive you want to proceed:')
- response = input('YES, I am sure> ')
- if response != 'YES':
- logger.info("You didn't enter YES. Okay then, see ya!")
- sys.exit(0)
-
- logger.info(f'* Writing: {args.output}')
- writer = gguf.GGUFWriter(args.output, arch=arch, endianess=endianess)
-
- alignment = get_field_data(reader, gguf.Keys.General.ALIGNMENT)
- if alignment is not None:
- logger.debug(f'Setting custom alignment: {alignment}')
- writer.data_alignment = alignment
-
- copy_with_new_metadata(reader, writer, new_metadata, remove_metadata)
-
-
-if __name__ == '__main__':
- main()
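The rewrite path in `copy_with_new_metadata` reduces to a handful of `GGUFWriter` calls; below is a stripped-down sketch of that copy loop (filenames and the `arch` value are placeholders, and the key-value copy is omitted for brevity):

```python
# Stripped-down copy loop modelled on copy_with_new_metadata above.
# "in.gguf", "out.gguf" and arch="llama" are placeholders; a faithful copy
# would also replay the key-value metadata via writer.add_key_value().
import gguf

reader = gguf.GGUFReader("in.gguf", 'r')
writer = gguf.GGUFWriter("out.gguf", arch="llama")

# Declare every tensor before any data is written.
for tensor in reader.tensors:
    writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                           tensor.data.nbytes, tensor.tensor_type)

# Header, key-value block and tensor info are flushed in this order.
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_ti_data_to_file()

# Stream the tensor payloads.
for tensor in reader.tensors:
    writer.write_tensor_data(tensor.data)

writer.close()
```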
+++ /dev/null
-#!/usr/bin/env python3
-import logging
-import argparse
-import os
-import sys
-from pathlib import Path
-
-# Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
- sys.path.insert(0, str(Path(__file__).parent.parent))
-
-from gguf import GGUFReader # noqa: E402
-
-logger = logging.getLogger("gguf-set-metadata")
-
-
-def minimal_example(filename: str) -> None:
- reader = GGUFReader(filename, 'r+')
- field = reader.fields['tokenizer.ggml.bos_token_id']
- if field is None:
- return
- part_index = field.data[0]
- field.parts[part_index][0] = 2 # Set tokenizer.ggml.bos_token_id to 2
- #
- # So what's this field.data thing? It's helpful because field.parts contains
- # _every_ part of the GGUF field. For example, tokenizer.ggml.bos_token_id consists
- # of:
- #
- # Part index 0: Key length (27)
- # Part index 1: Key data ("tokenizer.ggml.bos_token_id")
- # Part index 2: Field type (4, the id for GGUFValueType.UINT32)
- # Part index 3: Field value
- #
- # Note also that each part is an NDArray slice, so even a part that
- # is only a single value like the key length will be a NDArray of
- # the key length type (numpy.uint32).
- #
- # The .data attribute in the Field is a list of relevant part indexes
- # and doesn't contain internal GGUF details like the key length part.
- # In this case, .data will be [3] - just the part index of the
- # field value itself.
-
-
-def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
- field = reader.get_field(args.key)
- if field is None:
- logger.error(f'! Field {repr(args.key)} not found')
- sys.exit(1)
- # Note that field.types is a list of types. This is because the GGUF
- # format supports arrays. For example, an array of UINT32 would
- # look like [GGUFValueType.ARRAY, GGUFValueType.UINT32]
- handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None
- if handler is None:
- logger.error(f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}')
- sys.exit(1)
- current_value = field.parts[field.data[0]][0]
- new_value = handler(args.value)
- logger.info(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}')
- if current_value == new_value:
- logger.info(f'- Key {repr(args.key)} already set to requested value {current_value}')
- sys.exit(0)
- if args.dry_run:
- sys.exit(0)
- if not args.force:
- logger.warning('*** Warning *** Warning *** Warning **')
- logger.warning('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.')
- logger.warning('* Enter exactly YES if you are positive you want to proceed:')
- response = input('YES, I am sure> ')
- if response != 'YES':
- logger.info("You didn't enter YES. Okay then, see ya!")
- sys.exit(0)
- field.parts[field.data[0]][0] = new_value
- logger.info('* Field changed. Successful completion.')
-
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Set a simple value in GGUF file metadata")
- parser.add_argument("model", type=str, help="GGUF format model filename")
- parser.add_argument("key", type=str, help="Metadata key to set")
- parser.add_argument("value", type=str, help="Metadata value to set")
- parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything")
- parser.add_argument("--force", action="store_true", help="Change the field without confirmation")
- parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
-
- args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
-
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
-
- logger.info(f'* Loading: {args.model}')
- reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+')
- set_metadata(reader, args)
-
-
-if __name__ == '__main__':
- main()
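As a read-only companion to `minimal_example` and `set_metadata` above, locating a scalar value uses the same `field.data`/`field.parts` indexing; a short sketch (filename and key are placeholders):

```python
# Reads the value that set_metadata above would overwrite in place;
# the filename and key are placeholders for illustration.
from gguf import GGUFReader

reader = GGUFReader("model.gguf", 'r')
field = reader.get_field('tokenizer.ggml.bos_token_id')
if field is not None and field.types:
    # field.data[0] indexes the part holding the value itself, so this
    # prints the current BOS token id (as a numpy scalar).
    print(field.name, field.parts[field.data[0]][0])
```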