From: Vinesh Janarthanan
Date: Wed, 8 Jan 2025 18:54:58 +0000 (-0600)
Subject: gguf-py : move scripts directory (#11116)
X-Git-Tag: upstream/0.0.4488~39
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=8a1d9c25fafbaf4182dd0b785dd6303ee40d55bc;p=pkg%2Fggml%2Fsources%2Fllama.cpp

gguf-py : move scripts directory (#11116)

* Moved scripts dir and fixed pyproject.toml
* updated readme
* fixed README urls
* bump pypi gguf to v0.14.0
* retrigger ci
* empty commit - trigger ci
---

diff --git a/gguf-py/README.md b/gguf-py/README.md
index 24af96a1..37a75923 100644
--- a/gguf-py/README.md
+++ b/gguf-py/README.md
@@ -15,13 +15,13 @@ pip install gguf

 [examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model.

-[scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console.
+[gguf/scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console.

-[scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key.
+[gguf/scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key.

-[scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files.
+[gguf/scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files.

-[scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values.
+[gguf/scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values.
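For orientation, a minimal sketch (not part of this commit) of how the relocated tools are reached after the move: the console commands declared in pyproject.toml (gguf-dump, gguf-set-metadata, gguf-convert-endian, gguf-new-metadata) resolve to the entry points re-exported from gguf/scripts/__init__.py, so the same functions can also be driven programmatically. "model.gguf" below is a placeholder path.

```python
# Minimal sketch, assuming gguf >= 0.14.0 is installed and "model.gguf" exists
# (placeholder path). The `gguf-dump` console command maps to this same function.
import sys

from gguf.scripts import gguf_dump_entrypoint

sys.argv = ["gguf-dump", "model.gguf", "--no-tensors"]  # same flags gguf_dump.py accepts
gguf_dump_entrypoint()  # prints the key/value metadata, like `gguf-dump model.gguf --no-tensors`
```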
## Development Maintainers who participate in development of this package are advised to install it in editable mode: diff --git a/gguf-py/gguf/scripts/__init__.py b/gguf-py/gguf/scripts/__init__.py new file mode 100644 index 00000000..e77f2e9c --- /dev/null +++ b/gguf-py/gguf/scripts/__init__.py @@ -0,0 +1,6 @@ +# pyright: reportUnusedImport=false + +from .gguf_convert_endian import main as gguf_convert_endian_entrypoint +from .gguf_dump import main as gguf_dump_entrypoint +from .gguf_set_metadata import main as gguf_set_metadata_entrypoint +from .gguf_new_metadata import main as gguf_new_metadata_entrypoint diff --git a/gguf-py/gguf/scripts/gguf_convert_endian.py b/gguf-py/gguf/scripts/gguf_convert_endian.py new file mode 100755 index 00000000..b698af0f --- /dev/null +++ b/gguf-py/gguf/scripts/gguf_convert_endian.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import logging +import argparse +import os +import sys +from tqdm import tqdm +from pathlib import Path + +import numpy as np + +# Necessary to load the local gguf package +if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): + sys.path.insert(0, str(Path(__file__).parent.parent)) + +import gguf + +logger = logging.getLogger("gguf-convert-endian") + + +def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None: + if np.uint32(1) == np.uint32(1).newbyteorder("<"): + # Host is little endian + host_endian = "little" + swapped_endian = "big" + else: + # Sorry PDP or other weird systems that don't use BE or LE. + host_endian = "big" + swapped_endian = "little" + if reader.byte_order == "S": + file_endian = swapped_endian + else: + file_endian = host_endian + order = host_endian if args.order == "native" else args.order + logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian") + if file_endian == order: + logger.info(f"* File is already {order.upper()} endian. Nothing to do.") + sys.exit(0) + logger.info("* Checking tensors for conversion compatibility") + for tensor in reader.tensors: + if tensor.tensor_type not in ( + gguf.GGMLQuantizationType.F32, + gguf.GGMLQuantizationType.F16, + gguf.GGMLQuantizationType.Q8_0, + ): + raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}") + logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}") + if args.dry_run: + return + logger.warning("*** Warning *** Warning *** Warning **") + logger.warning("* This conversion process may damage the file. Ensure you have a backup.") + if order != host_endian: + logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.") + logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted") + logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:") + response = input("YES, I am sure> ") + if response != "YES": + logger.warning("You didn't enter YES. 
Okay then, see ya!") + sys.exit(0) + logger.info(f"* Converting fields ({len(reader.fields)})") + for idx, field in enumerate(reader.fields.values()): + logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}") + for part in field.parts: + part.byteswap(inplace=True) + logger.info(f"* Converting tensors ({len(reader.tensors)})") + + for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")): + log_message = ( + f"Converting tensor {repr(tensor.name)}, " + f"type={tensor.tensor_type.name}, " + f"elements={tensor.n_elements} " + ) + + # Byte-swap each part of the tensor's field + for part in tensor.field.parts: + part.byteswap(inplace=True) + + # Byte-swap tensor data if necessary + if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0: + # Handle Q8_0 tensor blocks (block_q8_0) + # Specific handling of block_q8_0 is required. + # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations. + + block_size = 34 # 34 bytes = + 32 * + + n_blocks = len(tensor.data) // block_size + for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)): + block_offs = block_num * block_size + + # Byte-Swap f16 sized delta field + delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16) + delta.byteswap(inplace=True) + + # Byte-Swap Q8 weights + if block_num % 100000 == 0: + inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]") + + else: + # Handle other tensor types + tensor.data.byteswap(inplace=True) + + pbar.set_description(log_message) + + logger.info("* Completion") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Convert GGUF file byte order") + parser.add_argument( + "model", type=str, + help="GGUF format model filename", + ) + parser.add_argument( + "order", type=str, choices=['big', 'little', 'native'], + help="Requested byte order", + ) + parser.add_argument( + "--dry-run", action="store_true", + help="Don't actually change anything", + ) + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") + + args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) + + logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) + + logger.info(f'* Loading: {args.model}') + reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+') + convert_byteorder(reader, args) + + +if __name__ == "__main__": + main() diff --git a/gguf-py/gguf/scripts/gguf_dump.py b/gguf-py/gguf/scripts/gguf_dump.py new file mode 100755 index 00000000..1b654654 --- /dev/null +++ b/gguf-py/gguf/scripts/gguf_dump.py @@ -0,0 +1,454 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import logging +import argparse +import os +import re +import sys +from pathlib import Path +from typing import Any + +import numpy as np + +# Necessary to load the local gguf package +if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): + sys.path.insert(0, str(Path(__file__).parent.parent)) + +from gguf import GGUFReader, GGUFValueType, ReaderTensor # noqa: E402 + +logger = logging.getLogger("gguf-dump") + + +def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]: + host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG' + if reader.byte_order == 'S': + file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE' + else: + file_endian = host_endian + return (host_endian, file_endian) + + +# For 
more information about what field.parts and field.data represent, +# please see the comments in the modify_gguf.py example. +def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: + host_endian, file_endian = get_file_host_endian(reader) + print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.') # noqa: NP100 + print(f'* Dumping {len(reader.fields)} key/value pair(s)') # noqa: NP100 + for n, field in enumerate(reader.fields.values(), 1): + if not field.types: + pretty_type = 'N/A' + elif field.types[0] == GGUFValueType.ARRAY: + nest_count = len(field.types) - 1 + pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count + else: + pretty_type = str(field.types[-1].name) + + log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}' + if len(field.types) == 1: + curr_type = field.types[0] + if curr_type == GGUFValueType.STRING: + log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60])) + elif field.types[0] in reader.gguf_scalar_to_np: + log_message += ' = {0}'.format(field.parts[-1][0]) + print(log_message) # noqa: NP100 + if args.no_tensors: + return + print(f'* Dumping {len(reader.tensors)} tensor(s)') # noqa: NP100 + for n, tensor in enumerate(reader.tensors, 1): + prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape))) + print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}') # noqa: NP100 + + +def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None: + import json + host_endian, file_endian = get_file_host_endian(reader) + metadata: dict[str, Any] = {} + tensors: dict[str, Any] = {} + result = { + "filename": args.model, + "endian": file_endian, + "metadata": metadata, + "tensors": tensors, + } + for idx, field in enumerate(reader.fields.values()): + curr: dict[str, Any] = { + "index": idx, + "type": field.types[0].name if field.types else 'UNKNOWN', + "offset": field.offset, + } + metadata[field.name] = curr + if field.types[:1] == [GGUFValueType.ARRAY]: + curr["array_types"] = [t.name for t in field.types][1:] + if not args.json_array: + continue + itype = field.types[-1] + if itype == GGUFValueType.STRING: + curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data] + else: + curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()] + elif field.types[0] == GGUFValueType.STRING: + curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8") + else: + curr["value"] = field.parts[-1].tolist()[0] + if not args.no_tensors: + for idx, tensor in enumerate(reader.tensors): + tensors[tensor.name] = { + "index": idx, + "shape": tensor.shape.tolist(), + "type": tensor.tensor_type.name, + "offset": tensor.field.offset, + } + json.dump(result, sys.stdout) + + +def markdown_table_with_alignment_support(header_map: list[dict[str, str]], data: list[dict[str, Any]]): + # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957 + + # Alignment Utility Function + def strAlign(padding: int, alignMode: str | None, strVal: str): + if alignMode == 'center': + return strVal.center(padding) + elif alignMode == 'right': + return strVal.rjust(padding - 1) + ' ' + elif alignMode == 'left': + return ' ' + strVal.ljust(padding - 1) + else: # default left + return ' ' + strVal.ljust(padding - 1) + + def dashAlign(padding: int, alignMode: str | None): + if alignMode == 'center': + return ':' + '-' * (padding 
- 2) + ':' + elif alignMode == 'right': + return '-' * (padding - 1) + ':' + elif alignMode == 'left': + return ':' + '-' * (padding - 1) + else: # default left + return '-' * (padding) + + # Calculate Padding For Each Column Based On Header and Data Length + rowsPadding = {} + for index, columnEntry in enumerate(header_map): + padCount = max([len(str(v)) for d in data for k, v in d.items() if k == columnEntry['key_name']], default=0) + 2 + headerPadCount = len(columnEntry['header_name']) + 2 + rowsPadding[index] = headerPadCount if padCount <= headerPadCount else padCount + + # Render Markdown Header + rows = [] + rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(columnEntry['header_name'])) for index, columnEntry in enumerate(header_map))) + rows.append('|'.join(dashAlign(rowsPadding[index], columnEntry.get('align')) for index, columnEntry in enumerate(header_map))) + + # Render Tabular Data + for item in data: + rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(item[columnEntry['key_name']])) for index, columnEntry in enumerate(header_map))) + + # Convert Tabular String Rows Into String + tableString = "" + for row in rows: + tableString += f'|{row}|\n' + + return tableString + + +def element_count_rounded_notation(count: int) -> str: + if count > 1e15 : + # Quadrillion + scaled_amount = count * 1e-15 + scale_suffix = "Q" + elif count > 1e12 : + # Trillions + scaled_amount = count * 1e-12 + scale_suffix = "T" + elif count > 1e9 : + # Billions + scaled_amount = count * 1e-9 + scale_suffix = "B" + elif count > 1e6 : + # Millions + scaled_amount = count * 1e-6 + scale_suffix = "M" + elif count > 1e3 : + # Thousands + scaled_amount = count * 1e-3 + scale_suffix = "K" + else: + # Under Thousands + scaled_amount = count + scale_suffix = "" + return f"{'~' if count > 1e3 else ''}{round(scaled_amount)}{scale_suffix}" + + +def translate_tensor_name(name): + words = name.split(".") + + # Source: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-tensor-names + abbreviation_dictionary = { + 'token_embd': 'Token embedding', + 'pos_embd': 'Position embedding', + 'output_norm': 'Output normalization', + 'output': 'Output', + 'attn_norm': 'Attention normalization', + 'attn_norm_2': 'Attention normalization', + 'attn_qkv': 'Attention query-key-value', + 'attn_q': 'Attention query', + 'attn_k': 'Attention key', + 'attn_v': 'Attention value', + 'attn_output': 'Attention output', + 'ffn_norm': 'Feed-forward network normalization', + 'ffn_up': 'Feed-forward network "up"', + 'ffn_gate': 'Feed-forward network "gate"', + 'ffn_down': 'Feed-forward network "down"', + 'ffn_gate_inp': 'Expert-routing layer for the Feed-forward network in Mixture of Expert models', + 'ffn_gate_exp': 'Feed-forward network "gate" layer per expert in Mixture of Expert models', + 'ffn_down_exp': 'Feed-forward network "down" layer per expert in Mixture of Expert models', + 'ffn_up_exp': 'Feed-forward network "up" layer per expert in Mixture of Expert models', + 'ssm_in': 'State space model input projections', + 'ssm_conv1d': 'State space model rolling/shift', + 'ssm_x': 'State space model selective parametrization', + 'ssm_a': 'State space model state compression', + 'ssm_d': 'State space model skip connection', + 'ssm_dt': 'State space model time step', + 'ssm_out': 'State space model output projection', + 'blk': 'Block', + 'enc': 'Encoder', + 'dec': 'Decoder', + } + + expanded_words = [] + for word in words: + word_norm = word.strip().lower() + if 
word_norm in abbreviation_dictionary: + expanded_words.append(abbreviation_dictionary[word_norm].title()) + else: + expanded_words.append(word.title()) + + return ' '.join(expanded_words) + + +def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: + host_endian, file_endian = get_file_host_endian(reader) + markdown_content = "" + markdown_content += f'# {args.model} - GGUF Internal File Dump\n\n' + markdown_content += f'- Endian: {file_endian} endian\n' + markdown_content += '\n' + markdown_content += '## Key Value Metadata Store\n\n' + markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n' + markdown_content += '\n' + + kv_dump_table: list[dict[str, str | int]] = [] + for n, field in enumerate(reader.fields.values(), 1): + if not field.types: + pretty_type = 'N/A' + elif field.types[0] == GGUFValueType.ARRAY: + nest_count = len(field.types) - 1 + pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count + else: + pretty_type = str(field.types[-1].name) + + def escape_markdown_inline_code(value_string): + # Find the longest contiguous sequence of backticks in the string then + # wrap string with appropriate number of backticks required to escape it + max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0) + inline_code_marker = '`' * (max_backticks + 1) + + # If the string starts or ends with a backtick, add a space at the beginning and end + if value_string.startswith('`') or value_string.endswith('`'): + value_string = f" {value_string} " + + return f"{inline_code_marker}{value_string}{inline_code_marker}" + + total_elements = len(field.data) + value = "" + if len(field.types) == 1: + curr_type = field.types[0] + if curr_type == GGUFValueType.STRING: + truncate_length = 60 + value_string = str(bytes(field.parts[-1]), encoding='utf-8') + if len(value_string) > truncate_length: + head = escape_markdown_inline_code(value_string[:truncate_length // 2]) + tail = escape_markdown_inline_code(value_string[-truncate_length // 2:]) + value = "{head}...{tail}".format(head=head, tail=tail) + else: + value = escape_markdown_inline_code(value_string) + elif curr_type in reader.gguf_scalar_to_np: + value = str(field.parts[-1][0]) + else: + if field.types[0] == GGUFValueType.ARRAY: + curr_type = field.types[1] + array_elements = [] + + if curr_type == GGUFValueType.STRING: + render_element = min(5, total_elements) + for element_pos in range(render_element): + truncate_length = 30 + value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8') + if len(value_string) > truncate_length: + head = escape_markdown_inline_code(value_string[:truncate_length // 2]) + tail = escape_markdown_inline_code(value_string[-truncate_length // 2:]) + value = "{head}...{tail}".format(head=head, tail=tail) + else: + value = escape_markdown_inline_code(value_string) + array_elements.append(value) + + elif curr_type in reader.gguf_scalar_to_np: + render_element = min(7, total_elements) + for element_pos in range(render_element): + array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0])) + + value = f'[ {", ".join(array_elements).strip()}{", ..." 
if total_elements > len(array_elements) else ""} ]' + + kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value}) + + kv_dump_table_header_map = [ + {'key_name':'n', 'header_name':'POS', 'align':'right'}, + {'key_name':'pretty_type', 'header_name':'TYPE', 'align':'left'}, + {'key_name':'total_elements', 'header_name':'Count', 'align':'right'}, + {'key_name':'field_name', 'header_name':'Key', 'align':'left'}, + {'key_name':'value', 'header_name':'Value', 'align':'left'}, + ] + + markdown_content += markdown_table_with_alignment_support(kv_dump_table_header_map, kv_dump_table) + + markdown_content += "\n" + + if not args.no_tensors: + # Group tensors by their prefix and maintain order + tensor_prefix_order: list[str] = [] + tensor_name_to_key: dict[str, int] = {} + tensor_groups: dict[str, list[ReaderTensor]] = {} + total_elements = sum(tensor.n_elements for tensor in reader.tensors) + + # Parsing Tensors Record + for key, tensor in enumerate(reader.tensors): + tensor_components = tensor.name.split('.') + + # Classify Tensor Group + tensor_group_name = "base" + if tensor_components[0] == 'blk': + tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}" + elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk': + tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}" + elif tensor_components[0] in ['enc', 'dec']: + tensor_group_name = f"{tensor_components[0]}" + + # Check if new Tensor Group + if tensor_group_name not in tensor_groups: + tensor_groups[tensor_group_name] = [] + tensor_prefix_order.append(tensor_group_name) + + # Record Tensor and Tensor Position + tensor_groups[tensor_group_name].append(tensor) + tensor_name_to_key[tensor.name] = key + + # Tensors Mapping Dump + markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n\n' + markdown_content += f'Total number of elements in all tensors: {total_elements} Elements\n' + markdown_content += '\n' + + for group in tensor_prefix_order: + tensors = tensor_groups[group] + group_elements = sum(tensor.n_elements for tensor in tensors) + markdown_content += f"- [{translate_tensor_name(group)} Tensor Group - {element_count_rounded_notation(group_elements)} Elements](#{group.replace('.', '_')})\n" + + markdown_content += "\n" + + markdown_content += "### Tensor Data Offset\n" + markdown_content += '\n' + markdown_content += 'This table contains the offset and data segment relative to start of file\n' + markdown_content += '\n' + + tensor_mapping_table: list[dict[str, str | int]] = [] + for key, tensor in enumerate(reader.tensors): + data_offset_pretty = '{0:#16x}'.format(tensor.data_offset) + data_size_pretty = '{0:#16x}'.format(tensor.n_bytes) + tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty}) + + tensors_mapping_table_header_map = [ + {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'}, + {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'}, + {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'}, + {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'}, + ] + + markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table) + markdown_content += "\n" + + for group in tensor_prefix_order: + tensors = tensor_groups[group] + group_elements = 
sum(tensor.n_elements for tensor in tensors) + group_percentage = group_elements / total_elements * 100 + markdown_content += f"### {translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements\n\n" + + # Precalculate column sizing for visual consistency + prettify_element_est_count_size: int = 1 + prettify_element_count_size: int = 1 + prettify_dimension_max_widths: dict[int, int] = {} + for tensor in tensors: + prettify_element_est_count_size = max(prettify_element_est_count_size, len(str(element_count_rounded_notation(tensor.n_elements)))) + prettify_element_count_size = max(prettify_element_count_size, len(str(tensor.n_elements))) + for i, dimension_size in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))): + prettify_dimension_max_widths[i] = max(prettify_dimension_max_widths.get(i,1), len(str(dimension_size))) + + # Generate Tensor Layer Table Content + tensor_dump_table: list[dict[str, str | int]] = [] + for tensor in tensors: + human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)")) + pretty_dimension = ' x '.join(f'{str(d):>{prettify_dimension_max_widths[i]}}' for i, d in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape)))) + element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})" + element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}" + type_name_string = f"{tensor.tensor_type.name}" + tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string}) + + tensor_dump_table_header_map = [ + {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'}, + {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'}, + {'key_name':'human_layer_name', 'header_name':'Human Friendly Tensor Layer Name', 'align':'left'}, + {'key_name':'element_count', 'header_name':'Elements', 'align':'left'}, + {'key_name':'pretty_dimension', 'header_name':'Shape', 'align':'left'}, + {'key_name':'tensor_type', 'header_name':'Type', 'align':'left'}, + ] + + markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table) + + markdown_content += "\n" + markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n" + markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n" + markdown_content += "\n\n" + + print(markdown_content) # noqa: NP100 + + +def main() -> None: + parser = argparse.ArgumentParser(description="Dump GGUF file metadata") + parser.add_argument("model", type=str, help="GGUF format model filename") + parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata") + parser.add_argument("--json", action="store_true", help="Produce JSON output") + parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)") + parser.add_argument("--data-offset", action="store_true", help="Start of data offset") + parser.add_argument("--data-alignment", action="store_true", help="Data alignment applied globally to data field") + parser.add_argument("--markdown", action="store_true", help="Produce markdown output") + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") + + args = 
parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) + + logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) + + if not args.json and not args.markdown and not args.data_offset and not args.data_alignment: + logger.info(f'* Loading: {args.model}') + + reader = GGUFReader(args.model, 'r') + + if args.json: + dump_metadata_json(reader, args) + elif args.markdown: + dump_markdown_metadata(reader, args) + elif args.data_offset: + print(reader.data_offset) # noqa: NP100 + elif args.data_alignment: + print(reader.alignment) # noqa: NP100 + else: + dump_metadata(reader, args) + + +if __name__ == '__main__': + main() diff --git a/gguf-py/gguf/scripts/gguf_hash.py b/gguf-py/gguf/scripts/gguf_hash.py new file mode 100755 index 00000000..ee34d09b --- /dev/null +++ b/gguf-py/gguf/scripts/gguf_hash.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import uuid +import hashlib + +import logging +import argparse +import os +import sys +from pathlib import Path + +from tqdm import tqdm + +# Necessary to load the local gguf package +if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): + sys.path.insert(0, str(Path(__file__).parent.parent)) + +from gguf import GGUFReader # noqa: E402 + + +logger = logging.getLogger("gguf-hash") + +# UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp') +UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5') + + +# For more information about what field.parts and field.data represent, +# please see the comments in the modify_gguf.py example. +def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None: + sha1 = hashlib.sha1() + sha256 = hashlib.sha256() + uuidv5_sha1 = hashlib.sha1() + uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes) + + # Total Weight Calculation For Progress Bar + total_weights = 0 + for n, tensor in enumerate(reader.tensors, 1): + + # We don't need these + if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): + continue + + # Calculate Tensor Volume + sum_weights_in_tensor = 1 + for dim in tensor.shape: + sum_weights_in_tensor *= dim + total_weights += sum_weights_in_tensor + + # Hash Progress Bar + bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar) + + # Hashing Process + for tensor in reader.tensors: + + # We don't need these + if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): + continue + + # Progressbar + sum_weights_in_tensor = 1 + for dim in tensor.shape: + sum_weights_in_tensor *= dim + bar.update(sum_weights_in_tensor) + + if not no_layer: + + sha1_layer = hashlib.sha1() + sha1_layer.update(tensor.data.data) + print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100 + + sha256_layer = hashlib.sha256() + sha256_layer.update(tensor.data.data) + print("sha256 {0} {1}:{2}".format(sha256_layer.hexdigest(), filename, tensor.name)) # noqa: NP100 + + sha1.update(tensor.data.data) + sha256.update(tensor.data.data) + uuidv5_sha1.update(tensor.data.data) + + # Flush Hash Progress Bar + bar.close() + + # Display Hash Output + print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100 + print("sha256 {0} {1}".format(sha256.hexdigest(), filename)) # noqa: NP100 + print("uuid {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # 
noqa: NP100 + + +def main() -> None: + parser = argparse.ArgumentParser(description="Dump GGUF file metadata") + parser.add_argument("model", type=str, help="GGUF format model filename") + parser.add_argument("--no-layer", action="store_true", help="exclude per layer hash") + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") + parser.add_argument("--progressbar", action="store_true", help="enable progressbar") + args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) + logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) + reader = GGUFReader(args.model, 'r') + gguf_hash(reader, args.model, not args.progressbar, args.no_layer) + + +if __name__ == '__main__': + main() diff --git a/gguf-py/gguf/scripts/gguf_new_metadata.py b/gguf-py/gguf/scripts/gguf_new_metadata.py new file mode 100755 index 00000000..fce52a8c --- /dev/null +++ b/gguf-py/gguf/scripts/gguf_new_metadata.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import logging +import argparse +import os +import sys +import json +from pathlib import Path + +import numpy as np +from tqdm import tqdm +from typing import Any, Sequence, NamedTuple + +# Necessary to load the local gguf package +if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): + sys.path.insert(0, str(Path(__file__).parent.parent)) + +import gguf + +logger = logging.getLogger("gguf-new-metadata") + + +class MetadataDetails(NamedTuple): + type: gguf.GGUFValueType + value: Any + description: str = '' + + +def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian: + if np.uint32(1) == np.uint32(1).newbyteorder("<"): + # Host is little endian + host_endian = gguf.GGUFEndian.LITTLE + swapped_endian = gguf.GGUFEndian.BIG + else: + # Sorry PDP or other weird systems that don't use BE or LE. 
+ host_endian = gguf.GGUFEndian.BIG + swapped_endian = gguf.GGUFEndian.LITTLE + + if reader.byte_order == "S": + return swapped_endian + else: + return host_endian + + +def decode_field(field: gguf.ReaderField | None) -> Any: + if field and field.types: + main_type = field.types[0] + + if main_type == gguf.GGUFValueType.ARRAY: + sub_type = field.types[-1] + + if sub_type == gguf.GGUFValueType.STRING: + return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data] + else: + return [pv for idx in field.data for pv in field.parts[idx].tolist()] + if main_type == gguf.GGUFValueType.STRING: + return str(bytes(field.parts[-1]), encoding='utf-8') + else: + return field.parts[-1][0] + + return None + + +def get_field_data(reader: gguf.GGUFReader, key: str) -> Any: + field = reader.get_field(key) + + return decode_field(field) + + +def find_token(token_list: Sequence[int], token: str) -> Sequence[int]: + token_ids = [index for index, value in enumerate(token_list) if value == token] + + if len(token_ids) == 0: + raise LookupError(f'Unable to find "{token}" in token list!') + + return token_ids + + +def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, MetadataDetails], remove_metadata: Sequence[str]) -> None: + for field in reader.fields.values(): + # Suppress virtual fields and fields written by GGUFWriter + if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'): + logger.debug(f'Suppressing {field.name}') + continue + + # Skip old chat templates if we have new ones + if field.name.startswith(gguf.Keys.Tokenizer.CHAT_TEMPLATE) and gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata: + logger.debug(f'Skipping {field.name}') + continue + + if field.name in remove_metadata: + logger.debug(f'Removing {field.name}') + continue + + old_val = MetadataDetails(field.types[0], decode_field(field)) + val = new_metadata.get(field.name, old_val) + + if field.name in new_metadata: + logger.debug(f'Modifying {field.name}: "{old_val.value}" -> "{val.value}" {val.description}') + del new_metadata[field.name] + elif val.value is not None: + logger.debug(f'Copying {field.name}') + + if val.value is not None: + writer.add_key_value(field.name, val.value, val.type) + + if gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata: + logger.debug('Adding chat template(s)') + writer.add_chat_template(new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE].value) + del new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] + + for key, val in new_metadata.items(): + logger.debug(f'Adding {key}: "{val.value}" {val.description}') + writer.add_key_value(key, val.value, val.type) + + total_bytes = 0 + + for tensor in reader.tensors: + total_bytes += tensor.n_bytes + writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type) + + bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) + + writer.write_header_to_file() + writer.write_kv_data_to_file() + writer.write_ti_data_to_file() + + for tensor in reader.tensors: + writer.write_tensor_data(tensor.data) + bar.update(tensor.n_bytes) + + writer.close() + + +def main() -> None: + tokenizer_metadata = (getattr(gguf.Keys.Tokenizer, n) for n in gguf.Keys.Tokenizer.__dict__.keys() if not n.startswith('_')) + token_names = dict((n.split('.')[-1][:-len('_token_id')], n) for n in tokenizer_metadata if n.endswith('_token_id')) + + parser = argparse.ArgumentParser(description="Make a copy of a GGUF file with new metadata") + 
parser.add_argument("input", type=Path, help="GGUF format model input filename") + parser.add_argument("output", type=Path, help="GGUF format model output filename") + parser.add_argument("--general-name", type=str, help="The models general.name", metavar='"name"') + parser.add_argument("--general-description", type=str, help="The models general.description", metavar='"Description ..."') + parser.add_argument("--chat-template", type=str, help="Chat template string (or JSON string containing templates)", metavar='"{% ... %} ..."') + parser.add_argument("--chat-template-config", type=Path, help="Config file containing chat template(s)", metavar='tokenizer_config.json') + parser.add_argument("--pre-tokenizer", type=str, help="The models tokenizer.ggml.pre", metavar='"pre tokenizer"') + parser.add_argument("--remove-metadata", action="append", type=str, help="Remove metadata (by key name) from output model", metavar='general.url') + parser.add_argument("--special-token", action="append", type=str, help="Special token by value", nargs=2, metavar=(' | '.join(token_names.keys()), '""')) + parser.add_argument("--special-token-by-id", action="append", type=str, help="Special token by id", nargs=2, metavar=(' | '.join(token_names.keys()), '0')) + parser.add_argument("--force", action="store_true", help="Bypass warnings without confirmation") + parser.add_argument("--verbose", action="store_true", help="Increase output verbosity") + args = parser.parse_args(None if len(sys.argv) > 2 else ["--help"]) + + logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) + + new_metadata = {} + remove_metadata = args.remove_metadata or [] + + if args.general_name: + new_metadata[gguf.Keys.General.NAME] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_name) + + if args.general_description: + new_metadata[gguf.Keys.General.DESCRIPTION] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_description) + + if args.chat_template: + new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template) + + if args.chat_template_config: + with open(args.chat_template_config, 'r') as fp: + config = json.load(fp) + template = config.get('chat_template') + if template: + new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template) + + if args.pre_tokenizer: + new_metadata[gguf.Keys.Tokenizer.PRE] = MetadataDetails(gguf.GGUFValueType.STRING, args.pre_tokenizer) + + if remove_metadata: + logger.warning('*** Warning *** Warning *** Warning **') + logger.warning('* Most metadata is required for a fully functional GGUF file,') + logger.warning('* removing crucial metadata may result in a corrupt output file!') + + if not args.force: + logger.warning('* Enter exactly YES if you are positive you want to proceed:') + response = input('YES, I am sure> ') + if response != 'YES': + logger.info("You didn't enter YES. 
Okay then, see ya!") + sys.exit(0) + + logger.info(f'* Loading: {args.input}') + reader = gguf.GGUFReader(args.input, 'r') + + arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE) + endianess = get_byteorder(reader) + + token_list = get_field_data(reader, gguf.Keys.Tokenizer.LIST) or [] + + for name, token in args.special_token or []: + if name not in token_names: + logger.warning(f'Unknown special token "{name}", ignoring...') + else: + ids = find_token(token_list, token) + new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, ids[0], f'= {token}') + + if len(ids) > 1: + logger.warning(f'Multiple "{token}" tokens found, choosing ID {ids[0]}, use --special-token-by-id if you want another:') + logger.warning(', '.join(str(i) for i in ids)) + + for name, id_string in args.special_token_by_id or []: + if name not in token_names: + logger.warning(f'Unknown special token "{name}", ignoring...') + elif not id_string.isdecimal(): + raise LookupError(f'Token ID "{id_string}" is not a valid ID!') + else: + id_int = int(id_string) + + if id_int >= 0 and id_int < len(token_list): + new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, id_int, f'= {token_list[id_int]}') + else: + raise LookupError(f'Token ID {id_int} is not within token list!') + + if os.path.isfile(args.output) and not args.force: + logger.warning('*** Warning *** Warning *** Warning **') + logger.warning(f'* The "{args.output}" GGUF file already exists, it will be overwritten!') + logger.warning('* Enter exactly YES if you are positive you want to proceed:') + response = input('YES, I am sure> ') + if response != 'YES': + logger.info("You didn't enter YES. Okay then, see ya!") + sys.exit(0) + + logger.info(f'* Writing: {args.output}') + writer = gguf.GGUFWriter(args.output, arch=arch, endianess=endianess) + + alignment = get_field_data(reader, gguf.Keys.General.ALIGNMENT) + if alignment is not None: + logger.debug(f'Setting custom alignment: {alignment}') + writer.data_alignment = alignment + + copy_with_new_metadata(reader, writer, new_metadata, remove_metadata) + + +if __name__ == '__main__': + main() diff --git a/gguf-py/gguf/scripts/gguf_set_metadata.py b/gguf-py/gguf/scripts/gguf_set_metadata.py new file mode 100755 index 00000000..e35b651b --- /dev/null +++ b/gguf-py/gguf/scripts/gguf_set_metadata.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +import logging +import argparse +import os +import sys +from pathlib import Path + +# Necessary to load the local gguf package +if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): + sys.path.insert(0, str(Path(__file__).parent.parent)) + +from gguf import GGUFReader # noqa: E402 + +logger = logging.getLogger("gguf-set-metadata") + + +def minimal_example(filename: str) -> None: + reader = GGUFReader(filename, 'r+') + field = reader.fields['tokenizer.ggml.bos_token_id'] + if field is None: + return + part_index = field.data[0] + field.parts[part_index][0] = 2 # Set tokenizer.ggml.bos_token_id to 2 + # + # So what's this field.data thing? It's helpful because field.parts contains + # _every_ part of the GGUF field. 
For example, tokenizer.ggml.bos_token_id consists + # of: + # + # Part index 0: Key length (27) + # Part index 1: Key data ("tokenizer.ggml.bos_token_id") + # Part index 2: Field type (4, the id for GGUFValueType.UINT32) + # Part index 3: Field value + # + # Note also that each part is an NDArray slice, so even a part that + # is only a single value like the key length will be a NDArray of + # the key length type (numpy.uint32). + # + # The .data attribute in the Field is a list of relevant part indexes + # and doesn't contain internal GGUF details like the key length part. + # In this case, .data will be [3] - just the part index of the + # field value itself. + + +def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: + field = reader.get_field(args.key) + if field is None: + logger.error(f'! Field {repr(args.key)} not found') + sys.exit(1) + # Note that field.types is a list of types. This is because the GGUF + # format supports arrays. For example, an array of UINT32 would + # look like [GGUFValueType.ARRAY, GGUFValueType.UINT32] + handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None + if handler is None: + logger.error(f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}') + sys.exit(1) + current_value = field.parts[field.data[0]][0] + new_value = handler(args.value) + logger.info(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}') + if current_value == new_value: + logger.info(f'- Key {repr(args.key)} already set to requested value {current_value}') + sys.exit(0) + if args.dry_run: + sys.exit(0) + if not args.force: + logger.warning('*** Warning *** Warning *** Warning **') + logger.warning('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.') + logger.warning('* Enter exactly YES if you are positive you want to proceed:') + response = input('YES, I am sure> ') + if response != 'YES': + logger.info("You didn't enter YES. Okay then, see ya!") + sys.exit(0) + field.parts[field.data[0]][0] = new_value + logger.info('* Field changed. 
Successful completion.') + + +def main() -> None: + parser = argparse.ArgumentParser(description="Set a simple value in GGUF file metadata") + parser.add_argument("model", type=str, help="GGUF format model filename") + parser.add_argument("key", type=str, help="Metadata key to set") + parser.add_argument("value", type=str, help="Metadata value to set") + parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything") + parser.add_argument("--force", action="store_true", help="Change the field without confirmation") + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") + + args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) + + logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) + + logger.info(f'* Loading: {args.model}') + reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+') + set_metadata(reader, args) + + +if __name__ == '__main__': + main() diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml index 9c395625..92d7f22e 100644 --- a/gguf-py/pyproject.toml +++ b/gguf-py/pyproject.toml @@ -1,12 +1,11 @@ [tool.poetry] name = "gguf" -version = "0.13.0" +version = "0.14.0" description = "Read and write ML models in GGUF for GGML" authors = ["GGML "] packages = [ {include = "gguf"}, {include = "gguf/py.typed"}, - {include = "scripts"}, ] readme = "README.md" homepage = "https://ggml.ai" @@ -33,7 +32,7 @@ requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] -gguf-convert-endian = "scripts:gguf_convert_endian_entrypoint" -gguf-dump = "scripts:gguf_dump_entrypoint" -gguf-set-metadata = "scripts:gguf_set_metadata_entrypoint" -gguf-new-metadata = "scripts:gguf_new_metadata_entrypoint" +gguf-convert-endian = "gguf.scripts:gguf_convert_endian_entrypoint" +gguf-dump = "gguf.scripts:gguf_dump_entrypoint" +gguf-set-metadata = "gguf.scripts:gguf_set_metadata_entrypoint" +gguf-new-metadata = "gguf.scripts:gguf_new_metadata_entrypoint" diff --git a/gguf-py/scripts/__init__.py b/gguf-py/scripts/__init__.py deleted file mode 100644 index e77f2e9c..00000000 --- a/gguf-py/scripts/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# pyright: reportUnusedImport=false - -from .gguf_convert_endian import main as gguf_convert_endian_entrypoint -from .gguf_dump import main as gguf_dump_entrypoint -from .gguf_set_metadata import main as gguf_set_metadata_entrypoint -from .gguf_new_metadata import main as gguf_new_metadata_entrypoint diff --git a/gguf-py/scripts/gguf_convert_endian.py b/gguf-py/scripts/gguf_convert_endian.py deleted file mode 100755 index b698af0f..00000000 --- a/gguf-py/scripts/gguf_convert_endian.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations - -import logging -import argparse -import os -import sys -from tqdm import tqdm -from pathlib import Path - -import numpy as np - -# Necessary to load the local gguf package -if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): - sys.path.insert(0, str(Path(__file__).parent.parent)) - -import gguf - -logger = logging.getLogger("gguf-convert-endian") - - -def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None: - if np.uint32(1) == np.uint32(1).newbyteorder("<"): - # Host is little endian - host_endian = "little" - swapped_endian = "big" - else: - # Sorry PDP or other weird systems that don't use BE or LE. 
- host_endian = "big" - swapped_endian = "little" - if reader.byte_order == "S": - file_endian = swapped_endian - else: - file_endian = host_endian - order = host_endian if args.order == "native" else args.order - logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian") - if file_endian == order: - logger.info(f"* File is already {order.upper()} endian. Nothing to do.") - sys.exit(0) - logger.info("* Checking tensors for conversion compatibility") - for tensor in reader.tensors: - if tensor.tensor_type not in ( - gguf.GGMLQuantizationType.F32, - gguf.GGMLQuantizationType.F16, - gguf.GGMLQuantizationType.Q8_0, - ): - raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}") - logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}") - if args.dry_run: - return - logger.warning("*** Warning *** Warning *** Warning **") - logger.warning("* This conversion process may damage the file. Ensure you have a backup.") - if order != host_endian: - logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.") - logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted") - logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:") - response = input("YES, I am sure> ") - if response != "YES": - logger.warning("You didn't enter YES. Okay then, see ya!") - sys.exit(0) - logger.info(f"* Converting fields ({len(reader.fields)})") - for idx, field in enumerate(reader.fields.values()): - logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}") - for part in field.parts: - part.byteswap(inplace=True) - logger.info(f"* Converting tensors ({len(reader.tensors)})") - - for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")): - log_message = ( - f"Converting tensor {repr(tensor.name)}, " - f"type={tensor.tensor_type.name}, " - f"elements={tensor.n_elements} " - ) - - # Byte-swap each part of the tensor's field - for part in tensor.field.parts: - part.byteswap(inplace=True) - - # Byte-swap tensor data if necessary - if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0: - # Handle Q8_0 tensor blocks (block_q8_0) - # Specific handling of block_q8_0 is required. - # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations. 
- - block_size = 34 # 34 bytes = + 32 * - - n_blocks = len(tensor.data) // block_size - for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)): - block_offs = block_num * block_size - - # Byte-Swap f16 sized delta field - delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16) - delta.byteswap(inplace=True) - - # Byte-Swap Q8 weights - if block_num % 100000 == 0: - inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]") - - else: - # Handle other tensor types - tensor.data.byteswap(inplace=True) - - pbar.set_description(log_message) - - logger.info("* Completion") - - -def main() -> None: - parser = argparse.ArgumentParser(description="Convert GGUF file byte order") - parser.add_argument( - "model", type=str, - help="GGUF format model filename", - ) - parser.add_argument( - "order", type=str, choices=['big', 'little', 'native'], - help="Requested byte order", - ) - parser.add_argument( - "--dry-run", action="store_true", - help="Don't actually change anything", - ) - parser.add_argument("--verbose", action="store_true", help="increase output verbosity") - - args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) - - logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) - - logger.info(f'* Loading: {args.model}') - reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+') - convert_byteorder(reader, args) - - -if __name__ == "__main__": - main() diff --git a/gguf-py/scripts/gguf_dump.py b/gguf-py/scripts/gguf_dump.py deleted file mode 100755 index 1b654654..00000000 --- a/gguf-py/scripts/gguf_dump.py +++ /dev/null @@ -1,454 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations - -import logging -import argparse -import os -import re -import sys -from pathlib import Path -from typing import Any - -import numpy as np - -# Necessary to load the local gguf package -if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): - sys.path.insert(0, str(Path(__file__).parent.parent)) - -from gguf import GGUFReader, GGUFValueType, ReaderTensor # noqa: E402 - -logger = logging.getLogger("gguf-dump") - - -def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]: - host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG' - if reader.byte_order == 'S': - file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE' - else: - file_endian = host_endian - return (host_endian, file_endian) - - -# For more information about what field.parts and field.data represent, -# please see the comments in the modify_gguf.py example. 
-def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: - host_endian, file_endian = get_file_host_endian(reader) - print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.') # noqa: NP100 - print(f'* Dumping {len(reader.fields)} key/value pair(s)') # noqa: NP100 - for n, field in enumerate(reader.fields.values(), 1): - if not field.types: - pretty_type = 'N/A' - elif field.types[0] == GGUFValueType.ARRAY: - nest_count = len(field.types) - 1 - pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count - else: - pretty_type = str(field.types[-1].name) - - log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}' - if len(field.types) == 1: - curr_type = field.types[0] - if curr_type == GGUFValueType.STRING: - log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60])) - elif field.types[0] in reader.gguf_scalar_to_np: - log_message += ' = {0}'.format(field.parts[-1][0]) - print(log_message) # noqa: NP100 - if args.no_tensors: - return - print(f'* Dumping {len(reader.tensors)} tensor(s)') # noqa: NP100 - for n, tensor in enumerate(reader.tensors, 1): - prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape))) - print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}') # noqa: NP100 - - -def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None: - import json - host_endian, file_endian = get_file_host_endian(reader) - metadata: dict[str, Any] = {} - tensors: dict[str, Any] = {} - result = { - "filename": args.model, - "endian": file_endian, - "metadata": metadata, - "tensors": tensors, - } - for idx, field in enumerate(reader.fields.values()): - curr: dict[str, Any] = { - "index": idx, - "type": field.types[0].name if field.types else 'UNKNOWN', - "offset": field.offset, - } - metadata[field.name] = curr - if field.types[:1] == [GGUFValueType.ARRAY]: - curr["array_types"] = [t.name for t in field.types][1:] - if not args.json_array: - continue - itype = field.types[-1] - if itype == GGUFValueType.STRING: - curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data] - else: - curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()] - elif field.types[0] == GGUFValueType.STRING: - curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8") - else: - curr["value"] = field.parts[-1].tolist()[0] - if not args.no_tensors: - for idx, tensor in enumerate(reader.tensors): - tensors[tensor.name] = { - "index": idx, - "shape": tensor.shape.tolist(), - "type": tensor.tensor_type.name, - "offset": tensor.field.offset, - } - json.dump(result, sys.stdout) - - -def markdown_table_with_alignment_support(header_map: list[dict[str, str]], data: list[dict[str, Any]]): - # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957 - - # Alignment Utility Function - def strAlign(padding: int, alignMode: str | None, strVal: str): - if alignMode == 'center': - return strVal.center(padding) - elif alignMode == 'right': - return strVal.rjust(padding - 1) + ' ' - elif alignMode == 'left': - return ' ' + strVal.ljust(padding - 1) - else: # default left - return ' ' + strVal.ljust(padding - 1) - - def dashAlign(padding: int, alignMode: str | None): - if alignMode == 'center': - return ':' + '-' * (padding - 2) + ':' - elif alignMode == 'right': - return '-' * (padding - 1) + ':' - elif alignMode == 'left': - return ':' + '-' * 
(padding - 1) - else: # default left - return '-' * (padding) - - # Calculate Padding For Each Column Based On Header and Data Length - rowsPadding = {} - for index, columnEntry in enumerate(header_map): - padCount = max([len(str(v)) for d in data for k, v in d.items() if k == columnEntry['key_name']], default=0) + 2 - headerPadCount = len(columnEntry['header_name']) + 2 - rowsPadding[index] = headerPadCount if padCount <= headerPadCount else padCount - - # Render Markdown Header - rows = [] - rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(columnEntry['header_name'])) for index, columnEntry in enumerate(header_map))) - rows.append('|'.join(dashAlign(rowsPadding[index], columnEntry.get('align')) for index, columnEntry in enumerate(header_map))) - - # Render Tabular Data - for item in data: - rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(item[columnEntry['key_name']])) for index, columnEntry in enumerate(header_map))) - - # Convert Tabular String Rows Into String - tableString = "" - for row in rows: - tableString += f'|{row}|\n' - - return tableString - - -def element_count_rounded_notation(count: int) -> str: - if count > 1e15 : - # Quadrillion - scaled_amount = count * 1e-15 - scale_suffix = "Q" - elif count > 1e12 : - # Trillions - scaled_amount = count * 1e-12 - scale_suffix = "T" - elif count > 1e9 : - # Billions - scaled_amount = count * 1e-9 - scale_suffix = "B" - elif count > 1e6 : - # Millions - scaled_amount = count * 1e-6 - scale_suffix = "M" - elif count > 1e3 : - # Thousands - scaled_amount = count * 1e-3 - scale_suffix = "K" - else: - # Under Thousands - scaled_amount = count - scale_suffix = "" - return f"{'~' if count > 1e3 else ''}{round(scaled_amount)}{scale_suffix}" - - -def translate_tensor_name(name): - words = name.split(".") - - # Source: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-tensor-names - abbreviation_dictionary = { - 'token_embd': 'Token embedding', - 'pos_embd': 'Position embedding', - 'output_norm': 'Output normalization', - 'output': 'Output', - 'attn_norm': 'Attention normalization', - 'attn_norm_2': 'Attention normalization', - 'attn_qkv': 'Attention query-key-value', - 'attn_q': 'Attention query', - 'attn_k': 'Attention key', - 'attn_v': 'Attention value', - 'attn_output': 'Attention output', - 'ffn_norm': 'Feed-forward network normalization', - 'ffn_up': 'Feed-forward network "up"', - 'ffn_gate': 'Feed-forward network "gate"', - 'ffn_down': 'Feed-forward network "down"', - 'ffn_gate_inp': 'Expert-routing layer for the Feed-forward network in Mixture of Expert models', - 'ffn_gate_exp': 'Feed-forward network "gate" layer per expert in Mixture of Expert models', - 'ffn_down_exp': 'Feed-forward network "down" layer per expert in Mixture of Expert models', - 'ffn_up_exp': 'Feed-forward network "up" layer per expert in Mixture of Expert models', - 'ssm_in': 'State space model input projections', - 'ssm_conv1d': 'State space model rolling/shift', - 'ssm_x': 'State space model selective parametrization', - 'ssm_a': 'State space model state compression', - 'ssm_d': 'State space model skip connection', - 'ssm_dt': 'State space model time step', - 'ssm_out': 'State space model output projection', - 'blk': 'Block', - 'enc': 'Encoder', - 'dec': 'Decoder', - } - - expanded_words = [] - for word in words: - word_norm = word.strip().lower() - if word_norm in abbreviation_dictionary: - expanded_words.append(abbreviation_dictionary[word_norm].title()) - else: - 
expanded_words.append(word.title()) - - return ' '.join(expanded_words) - - -def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: - host_endian, file_endian = get_file_host_endian(reader) - markdown_content = "" - markdown_content += f'# {args.model} - GGUF Internal File Dump\n\n' - markdown_content += f'- Endian: {file_endian} endian\n' - markdown_content += '\n' - markdown_content += '## Key Value Metadata Store\n\n' - markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n' - markdown_content += '\n' - - kv_dump_table: list[dict[str, str | int]] = [] - for n, field in enumerate(reader.fields.values(), 1): - if not field.types: - pretty_type = 'N/A' - elif field.types[0] == GGUFValueType.ARRAY: - nest_count = len(field.types) - 1 - pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count - else: - pretty_type = str(field.types[-1].name) - - def escape_markdown_inline_code(value_string): - # Find the longest contiguous sequence of backticks in the string then - # wrap string with appropriate number of backticks required to escape it - max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0) - inline_code_marker = '`' * (max_backticks + 1) - - # If the string starts or ends with a backtick, add a space at the beginning and end - if value_string.startswith('`') or value_string.endswith('`'): - value_string = f" {value_string} " - - return f"{inline_code_marker}{value_string}{inline_code_marker}" - - total_elements = len(field.data) - value = "" - if len(field.types) == 1: - curr_type = field.types[0] - if curr_type == GGUFValueType.STRING: - truncate_length = 60 - value_string = str(bytes(field.parts[-1]), encoding='utf-8') - if len(value_string) > truncate_length: - head = escape_markdown_inline_code(value_string[:truncate_length // 2]) - tail = escape_markdown_inline_code(value_string[-truncate_length // 2:]) - value = "{head}...{tail}".format(head=head, tail=tail) - else: - value = escape_markdown_inline_code(value_string) - elif curr_type in reader.gguf_scalar_to_np: - value = str(field.parts[-1][0]) - else: - if field.types[0] == GGUFValueType.ARRAY: - curr_type = field.types[1] - array_elements = [] - - if curr_type == GGUFValueType.STRING: - render_element = min(5, total_elements) - for element_pos in range(render_element): - truncate_length = 30 - value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8') - if len(value_string) > truncate_length: - head = escape_markdown_inline_code(value_string[:truncate_length // 2]) - tail = escape_markdown_inline_code(value_string[-truncate_length // 2:]) - value = "{head}...{tail}".format(head=head, tail=tail) - else: - value = escape_markdown_inline_code(value_string) - array_elements.append(value) - - elif curr_type in reader.gguf_scalar_to_np: - render_element = min(7, total_elements) - for element_pos in range(render_element): - array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0])) - - value = f'[ {", ".join(array_elements).strip()}{", ..." 
if total_elements > len(array_elements) else ""} ]' - - kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value}) - - kv_dump_table_header_map = [ - {'key_name':'n', 'header_name':'POS', 'align':'right'}, - {'key_name':'pretty_type', 'header_name':'TYPE', 'align':'left'}, - {'key_name':'total_elements', 'header_name':'Count', 'align':'right'}, - {'key_name':'field_name', 'header_name':'Key', 'align':'left'}, - {'key_name':'value', 'header_name':'Value', 'align':'left'}, - ] - - markdown_content += markdown_table_with_alignment_support(kv_dump_table_header_map, kv_dump_table) - - markdown_content += "\n" - - if not args.no_tensors: - # Group tensors by their prefix and maintain order - tensor_prefix_order: list[str] = [] - tensor_name_to_key: dict[str, int] = {} - tensor_groups: dict[str, list[ReaderTensor]] = {} - total_elements = sum(tensor.n_elements for tensor in reader.tensors) - - # Parsing Tensors Record - for key, tensor in enumerate(reader.tensors): - tensor_components = tensor.name.split('.') - - # Classify Tensor Group - tensor_group_name = "base" - if tensor_components[0] == 'blk': - tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}" - elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk': - tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}" - elif tensor_components[0] in ['enc', 'dec']: - tensor_group_name = f"{tensor_components[0]}" - - # Check if new Tensor Group - if tensor_group_name not in tensor_groups: - tensor_groups[tensor_group_name] = [] - tensor_prefix_order.append(tensor_group_name) - - # Record Tensor and Tensor Position - tensor_groups[tensor_group_name].append(tensor) - tensor_name_to_key[tensor.name] = key - - # Tensors Mapping Dump - markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n\n' - markdown_content += f'Total number of elements in all tensors: {total_elements} Elements\n' - markdown_content += '\n' - - for group in tensor_prefix_order: - tensors = tensor_groups[group] - group_elements = sum(tensor.n_elements for tensor in tensors) - markdown_content += f"- [{translate_tensor_name(group)} Tensor Group - {element_count_rounded_notation(group_elements)} Elements](#{group.replace('.', '_')})\n" - - markdown_content += "\n" - - markdown_content += "### Tensor Data Offset\n" - markdown_content += '\n' - markdown_content += 'This table contains the offset and data segment relative to start of file\n' - markdown_content += '\n' - - tensor_mapping_table: list[dict[str, str | int]] = [] - for key, tensor in enumerate(reader.tensors): - data_offset_pretty = '{0:#16x}'.format(tensor.data_offset) - data_size_pretty = '{0:#16x}'.format(tensor.n_bytes) - tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty}) - - tensors_mapping_table_header_map = [ - {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'}, - {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'}, - {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'}, - {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'}, - ] - - markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table) - markdown_content += "\n" - - for group in tensor_prefix_order: - tensors = tensor_groups[group] - group_elements = 
sum(tensor.n_elements for tensor in tensors) - group_percentage = group_elements / total_elements * 100 - markdown_content += f"### {translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements\n\n" - - # Precalculate column sizing for visual consistency - prettify_element_est_count_size: int = 1 - prettify_element_count_size: int = 1 - prettify_dimension_max_widths: dict[int, int] = {} - for tensor in tensors: - prettify_element_est_count_size = max(prettify_element_est_count_size, len(str(element_count_rounded_notation(tensor.n_elements)))) - prettify_element_count_size = max(prettify_element_count_size, len(str(tensor.n_elements))) - for i, dimension_size in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))): - prettify_dimension_max_widths[i] = max(prettify_dimension_max_widths.get(i,1), len(str(dimension_size))) - - # Generate Tensor Layer Table Content - tensor_dump_table: list[dict[str, str | int]] = [] - for tensor in tensors: - human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)")) - pretty_dimension = ' x '.join(f'{str(d):>{prettify_dimension_max_widths[i]}}' for i, d in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape)))) - element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})" - element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}" - type_name_string = f"{tensor.tensor_type.name}" - tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string}) - - tensor_dump_table_header_map = [ - {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'}, - {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'}, - {'key_name':'human_layer_name', 'header_name':'Human Friendly Tensor Layer Name', 'align':'left'}, - {'key_name':'element_count', 'header_name':'Elements', 'align':'left'}, - {'key_name':'pretty_dimension', 'header_name':'Shape', 'align':'left'}, - {'key_name':'tensor_type', 'header_name':'Type', 'align':'left'}, - ] - - markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table) - - markdown_content += "\n" - markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n" - markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n" - markdown_content += "\n\n" - - print(markdown_content) # noqa: NP100 - - -def main() -> None: - parser = argparse.ArgumentParser(description="Dump GGUF file metadata") - parser.add_argument("model", type=str, help="GGUF format model filename") - parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata") - parser.add_argument("--json", action="store_true", help="Produce JSON output") - parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)") - parser.add_argument("--data-offset", action="store_true", help="Start of data offset") - parser.add_argument("--data-alignment", action="store_true", help="Data alignment applied globally to data field") - parser.add_argument("--markdown", action="store_true", help="Produce markdown output") - parser.add_argument("--verbose", action="store_true", help="increase output verbosity") - - args = 
parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) - - logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) - - if not args.json and not args.markdown and not args.data_offset and not args.data_alignment: - logger.info(f'* Loading: {args.model}') - - reader = GGUFReader(args.model, 'r') - - if args.json: - dump_metadata_json(reader, args) - elif args.markdown: - dump_markdown_metadata(reader, args) - elif args.data_offset: - print(reader.data_offset) # noqa: NP100 - elif args.data_alignment: - print(reader.alignment) # noqa: NP100 - else: - dump_metadata(reader, args) - - -if __name__ == '__main__': - main() diff --git a/gguf-py/scripts/gguf_hash.py b/gguf-py/scripts/gguf_hash.py deleted file mode 100755 index ee34d09b..00000000 --- a/gguf-py/scripts/gguf_hash.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations - -import uuid -import hashlib - -import logging -import argparse -import os -import sys -from pathlib import Path - -from tqdm import tqdm - -# Necessary to load the local gguf package -if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): - sys.path.insert(0, str(Path(__file__).parent.parent)) - -from gguf import GGUFReader # noqa: E402 - - -logger = logging.getLogger("gguf-hash") - -# UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp') -UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5') - - -# For more information about what field.parts and field.data represent, -# please see the comments in the modify_gguf.py example. -def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None: - sha1 = hashlib.sha1() - sha256 = hashlib.sha256() - uuidv5_sha1 = hashlib.sha1() - uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes) - - # Total Weight Calculation For Progress Bar - total_weights = 0 - for n, tensor in enumerate(reader.tensors, 1): - - # We don't need these - if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): - continue - - # Calculate Tensor Volume - sum_weights_in_tensor = 1 - for dim in tensor.shape: - sum_weights_in_tensor *= dim - total_weights += sum_weights_in_tensor - - # Hash Progress Bar - bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar) - - # Hashing Process - for tensor in reader.tensors: - - # We don't need these - if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): - continue - - # Progressbar - sum_weights_in_tensor = 1 - for dim in tensor.shape: - sum_weights_in_tensor *= dim - bar.update(sum_weights_in_tensor) - - if not no_layer: - - sha1_layer = hashlib.sha1() - sha1_layer.update(tensor.data.data) - print("sha1 {0} {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100 - - sha256_layer = hashlib.sha256() - sha256_layer.update(tensor.data.data) - print("sha256 {0} {1}:{2}".format(sha256_layer.hexdigest(), filename, tensor.name)) # noqa: NP100 - - sha1.update(tensor.data.data) - sha256.update(tensor.data.data) - uuidv5_sha1.update(tensor.data.data) - - # Flush Hash Progress Bar - bar.close() - - # Display Hash Output - print("sha1 {0} {1}".format(sha1.hexdigest(), filename)) # noqa: NP100 - print("sha256 {0} {1}".format(sha256.hexdigest(), filename)) # noqa: NP100 - print("uuid {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: 
NP100 - - -def main() -> None: - parser = argparse.ArgumentParser(description="Dump GGUF file metadata") - parser.add_argument("model", type=str, help="GGUF format model filename") - parser.add_argument("--no-layer", action="store_true", help="exclude per layer hash") - parser.add_argument("--verbose", action="store_true", help="increase output verbosity") - parser.add_argument("--progressbar", action="store_true", help="enable progressbar") - args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) - logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) - reader = GGUFReader(args.model, 'r') - gguf_hash(reader, args.model, not args.progressbar, args.no_layer) - - -if __name__ == '__main__': - main() diff --git a/gguf-py/scripts/gguf_new_metadata.py b/gguf-py/scripts/gguf_new_metadata.py deleted file mode 100755 index fce52a8c..00000000 --- a/gguf-py/scripts/gguf_new_metadata.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations - -import logging -import argparse -import os -import sys -import json -from pathlib import Path - -import numpy as np -from tqdm import tqdm -from typing import Any, Sequence, NamedTuple - -# Necessary to load the local gguf package -if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): - sys.path.insert(0, str(Path(__file__).parent.parent)) - -import gguf - -logger = logging.getLogger("gguf-new-metadata") - - -class MetadataDetails(NamedTuple): - type: gguf.GGUFValueType - value: Any - description: str = '' - - -def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian: - if np.uint32(1) == np.uint32(1).newbyteorder("<"): - # Host is little endian - host_endian = gguf.GGUFEndian.LITTLE - swapped_endian = gguf.GGUFEndian.BIG - else: - # Sorry PDP or other weird systems that don't use BE or LE. 
- host_endian = gguf.GGUFEndian.BIG - swapped_endian = gguf.GGUFEndian.LITTLE - - if reader.byte_order == "S": - return swapped_endian - else: - return host_endian - - -def decode_field(field: gguf.ReaderField | None) -> Any: - if field and field.types: - main_type = field.types[0] - - if main_type == gguf.GGUFValueType.ARRAY: - sub_type = field.types[-1] - - if sub_type == gguf.GGUFValueType.STRING: - return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data] - else: - return [pv for idx in field.data for pv in field.parts[idx].tolist()] - if main_type == gguf.GGUFValueType.STRING: - return str(bytes(field.parts[-1]), encoding='utf-8') - else: - return field.parts[-1][0] - - return None - - -def get_field_data(reader: gguf.GGUFReader, key: str) -> Any: - field = reader.get_field(key) - - return decode_field(field) - - -def find_token(token_list: Sequence[int], token: str) -> Sequence[int]: - token_ids = [index for index, value in enumerate(token_list) if value == token] - - if len(token_ids) == 0: - raise LookupError(f'Unable to find "{token}" in token list!') - - return token_ids - - -def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, MetadataDetails], remove_metadata: Sequence[str]) -> None: - for field in reader.fields.values(): - # Suppress virtual fields and fields written by GGUFWriter - if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'): - logger.debug(f'Suppressing {field.name}') - continue - - # Skip old chat templates if we have new ones - if field.name.startswith(gguf.Keys.Tokenizer.CHAT_TEMPLATE) and gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata: - logger.debug(f'Skipping {field.name}') - continue - - if field.name in remove_metadata: - logger.debug(f'Removing {field.name}') - continue - - old_val = MetadataDetails(field.types[0], decode_field(field)) - val = new_metadata.get(field.name, old_val) - - if field.name in new_metadata: - logger.debug(f'Modifying {field.name}: "{old_val.value}" -> "{val.value}" {val.description}') - del new_metadata[field.name] - elif val.value is not None: - logger.debug(f'Copying {field.name}') - - if val.value is not None: - writer.add_key_value(field.name, val.value, val.type) - - if gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata: - logger.debug('Adding chat template(s)') - writer.add_chat_template(new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE].value) - del new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] - - for key, val in new_metadata.items(): - logger.debug(f'Adding {key}: "{val.value}" {val.description}') - writer.add_key_value(key, val.value, val.type) - - total_bytes = 0 - - for tensor in reader.tensors: - total_bytes += tensor.n_bytes - writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type) - - bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) - - writer.write_header_to_file() - writer.write_kv_data_to_file() - writer.write_ti_data_to_file() - - for tensor in reader.tensors: - writer.write_tensor_data(tensor.data) - bar.update(tensor.n_bytes) - - writer.close() - - -def main() -> None: - tokenizer_metadata = (getattr(gguf.Keys.Tokenizer, n) for n in gguf.Keys.Tokenizer.__dict__.keys() if not n.startswith('_')) - token_names = dict((n.split('.')[-1][:-len('_token_id')], n) for n in tokenizer_metadata if n.endswith('_token_id')) - - parser = argparse.ArgumentParser(description="Make a copy of a GGUF file with new metadata") - 
parser.add_argument("input", type=Path, help="GGUF format model input filename") - parser.add_argument("output", type=Path, help="GGUF format model output filename") - parser.add_argument("--general-name", type=str, help="The models general.name", metavar='"name"') - parser.add_argument("--general-description", type=str, help="The models general.description", metavar='"Description ..."') - parser.add_argument("--chat-template", type=str, help="Chat template string (or JSON string containing templates)", metavar='"{% ... %} ..."') - parser.add_argument("--chat-template-config", type=Path, help="Config file containing chat template(s)", metavar='tokenizer_config.json') - parser.add_argument("--pre-tokenizer", type=str, help="The models tokenizer.ggml.pre", metavar='"pre tokenizer"') - parser.add_argument("--remove-metadata", action="append", type=str, help="Remove metadata (by key name) from output model", metavar='general.url') - parser.add_argument("--special-token", action="append", type=str, help="Special token by value", nargs=2, metavar=(' | '.join(token_names.keys()), '""')) - parser.add_argument("--special-token-by-id", action="append", type=str, help="Special token by id", nargs=2, metavar=(' | '.join(token_names.keys()), '0')) - parser.add_argument("--force", action="store_true", help="Bypass warnings without confirmation") - parser.add_argument("--verbose", action="store_true", help="Increase output verbosity") - args = parser.parse_args(None if len(sys.argv) > 2 else ["--help"]) - - logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) - - new_metadata = {} - remove_metadata = args.remove_metadata or [] - - if args.general_name: - new_metadata[gguf.Keys.General.NAME] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_name) - - if args.general_description: - new_metadata[gguf.Keys.General.DESCRIPTION] = MetadataDetails(gguf.GGUFValueType.STRING, args.general_description) - - if args.chat_template: - new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template) - - if args.chat_template_config: - with open(args.chat_template_config, 'r') as fp: - config = json.load(fp) - template = config.get('chat_template') - if template: - new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template) - - if args.pre_tokenizer: - new_metadata[gguf.Keys.Tokenizer.PRE] = MetadataDetails(gguf.GGUFValueType.STRING, args.pre_tokenizer) - - if remove_metadata: - logger.warning('*** Warning *** Warning *** Warning **') - logger.warning('* Most metadata is required for a fully functional GGUF file,') - logger.warning('* removing crucial metadata may result in a corrupt output file!') - - if not args.force: - logger.warning('* Enter exactly YES if you are positive you want to proceed:') - response = input('YES, I am sure> ') - if response != 'YES': - logger.info("You didn't enter YES. 
Okay then, see ya!") - sys.exit(0) - - logger.info(f'* Loading: {args.input}') - reader = gguf.GGUFReader(args.input, 'r') - - arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE) - endianess = get_byteorder(reader) - - token_list = get_field_data(reader, gguf.Keys.Tokenizer.LIST) or [] - - for name, token in args.special_token or []: - if name not in token_names: - logger.warning(f'Unknown special token "{name}", ignoring...') - else: - ids = find_token(token_list, token) - new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, ids[0], f'= {token}') - - if len(ids) > 1: - logger.warning(f'Multiple "{token}" tokens found, choosing ID {ids[0]}, use --special-token-by-id if you want another:') - logger.warning(', '.join(str(i) for i in ids)) - - for name, id_string in args.special_token_by_id or []: - if name not in token_names: - logger.warning(f'Unknown special token "{name}", ignoring...') - elif not id_string.isdecimal(): - raise LookupError(f'Token ID "{id_string}" is not a valid ID!') - else: - id_int = int(id_string) - - if id_int >= 0 and id_int < len(token_list): - new_metadata[token_names[name]] = MetadataDetails(gguf.GGUFValueType.UINT32, id_int, f'= {token_list[id_int]}') - else: - raise LookupError(f'Token ID {id_int} is not within token list!') - - if os.path.isfile(args.output) and not args.force: - logger.warning('*** Warning *** Warning *** Warning **') - logger.warning(f'* The "{args.output}" GGUF file already exists, it will be overwritten!') - logger.warning('* Enter exactly YES if you are positive you want to proceed:') - response = input('YES, I am sure> ') - if response != 'YES': - logger.info("You didn't enter YES. Okay then, see ya!") - sys.exit(0) - - logger.info(f'* Writing: {args.output}') - writer = gguf.GGUFWriter(args.output, arch=arch, endianess=endianess) - - alignment = get_field_data(reader, gguf.Keys.General.ALIGNMENT) - if alignment is not None: - logger.debug(f'Setting custom alignment: {alignment}') - writer.data_alignment = alignment - - copy_with_new_metadata(reader, writer, new_metadata, remove_metadata) - - -if __name__ == '__main__': - main() diff --git a/gguf-py/scripts/gguf_set_metadata.py b/gguf-py/scripts/gguf_set_metadata.py deleted file mode 100755 index e35b651b..00000000 --- a/gguf-py/scripts/gguf_set_metadata.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python3 -import logging -import argparse -import os -import sys -from pathlib import Path - -# Necessary to load the local gguf package -if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists(): - sys.path.insert(0, str(Path(__file__).parent.parent)) - -from gguf import GGUFReader # noqa: E402 - -logger = logging.getLogger("gguf-set-metadata") - - -def minimal_example(filename: str) -> None: - reader = GGUFReader(filename, 'r+') - field = reader.fields['tokenizer.ggml.bos_token_id'] - if field is None: - return - part_index = field.data[0] - field.parts[part_index][0] = 2 # Set tokenizer.ggml.bos_token_id to 2 - # - # So what's this field.data thing? It's helpful because field.parts contains - # _every_ part of the GGUF field. 
For example, tokenizer.ggml.bos_token_id consists - # of: - # - # Part index 0: Key length (27) - # Part index 1: Key data ("tokenizer.ggml.bos_token_id") - # Part index 2: Field type (4, the id for GGUFValueType.UINT32) - # Part index 3: Field value - # - # Note also that each part is an NDArray slice, so even a part that - # is only a single value like the key length will be a NDArray of - # the key length type (numpy.uint32). - # - # The .data attribute in the Field is a list of relevant part indexes - # and doesn't contain internal GGUF details like the key length part. - # In this case, .data will be [3] - just the part index of the - # field value itself. - - -def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: - field = reader.get_field(args.key) - if field is None: - logger.error(f'! Field {repr(args.key)} not found') - sys.exit(1) - # Note that field.types is a list of types. This is because the GGUF - # format supports arrays. For example, an array of UINT32 would - # look like [GGUFValueType.ARRAY, GGUFValueType.UINT32] - handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None - if handler is None: - logger.error(f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}') - sys.exit(1) - current_value = field.parts[field.data[0]][0] - new_value = handler(args.value) - logger.info(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}') - if current_value == new_value: - logger.info(f'- Key {repr(args.key)} already set to requested value {current_value}') - sys.exit(0) - if args.dry_run: - sys.exit(0) - if not args.force: - logger.warning('*** Warning *** Warning *** Warning **') - logger.warning('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.') - logger.warning('* Enter exactly YES if you are positive you want to proceed:') - response = input('YES, I am sure> ') - if response != 'YES': - logger.info("You didn't enter YES. Okay then, see ya!") - sys.exit(0) - field.parts[field.data[0]][0] = new_value - logger.info('* Field changed. Successful completion.') - - -def main() -> None: - parser = argparse.ArgumentParser(description="Set a simple value in GGUF file metadata") - parser.add_argument("model", type=str, help="GGUF format model filename") - parser.add_argument("key", type=str, help="Metadata key to set") - parser.add_argument("value", type=str, help="Metadata value to set") - parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything") - parser.add_argument("--force", action="store_true", help="Change the field without confirmation") - parser.add_argument("--verbose", action="store_true", help="increase output verbosity") - - args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) - - logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) - - logger.info(f'* Loading: {args.model}') - reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+') - set_metadata(reader, args) - - -if __name__ == '__main__': - main()
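
Usage note: the comments in gguf_set_metadata.py above describe how GGUFReader exposes each key/value pair through field.parts and field.data. Below is a minimal standalone sketch of that layout, assuming the gguf package is installed (e.g. via `pip install gguf`); the file name and key are placeholders for illustration, not values taken from this change.

    # Minimal sketch: inspect a single metadata field with GGUFReader.
    # "model.gguf" and the key below are illustrative placeholders.
    from gguf import GGUFReader

    reader = GGUFReader("model.gguf", "r")  # open read-only; 'r+' would allow in-place edits
    field = reader.get_field("tokenizer.ggml.bos_token_id")
    if field is not None:
        # field.parts holds every on-disk piece of the pair:
        #   parts[0] = key length, parts[1] = key bytes,
        #   parts[2] = value type id, parts[3] = the value itself
        # field.data lists only the part indexes that carry the value, e.g. [3].
        value_part = field.parts[field.data[0]]
        print(field.name, [t.name for t in field.types], value_part[0])

Since the relocated scripts keep their `if __name__ == '__main__'` guards, they can presumably still be run as modules, for example `python -m gguf.scripts.gguf_dump --markdown model.gguf`; the installed console entry points depend on pyproject.toml, which is outside these hunks.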