# Python
-__pycache__
-.venv
-/Pipfile
-dist
-poetry.lock
+/.venv
+/__pycache__/
+*/poetry.lock
poetry.toml
+# Nix
+/result
+
# Test binaries
/tests/test-backend-ops
/tests/test-double-float
+++ /dev/null
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# This script downloads the tokenizers of the specified models from Huggingface and
-# generates the get_vocab_base_pre() function for convert-hf-to-gguf.py
-#
-# This is necessary in order to analyze the type of pre-tokenizer used by the model and
-# to provide the necessary information to llama.cpp via the GGUF header so that it can
-# implement the same pre-tokenizer.
-#
-# ref: https://github.com/ggerganov/llama.cpp/pull/6920
-#
-# Instructions:
-#
-# - Add a new model to the "models" list
-# - Run the script with your huggingface token:
-#
-# python3 convert-hf-to-gguf-update.py <huggingface_token>
-#
-# - Copy-paste the generated get_vocab_base_pre() function into convert-hf-to-gguf.py
-# - Update llama.cpp with the new pre-tokenizer if necessary
-#
-# TODO: generate tokenizer tests for llama.cpp
-#
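-# For illustration only (the name and repo below are placeholders, not a real model), a new
-# entry in the "models" list has the following shape:
-#
-#   {"name": "my-model", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/<org>/<model>", },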
-
-import logging
-import os
-import pathlib
-import re
-
-import requests
-import sys
-import json
-
-from hashlib import sha256
-from enum import IntEnum, auto
-from transformers import AutoTokenizer
-
-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger("convert-hf-to-gguf-update")
-sess = requests.Session()
-
-
-class TOKENIZER_TYPE(IntEnum):
- SPM = auto()
- BPE = auto()
- WPM = auto()
- UGM = auto()
-
-
-# TODO: this string has to exercise as much pre-tokenizer functionality as possible
-# will be updated with time - contributions welcome
-chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
-
-if len(sys.argv) == 2:
- token = sys.argv[1]
- if not token.startswith("hf_"):
- logger.info("Huggingface token seems invalid")
- logger.info("Usage: python convert-hf-to-gguf-update.py <huggingface_token>")
- sys.exit(1)
-else:
- logger.info("Usage: python convert-hf-to-gguf-update.py <huggingface_token>")
- sys.exit(1)
-
-# TODO: add models here, base models preferred
-models = [
- {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
- {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
- {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
- {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
- {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
- {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
- {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
- {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
- {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
- {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
- {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
- {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
- {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
- {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
- {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
- {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
- {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
- {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
- {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
- {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
- {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
- {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
- {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
- {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
- {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
- {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
- {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
-]
-
-
-def download_file_with_auth(url, token, save_path):
- headers = {"Authorization": f"Bearer {token}"}
- response = sess.get(url, headers=headers)
- response.raise_for_status()
- os.makedirs(os.path.dirname(save_path), exist_ok=True)
- with open(save_path, 'wb') as f:
- f.write(response.content)
- logger.info(f"File {save_path} downloaded successfully")
-
-
-def download_model(model):
- name = model["name"]
- repo = model["repo"]
- tokt = model["tokt"]
-
- os.makedirs(f"models/tokenizers/{name}", exist_ok=True)
-
- files = ["config.json", "tokenizer.json", "tokenizer_config.json"]
-
- if tokt == TOKENIZER_TYPE.SPM:
- files.append("tokenizer.model")
-
- if tokt == TOKENIZER_TYPE.UGM:
- files.append("spiece.model")
-
- for file in files:
- save_path = f"models/tokenizers/{name}/{file}"
- if os.path.isfile(save_path):
- logger.info(f"{name}: File {save_path} already exists - skipping")
- continue
- download_file_with_auth(f"{repo}/resolve/main/{file}", token, save_path)
-
-
-for model in models:
- try:
- download_model(model)
- except Exception as e:
- logger.error(f"Failed to download model {model['name']}. Error: {e}")
-
-
-# generate the source code for the convert-hf-to-gguf.py:get_vocab_base_pre() function:
-
-src_ifs = ""
-for model in models:
- name = model["name"]
- tokt = model["tokt"]
-
- if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
- continue
-
-    # Skip if the tokenizer folder does not exist or earlier downloads failed
- if not os.path.exists(f"models/tokenizers/{name}"):
- logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
- continue
-
- # create the tokenizer
- try:
- if name == "t5":
- tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
- else:
- tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
- except OSError as e:
- logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
- continue # Skip to the next model if the tokenizer can't be loaded
-
- chktok = tokenizer.encode(chktxt)
- chkhsh = sha256(str(chktok).encode()).hexdigest()
-
- logger.info(f"model: {name}")
- logger.info(f"tokt: {tokt}")
- logger.info(f"repo: {model['repo']}")
- logger.info(f"chktok: {chktok}")
- logger.info(f"chkhsh: {chkhsh}")
-
-    # print the "normalizer" and "pre_tokenizer" content from tokenizer.json
- with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
- cfg = json.load(f)
- normalizer = cfg["normalizer"]
- logger.info("normalizer: " + json.dumps(normalizer, indent=4))
- pre_tokenizer = cfg["pre_tokenizer"]
- logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
- if "ignore_merges" in cfg["model"]:
- logger.info("ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4))
-
- logger.info("")
-
- src_ifs += f" if chkhsh == \"{chkhsh}\":\n"
- src_ifs += f" # ref: {model['repo']}\n"
- src_ifs += f" res = \"{name}\"\n"
-
-src_func = f"""
- def get_vocab_base_pre(self, tokenizer) -> str:
- # encoding this string and hashing the resulting tokens would (hopefully) give us a unique identifier that
- # is specific for the BPE pre-tokenizer used by the model
- # we will use this unique identifier to write a "tokenizer.ggml.pre" entry in the GGUF file which we can
- # use in llama.cpp to implement the same pre-tokenizer
-
- chktxt = {repr(chktxt)}
-
- chktok = tokenizer.encode(chktxt)
- chkhsh = sha256(str(chktok).encode()).hexdigest()
-
- logger.debug(f"chktok: {{chktok}}")
- logger.debug(f"chkhsh: {{chkhsh}}")
-
- res = None
-
- # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
- # or pull the latest version of the model from Huggingface
- # don't edit the hashes manually!
-{src_ifs}
- if res is None:
- logger.warning("\\n")
- logger.warning("**************************************************************************************")
- logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!")
- logger.warning("** There are 2 possible reasons for this:")
- logger.warning("** - the model has not been added to convert-hf-to-gguf-update.py yet")
- logger.warning("** - the pre-tokenization config has changed upstream")
- logger.warning("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.")
- logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
- logger.warning("**")
- logger.warning(f"** chkhsh: {{chkhsh}}")
- logger.warning("**************************************************************************************")
- logger.warning("\\n")
- raise NotImplementedError("BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
-
- logger.debug(f"tokenizer.ggml.pre: {{repr(res)}}")
- logger.debug(f"chkhsh: {{chkhsh}}")
-
- return res
-"""
-
-convert_py_pth = pathlib.Path("convert-hf-to-gguf.py")
-convert_py = convert_py_pth.read_text(encoding="utf-8")
-convert_py = re.sub(
- r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
- lambda m: m.group(1) + src_func + m.group(3),
- convert_py,
- flags=re.DOTALL | re.MULTILINE,
-)
-
-convert_py_pth.write_text(convert_py, encoding="utf-8")
-
-logger.info("+++ convert-hf-to-gguf.py was updated")
-
-# generate tests for each tokenizer model
-
-tests = [
- "ied 4 ½ months",
- "Führer",
- "",
- " ",
- " ",
- " ",
- "\t",
- "\n",
- "\n\n",
- "\n\n\n",
- "\t\n",
- "Hello world",
- " Hello world",
- "Hello World",
- " Hello World",
- " Hello World!",
- "Hello, world!",
- " Hello, world!",
- " this is 🦙.cpp",
- "w048 7tuijk dsdfhu",
- "нещо на Български",
- "កាន់តែពិសេសអាចខលចេញ",
- "🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
- "Hello",
- " Hello",
- " Hello",
- " Hello",
- " Hello",
- " Hello\n Hello",
- " (",
- "\n =",
- "' era",
- "Hello, y'all! How are you 😁 ?我想在apple工作1314151天~",
- "!!!!!!",
- "3",
- "33",
- "333",
- "3333",
- "33333",
- "333333",
- "3333333",
- "33333333",
- "333333333",
- "Cửa Việt", # llama-bpe fails on this
- " discards",
- chktxt,
-]
-
-# write the tests to ./models/ggml-vocab-{name}.gguf.inp
-# the format is:
-#
-# test0
-# __ggml_vocab_test__
-# test1
-# __ggml_vocab_test__
-# ...
-#
-
-# for each model, encode all tests and write the results to ./models/ggml-vocab-{name}.gguf.out
-# for each test, write the resulting tokens on a separate line
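-#
-# for example, the test string "Hello world" would appear in the .inp file as
-#
-# Hello world
-# __ggml_vocab_test__
-#
-# and contribute one line of space-separated token ids to the .out file (the ids below are
-# illustrative only):
-#
-#  15043 3186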
-
-for model in models:
- name = model["name"]
- tokt = model["tokt"]
-
-    # Skip if the tokenizer folder does not exist or earlier downloads failed
- if not os.path.exists(f"models/tokenizers/{name}"):
- logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
- continue
-
- # create the tokenizer
- try:
- if name == "t5":
- tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
- else:
- tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
- except OSError as e:
- logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
- continue # Skip this model and continue with the next one in the loop
-
- with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
- for text in tests:
- f.write(f"{text}")
- f.write("\n__ggml_vocab_test__\n")
-
- with open(f"models/ggml-vocab-{name}.gguf.out", "w") as f:
- for text in tests:
- res = tokenizer.encode(text, add_special_tokens=False)
- for r in res:
- f.write(f" {r}")
- f.write("\n")
-
- logger.info(f"Tests for {name} written in ./models/ggml-vocab-{name}.gguf.*")
-
-# generate commands for creating vocab files
-
-logger.info("\nRun the following commands to generate the vocab files for testing:\n")
-
-for model in models:
- name = model["name"]
-
- print(f"python3 convert-hf-to-gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only") # noqa: NP100
-
-logger.info("\n")
+++ /dev/null
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-from __future__ import annotations
-
-import logging
-import argparse
-import contextlib
-import json
-import os
-import re
-import sys
-from enum import IntEnum
-from pathlib import Path
-from hashlib import sha256
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Sequence, TypeVar, cast
-
-import math
-import numpy as np
-import torch
-
-if TYPE_CHECKING:
- from torch import Tensor
-
-if 'NO_LOCAL_GGUF' not in os.environ:
- sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
-import gguf
-
-logger = logging.getLogger("hf-to-gguf")
-
-
-###### MODEL DEFINITIONS ######
-
-class SentencePieceTokenTypes(IntEnum):
- NORMAL = 1
- UNKNOWN = 2
- CONTROL = 3
- USER_DEFINED = 4
- UNUSED = 5
- BYTE = 6
-
-
-AnyModel = TypeVar("AnyModel", bound="type[Model]")
-
-
-class Model:
- _model_classes: dict[str, type[Model]] = {}
-
- dir_model: Path
- ftype: gguf.LlamaFileType
- is_big_endian: bool
- endianess: gguf.GGUFEndian
- use_temp_file: bool
- lazy: bool
- model_name: str | None
- part_names: list[str]
- is_safetensors: bool
- hparams: dict[str, Any]
- block_count: int
- tensor_map: gguf.TensorNameMap
- tensor_names: set[str] | None
- fname_out: Path
- gguf_writer: gguf.GGUFWriter
-
- # subclasses should define this!
- model_arch: gguf.MODEL_ARCH
-
- def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool,
- model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
- if type(self) is Model:
- raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
- self.dir_model = dir_model
- self.ftype = ftype
- self.is_big_endian = is_big_endian
- self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
- self.use_temp_file = use_temp_file
- self.lazy = not eager
- self.model_name = model_name
- self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
- self.is_safetensors = len(self.part_names) > 0
- if not self.is_safetensors:
- self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
- self.hparams = Model.load_hparams(self.dir_model)
- self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
- self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
- self.tensor_names = None
- if self.ftype == gguf.LlamaFileType.GUESSED:
- # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
- _, first_tensor = next(self.get_tensors())
- if first_tensor.dtype == torch.float16:
- logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
- self.ftype = gguf.LlamaFileType.MOSTLY_F16
- else:
- logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
- self.ftype = gguf.LlamaFileType.MOSTLY_BF16
- ftype_up: str = self.ftype.name.partition("_")[2].upper()
- ftype_lw: str = ftype_up.lower()
- # allow templating the file name with the output ftype, useful with the "auto" ftype
- self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
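-        # e.g. a hypothetical fname_out template of "ggml-model-{ftype}.gguf" would expand to
-        # "ggml-model-f16.gguf" when the selected (or guessed) file type is MOSTLY_F16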
- self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
- split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
-
- @classmethod
- def __init_subclass__(cls):
- # can't use an abstract property, because overriding it without type errors
- # would require using decorated functions instead of simply defining the property
- if "model_arch" not in cls.__dict__:
- raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
-
- def find_hparam(self, keys: Iterable[str], optional: bool = False) -> Any:
- key = next((k for k in keys if k in self.hparams), None)
- if key is not None:
- return self.hparams[key]
- if optional:
- return None
- raise KeyError(f"could not find any of: {keys}")
-
- def set_vocab(self):
- self._set_vocab_gpt2()
-
- def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
- tensor_names_from_parts: set[str] = set()
-
- if len(self.part_names) > 1:
- self.tensor_names = set()
- index_name = "model.safetensors" if self.is_safetensors else "pytorch_model.bin"
- index_name += ".index.json"
- logger.info(f"gguf: loading model weight map from '{index_name}'")
- with open(self.dir_model / index_name, "r", encoding="utf-8") as f:
- index: dict[str, Any] = json.load(f)
- weight_map = index.get("weight_map")
- if weight_map is None or not isinstance(weight_map, dict):
- raise ValueError(f"Can't load 'weight_map' from {index_name!r}")
- self.tensor_names.update(weight_map.keys())
- else:
- self.tensor_names = tensor_names_from_parts
-
- for part_name in self.part_names:
- logger.info(f"gguf: loading model part '{part_name}'")
- ctx: ContextManager[Any]
- if self.is_safetensors:
- from safetensors import safe_open
- ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu"))
- else:
- ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
-
- with ctx as model_part:
- tensor_names_from_parts.update(model_part.keys())
-
- for name in model_part.keys():
- data = model_part.get_tensor(name) if self.is_safetensors else model_part[name]
- if self.lazy:
- data = LazyTorchTensor.from_eager(data)
- yield name, data
-
- # only verify tensor name presence; it doesn't matter if they are not in the right files
- if len(sym_diff := tensor_names_from_parts.symmetric_difference(self.tensor_names)) > 0:
- raise ValueError(f"Mismatch between weight map and model parts for tensor names: {sym_diff}")
-
- def format_tensor_name(self, key: gguf.MODEL_TENSOR, bid: int | None = None, suffix: str = ".weight") -> str:
- if key not in gguf.MODEL_TENSORS[self.model_arch]:
- raise ValueError(f"Missing {key!r} for MODEL_TENSORS of {self.model_arch!r}")
- name: str = gguf.TENSOR_NAMES[key]
- if "{bid}" in name:
- assert bid is not None
- name = name.format(bid=bid)
- return name + suffix
-
- def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, bid: int | None, suffix: str = ".weight") -> bool:
- if key not in gguf.MODEL_TENSORS[self.model_arch]:
- return False
- key_name: str = gguf.TENSOR_NAMES[key]
- if "{bid}" in key_name:
- if bid is None:
- return False
- key_name = key_name.format(bid=bid)
- else:
- if bid is not None:
- return False
- return name == (key_name + suffix)
-
- def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
- new_name = self.tensor_map.get_name(key=name, try_suffixes=try_suffixes)
- if new_name is None:
- raise ValueError(f"Can not map tensor {name!r}")
- return new_name
-
- def set_gguf_parameters(self):
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_block_count(self.block_count)
-
- if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
- self.gguf_writer.add_context_length(n_ctx)
- logger.info(f"gguf: context length = {n_ctx}")
-
- n_embd = self.find_hparam(["hidden_size", "n_embd"])
- self.gguf_writer.add_embedding_length(n_embd)
- logger.info(f"gguf: embedding length = {n_embd}")
-
- if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
- self.gguf_writer.add_feed_forward_length(n_ff)
- logger.info(f"gguf: feed forward length = {n_ff}")
-
- n_head = self.find_hparam(["num_attention_heads", "n_head"])
- self.gguf_writer.add_head_count(n_head)
- logger.info(f"gguf: head count = {n_head}")
-
- if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
- self.gguf_writer.add_head_count_kv(n_head_kv)
- logger.info(f"gguf: key-value head count = {n_head_kv}")
-
- if (rope_theta := self.hparams.get("rope_theta")) is not None:
- self.gguf_writer.add_rope_freq_base(rope_theta)
- logger.info(f"gguf: rope theta = {rope_theta}")
- if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
- self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
- logger.info(f"gguf: rms norm epsilon = {f_rms_eps}")
- if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
- self.gguf_writer.add_layer_norm_eps(f_norm_eps)
- logger.info(f"gguf: layer norm epsilon = {f_norm_eps}")
- if (n_experts := self.hparams.get("num_local_experts")) is not None:
- self.gguf_writer.add_expert_count(n_experts)
- logger.info(f"gguf: expert count = {n_experts}")
- if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
- self.gguf_writer.add_expert_used_count(n_experts_used)
- logger.info(f"gguf: experts used count = {n_experts_used}")
-
- self.gguf_writer.add_file_type(self.ftype)
- logger.info(f"gguf: file type = {self.ftype}")
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- return [(self.map_tensor_name(name), data_torch)]
-
- def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
- del name, new_name, bid, n_dims # unused
-
- return False
-
- def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
- del name, new_name, bid, n_dims # unused
-
- return False
-
- def write_tensors(self):
- max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
-
- for name, data_torch in self.get_tensors():
- # we don't need these
- if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
- continue
-
- old_dtype = data_torch.dtype
-
- # convert any unsupported data types to float32
- if data_torch.dtype not in (torch.float16, torch.float32):
- data_torch = data_torch.to(torch.float32)
-
- # use the first number-like part of the tensor name as the block id
- bid = None
- for part in name.split("."):
- if part.isdecimal():
- bid = int(part)
- break
-
- for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)):
- data: np.ndarray = data # type hint
- n_dims = len(data.shape)
- data_dtype = data.dtype
- data_qtype: gguf.GGMLQuantizationType | None = None
-
- # when both are True, f32 should win
- extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
- extra_f16 = self.extra_f16_tensors(name, new_name, bid, n_dims)
-
- # Most of the codebase that takes in 1D tensors or norms only handles F32 tensors
- # Conditions should closely match those in llama_model_quantize_internal in llama.cpp
- extra_f32 = any(cond for cond in (
- extra_f32,
- n_dims == 1,
- new_name.endswith("_norm.weight"),
- ))
-
- # Some tensor types are always in float32
- extra_f32 = extra_f32 or any(self.match_model_tensor_name(new_name, key, bid) for key in (
- gguf.MODEL_TENSOR.FFN_GATE_INP,
- gguf.MODEL_TENSOR.POS_EMBD,
- gguf.MODEL_TENSOR.TOKEN_TYPES,
- ))
-
- # if f16 desired, convert any float32 2-dim weight tensors to float16
- extra_f16 = any(cond for cond in (
- extra_f16,
- (name.endswith(".weight") and n_dims >= 2),
- ))
-
- if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
- if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
- data = gguf.quantize_bf16(data)
- assert data.dtype == np.int16
- data_qtype = gguf.GGMLQuantizationType.BF16
-
- elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):
- data = gguf.quantize_q8_0(data)
- assert data.dtype == np.uint8
- data_qtype = gguf.GGMLQuantizationType.Q8_0
-
- else: # default to float16 for quantized tensors
- if data_dtype != np.float16:
- data = data.astype(np.float16)
- data_qtype = gguf.GGMLQuantizationType.F16
-
- if data_qtype is None: # by default, convert to float32
- if data_dtype != np.float32:
- data = data.astype(np.float32)
- data_qtype = gguf.GGMLQuantizationType.F32
-
- shape = gguf.quant_shape_from_byte_shape(data.shape, data_qtype) if data.dtype == np.uint8 else data.shape
-
- # reverse shape to make it similar to the internal ggml dimension order
- shape_str = f"{{{', '.join(str(n) for n in reversed(shape))}}}"
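-                # for illustration: a hypothetical tensor of shape (32000, 4096) would be
-                # logged with shape_str "{4096, 32000}"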
-
- # n_dims is implicit in the shape
- logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
-
- self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)
-
- def write(self):
- self.write_tensors()
- self.gguf_writer.write_header_to_file(self.fname_out)
- self.gguf_writer.write_kv_data_to_file()
- self.gguf_writer.write_tensors_to_file(progress=True)
- self.gguf_writer.close()
-
- def write_vocab(self):
- if len(self.gguf_writer.tensors) != 1:
- raise ValueError('Splitting the vocabulary is not supported')
- self.gguf_writer.write_header_to_file(self.fname_out)
- self.gguf_writer.write_kv_data_to_file()
- self.gguf_writer.close()
-
- @staticmethod
- def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
- part_names: list[str] = []
- for filename in os.listdir(dir_model):
- if filename.startswith(prefix) and filename.endswith(suffix):
- part_names.append(filename)
-
- part_names.sort()
-
- return part_names
-
- @staticmethod
- def load_hparams(dir_model: Path):
- with open(dir_model / "config.json", "r", encoding="utf-8") as f:
- return json.load(f)
-
- @classmethod
- def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
- assert names
-
- def func(modelcls: AnyModel) -> AnyModel:
- for name in names:
- cls._model_classes[name] = modelcls
- return modelcls
- return func
-
- @classmethod
- def from_model_architecture(cls, arch: str) -> type[Model]:
- try:
- return cls._model_classes[arch]
- except KeyError:
- raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
-
- # used for GPT-2 BPE and WordPiece vocabs
- def get_vocab_base(self) -> tuple[list[str], list[int], str]:
- tokens: list[str] = []
- toktypes: list[int] = []
-
- from transformers import AutoTokenizer
- tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
- vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
- assert max(tokenizer.vocab.values()) < vocab_size
-
- tokpre = self.get_vocab_base_pre(tokenizer)
-
- reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
- added_vocab = tokenizer.get_added_vocab()
-
- for i in range(vocab_size):
- if i not in reverse_vocab:
- tokens.append(f"[PAD{i}]")
- toktypes.append(gguf.TokenType.USER_DEFINED)
- elif reverse_vocab[i] in added_vocab:
- tokens.append(reverse_vocab[i])
- if tokenizer.added_tokens_decoder[i].special:
- toktypes.append(gguf.TokenType.CONTROL)
- else:
- toktypes.append(gguf.TokenType.USER_DEFINED)
- else:
- tokens.append(reverse_vocab[i])
- toktypes.append(gguf.TokenType.NORMAL)
-
- return tokens, toktypes, tokpre
-
- # NOTE: this function is generated by convert-hf-to-gguf-update.py
- # do not modify it manually!
- # ref: https://github.com/ggerganov/llama.cpp/pull/6920
- # Marker: Start get_vocab_base_pre
- def get_vocab_base_pre(self, tokenizer) -> str:
- # encoding this string and hashing the resulting tokens would (hopefully) give us a unique identifier that
- # is specific for the BPE pre-tokenizer used by the model
- # we will use this unique identifier to write a "tokenizer.ggml.pre" entry in the GGUF file which we can
- # use in llama.cpp to implement the same pre-tokenizer
-
- chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶\u200d🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````""""......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
-
- chktok = tokenizer.encode(chktxt)
- chkhsh = sha256(str(chktok).encode()).hexdigest()
-
- logger.debug(f"chktok: {chktok}")
- logger.debug(f"chkhsh: {chkhsh}")
-
- res = None
-
- # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
- # or pull the latest version of the model from Huggingface
- # don't edit the hashes manually!
- if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
- # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
- res = "llama-bpe"
- if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
- # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
- res = "deepseek-llm"
- if chkhsh == "347715f544604f9118bb75ed199f68779f423cabb20db6de6f31b908d04d7821":
- # ref: https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base
- res = "deepseek-coder"
- if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
- # ref: https://huggingface.co/tiiuae/falcon-7b
- res = "falcon"
- if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
- # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
- res = "bert-bge"
- if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
- # ref: https://huggingface.co/mosaicml/mpt-7b
- res = "mpt"
- if chkhsh == "35d91631860c815f952d711435f48d356ebac988362536bed955d43bfa436e34":
- # ref: https://huggingface.co/bigcode/starcoder2-3b
- res = "starcoder"
- if chkhsh == "3ce83efda5659b07b1ad37ca97ca5797ea4285d9b9ab0dc679e4a720c9da7454":
- # ref: https://huggingface.co/openai-community/gpt2
- res = "gpt-2"
- if chkhsh == "32d85c31273f8019248f2559fed492d929ea28b17e51d81d3bb36fff23ca72b3":
- # ref: https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b
- res = "stablelm2"
- if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff":
- # ref: https://huggingface.co/smallcloudai/Refact-1_6-base
- res = "refact"
- if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
- # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
- res = "command-r"
- if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
- # ref: https://huggingface.co/Qwen/Qwen1.5-7B
- res = "qwen2"
- if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
- # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
- res = "olmo"
- if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
- # ref: https://huggingface.co/databricks/dbrx-base
- res = "dbrx"
- if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
- # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
- res = "jina-v2-en"
- if chkhsh == "171aeeedd6fb548d418a7461d053f11b6f1f1fc9b387bd66640d28a4b9f5c643":
- # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-es
- res = "jina-v2-es"
- if chkhsh == "27949a2493fc4a9f53f5b9b029c82689cfbe5d3a1929bb25e043089e28466de6":
- # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-de
- res = "jina-v2-de"
- if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
- # ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
- res = "smaug-bpe"
- if chkhsh == "c7ea5862a53e4272c035c8238367063e2b270d51faa48c0f09e9d5b54746c360":
- # ref: https://huggingface.co/LumiOpen/Poro-34B-chat
- res = "poro-chat"
- if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
- # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
- res = "jina-v2-code"
- if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
- # ref: https://huggingface.co/LumiOpen/Viking-7B
- res = "viking"
- if chkhsh == "b53802fb28e26d645c3a310b34bfe07da813026ec7c7716883404d5e0f8b1901":
- # ref: https://huggingface.co/core42/jais-13b
- res = "jais"
-
- if res is None:
- logger.warning("\n")
- logger.warning("**************************************************************************************")
- logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!")
- logger.warning("** There are 2 possible reasons for this:")
- logger.warning("** - the model has not been added to convert-hf-to-gguf-update.py yet")
- logger.warning("** - the pre-tokenization config has changed upstream")
- logger.warning("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.")
- logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
- logger.warning("**")
- logger.warning(f"** chkhsh: {chkhsh}")
- logger.warning("**************************************************************************************")
- logger.warning("\n")
- raise NotImplementedError("BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
-
- logger.debug(f"tokenizer.ggml.pre: {repr(res)}")
- logger.debug(f"chkhsh: {chkhsh}")
-
- return res
- # Marker: End get_vocab_base_pre
-
- def _set_vocab_gpt2(self) -> None:
- tokens, toktypes, tokpre = self.get_vocab_base()
- self.gguf_writer.add_tokenizer_model("gpt2")
- self.gguf_writer.add_tokenizer_pre(tokpre)
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_types(toktypes)
-
- special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def _set_vocab_qwen(self):
- dir_model = self.dir_model
- hparams = self.hparams
- tokens: list[str] = []
- toktypes: list[int] = []
-
- from transformers import AutoTokenizer
- tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
- vocab_size = hparams["vocab_size"]
- assert max(tokenizer.get_vocab().values()) < vocab_size
-
- tokpre = self.get_vocab_base_pre(tokenizer)
-
- merges = []
- vocab = {}
- mergeable_ranks = tokenizer.mergeable_ranks
- for token, rank in mergeable_ranks.items():
- vocab[QwenModel.token_bytes_to_string(token)] = rank
- if len(token) == 1:
- continue
- merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
- assert len(merged) == 2
- merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
-
- # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
- added_vocab = tokenizer.special_tokens
- reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **added_vocab}.items()}
-
- for i in range(vocab_size):
- if i not in reverse_vocab:
- tokens.append(f"[PAD{i}]")
- toktypes.append(gguf.TokenType.USER_DEFINED)
- elif reverse_vocab[i] in added_vocab:
- tokens.append(reverse_vocab[i])
- toktypes.append(gguf.TokenType.CONTROL)
- else:
- tokens.append(reverse_vocab[i])
- toktypes.append(gguf.TokenType.NORMAL)
-
- self.gguf_writer.add_tokenizer_model("gpt2")
- self.gguf_writer.add_tokenizer_pre(tokpre)
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_types(toktypes)
-
- special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
- special_vocab.merges = merges
- # only add special tokens when they were not already loaded from config.json
- if len(special_vocab.special_token_ids) == 0:
- special_vocab._set_special_token("bos", tokenizer.special_tokens["<|endoftext|>"])
- special_vocab._set_special_token("eos", tokenizer.special_tokens["<|endoftext|>"])
- # this one is usually not in config.json anyway
- special_vocab._set_special_token("unk", tokenizer.special_tokens["<|endoftext|>"])
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def _set_vocab_sentencepiece(self, add_to_gguf=True):
- tokens, scores, toktypes = self._create_vocab_sentencepiece()
-
- self.gguf_writer.add_tokenizer_model("llama")
- self.gguf_writer.add_tokenizer_pre("default")
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_scores(scores)
- self.gguf_writer.add_token_types(toktypes)
-
- special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def _create_vocab_sentencepiece(self):
- from sentencepiece import SentencePieceProcessor
-
- tokenizer_path = self.dir_model / 'tokenizer.model'
-
- tokens: list[bytes] = []
- scores: list[float] = []
- toktypes: list[int] = []
-
- if not tokenizer_path.is_file():
- raise FileNotFoundError(f"File not found: {tokenizer_path}")
-
- tokenizer = SentencePieceProcessor()
- tokenizer.LoadFromFile(str(tokenizer_path))
-
- vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
-
- tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
- scores: list[float] = [-10000.0] * vocab_size
- toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
-
- for token_id in range(tokenizer.vocab_size()):
- piece = tokenizer.IdToPiece(token_id)
- text = piece.encode("utf-8")
- score = tokenizer.GetScore(token_id)
-
- toktype = SentencePieceTokenTypes.NORMAL
- if tokenizer.IsUnknown(token_id):
- toktype = SentencePieceTokenTypes.UNKNOWN
- elif tokenizer.IsControl(token_id):
- toktype = SentencePieceTokenTypes.CONTROL
- elif tokenizer.IsUnused(token_id):
- toktype = SentencePieceTokenTypes.UNUSED
- elif tokenizer.IsByte(token_id):
- toktype = SentencePieceTokenTypes.BYTE
-
- tokens[token_id] = text
- scores[token_id] = score
- toktypes[token_id] = toktype
-
- added_tokens_file = self.dir_model / 'added_tokens.json'
- if added_tokens_file.is_file():
- with open(added_tokens_file, "r", encoding="utf-8") as f:
- added_tokens_json = json.load(f)
- for key in added_tokens_json:
- token_id = added_tokens_json[key]
- if (token_id >= vocab_size):
- logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
- continue
-
- tokens[token_id] = key.encode("utf-8")
- scores[token_id] = -1000.0
- toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
-
- if vocab_size > len(tokens):
- pad_count = vocab_size - len(tokens)
- logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
- for i in range(1, pad_count + 1):
- tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
- scores.append(-1000.0)
- toktypes.append(SentencePieceTokenTypes.UNUSED)
-
- return tokens, scores, toktypes
-
- def _set_vocab_llama_hf(self):
- vocab = gguf.LlamaHfVocab(self.dir_model)
- tokens = []
- scores = []
- toktypes = []
-
- for text, score, toktype in vocab.all_tokens():
- tokens.append(text)
- scores.append(score)
- toktypes.append(toktype)
-
- assert len(tokens) == vocab.vocab_size
-
- self.gguf_writer.add_tokenizer_model("llama")
- self.gguf_writer.add_tokenizer_pre("default")
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_scores(scores)
- self.gguf_writer.add_token_types(toktypes)
-
- special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
- special_vocab.add_to_gguf(self.gguf_writer)
-
-
-@Model.register("GPTNeoXForCausalLM")
-class GPTNeoXModel(Model):
- model_arch = gguf.MODEL_ARCH.GPTNEOX
-
- def set_gguf_parameters(self):
- block_count = self.hparams["num_hidden_layers"]
-
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
- self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
- self.gguf_writer.add_rope_dimension_count(
- int(self.hparams["rotary_pct"] * (self.hparams["hidden_size"] // self.hparams["num_attention_heads"])),
- )
- self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
- self.gguf_writer.add_parallel_residual(self.hparams.get("use_parallel_residual", True))
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
- n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
-
- tensors: list[tuple[str, Tensor]] = []
-
- if re.match(r"gpt_neox\.layers\.\d+\.attention\.query_key_value\.weight", name):
- # Map bloom-style qkv_linear to gpt-style qkv_linear
- # bloom: https://github.com/huggingface/transformers/blob/main/src/transformers/models/bloom/modeling_bloom.py#L238-L252 # noqa
- # gpt-2: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/modeling_gpt2.py#L312 # noqa
- qkv_weights = data_torch.reshape((n_head, 3, n_embed // n_head, n_embed))
- data_torch = torch.cat(
- (
- qkv_weights[:, 0, :, :].reshape((-1, n_embed)),
- qkv_weights[:, 1, :, :].reshape((-1, n_embed)),
- qkv_weights[:, 2, :, :].reshape((-1, n_embed)),
- ),
- dim=0,
- )
- logger.info("re-format attention.linear_qkv.weight")
- elif re.match(r"gpt_neox\.layers\.\d+\.attention\.query_key_value\.bias", name):
- qkv_bias = data_torch.reshape((n_head, 3, n_embed // n_head))
- data_torch = torch.cat(
- (
- qkv_bias[:, 0, :].reshape((n_embed,)),
- qkv_bias[:, 1, :].reshape((n_embed,)),
- qkv_bias[:, 2, :].reshape((n_embed,)),
- ),
- dim=0,
- )
- logger.info("re-format attention.linear_qkv.bias")
-
- tensors.append((self.map_tensor_name(name), data_torch))
-
- return tensors
-
-
-@Model.register("BloomForCausalLM")
-class BloomModel(Model):
- model_arch = gguf.MODEL_ARCH.BLOOM
-
- def set_gguf_parameters(self):
- self.gguf_writer.add_name("Bloom")
- n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
- n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
- self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed))
- self.gguf_writer.add_embedding_length(n_embed)
- self.gguf_writer.add_feed_forward_length(4 * n_embed)
- self.gguf_writer.add_block_count(self.hparams["n_layer"])
- self.gguf_writer.add_head_count(n_head)
- self.gguf_writer.add_head_count_kv(n_head)
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
- n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
-
- name = re.sub(r'transformer\.', '', name)
-
- tensors: list[tuple[str, Tensor]] = []
-
- if re.match(r"h\.\d+\.self_attention\.query_key_value\.weight", name):
- # Map bloom-style qkv_linear to gpt-style qkv_linear
- # bloom: https://github.com/huggingface/transformers/blob/main/src/transformers/models/bloom/modeling_bloom.py#L238-L252 # noqa
- # gpt-2: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/modeling_gpt2.py#L312 # noqa
- qkv_weights = data_torch.reshape((n_head, 3, n_embed // n_head, n_embed))
- data_torch = torch.cat(
- (
- qkv_weights[:, 0, :, :].reshape((-1, n_embed)),
- qkv_weights[:, 1, :, :].reshape((-1, n_embed)),
- qkv_weights[:, 2, :, :].reshape((-1, n_embed)),
- ),
- dim=0,
- )
- logger.info("re-format attention.linear_qkv.weight")
- elif re.match(r"h\.\d+\.self_attention\.query_key_value\.bias", name):
- qkv_bias = data_torch.reshape((n_head, 3, n_embed // n_head))
- data_torch = torch.cat(
- (
- qkv_bias[:, 0, :].reshape((n_embed,)),
- qkv_bias[:, 1, :].reshape((n_embed,)),
- qkv_bias[:, 2, :].reshape((n_embed,)),
- ),
- dim=0,
- )
- logger.info("re-format attention.linear_qkv.bias")
-
- tensors.append((self.map_tensor_name(name), data_torch))
-
- if name == "word_embeddings.weight":
- assert self.tensor_names is not None
-
- # TODO: tie them at runtime, don't duplicate in the model file
- if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
- return tensors
-
-
-@Model.register("MPTForCausalLM")
-class MPTModel(Model):
- model_arch = gguf.MODEL_ARCH.MPT
-
- def set_vocab(self):
- try:
- self._set_vocab_gpt2()
- except Exception:
- # Fallback for SEA-LION model
- self._set_vocab_sentencepiece()
- self.gguf_writer.add_add_bos_token(False)
- self.gguf_writer.add_pad_token_id(3)
- self.gguf_writer.add_eos_token_id(1)
- self.gguf_writer.add_unk_token_id(0)
-
- def set_gguf_parameters(self):
- block_count = self.hparams["n_layers"]
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
- self.gguf_writer.add_embedding_length(self.hparams["d_model"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(4 * self.hparams["d_model"])
- self.gguf_writer.add_head_count(self.hparams["n_heads"])
- if kv_n_heads := self.hparams["attn_config"].get("kv_n_heads"):
- self.gguf_writer.add_head_count_kv(kv_n_heads)
- self.gguf_writer.add_layer_norm_eps(1e-5)
- if self.hparams["attn_config"]["clip_qkv"] is not None:
- self.gguf_writer.add_clamp_kqv(self.hparams["attn_config"]["clip_qkv"])
- if self.hparams["attn_config"]["alibi"]:
- self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
- else:
- self.gguf_writer.add_max_alibi_bias(0.0)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- if "scales" in name:
- new_name = self.map_tensor_name(name, try_suffixes=(".weight", ".bias", ".scales"))
- new_name = new_name.replace("scales", "act.scales")
- else:
- new_name = self.map_tensor_name(name, try_suffixes=(".weight", ".bias"))
-
- return [(new_name, data_torch)]
-
-
-@Model.register("OrionForCausalLM")
-class OrionModel(Model):
- model_arch = gguf.MODEL_ARCH.ORION
-
- def set_vocab(self):
- self._set_vocab_sentencepiece()
-
- def set_gguf_parameters(self):
- block_count = self.hparams["num_hidden_layers"]
- head_count = self.hparams["num_attention_heads"]
- head_count_kv = self.hparams.get("num_key_value_heads", head_count)
- hf_repo = self.hparams.get("_name_or_path", "")
-
- ctx_length = 0
- if "max_sequence_length" in self.hparams:
- ctx_length = self.hparams["max_sequence_length"]
- elif "max_position_embeddings" in self.hparams:
- ctx_length = self.hparams["max_position_embeddings"]
- elif "model_max_length" in self.hparams:
- ctx_length = self.hparams["model_max_length"]
- else:
- raise ValueError("gguf: can not find ctx length parameter.")
-
- self.gguf_writer.add_file_type(self.ftype)
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_source_hf_repo(hf_repo)
- self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
- self.gguf_writer.add_context_length(ctx_length)
- self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
- self.gguf_writer.add_head_count(head_count)
- self.gguf_writer.add_head_count_kv(head_count_kv)
- # note: config provides rms norm but it is actually layer norm
- # ref: https://huggingface.co/OrionStarAI/Orion-14B-Chat/blob/276a17221ce42beb45f66fac657a41540e71f4f5/modeling_orion.py#L570-L571
- self.gguf_writer.add_layer_norm_eps(self.hparams["rms_norm_eps"])
-
-
-@Model.register("BaichuanForCausalLM", "BaiChuanForCausalLM")
-class BaichuanModel(Model):
- model_arch = gguf.MODEL_ARCH.BAICHUAN
-
- def set_vocab(self):
- self._set_vocab_sentencepiece()
-
- def set_gguf_parameters(self):
- block_count = self.hparams["num_hidden_layers"]
- head_count = self.hparams["num_attention_heads"]
- head_count_kv = self.hparams.get("num_key_value_heads", head_count)
- hf_repo = self.hparams.get("_name_or_path", "")
-
- ctx_length = 0
- if "max_sequence_length" in self.hparams:
- ctx_length = self.hparams["max_sequence_length"]
- elif "max_position_embeddings" in self.hparams:
- ctx_length = self.hparams["max_position_embeddings"]
- elif "model_max_length" in self.hparams:
- ctx_length = self.hparams["model_max_length"]
- else:
- raise ValueError("gguf: can not find ctx length parameter.")
-
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_source_hf_repo(hf_repo)
- self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
- self.gguf_writer.add_context_length(ctx_length)
- self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
- self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
- self.gguf_writer.add_head_count(head_count)
- self.gguf_writer.add_head_count_kv(head_count_kv)
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
- self.gguf_writer.add_file_type(self.ftype)
-
- if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
- if self.hparams["rope_scaling"].get("type") == "linear":
- self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
- self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- head_count = self.hparams["num_attention_heads"]
- head_count_kv = self.hparams.get("num_key_value_heads", head_count)
-
- tensors: list[tuple[str, Tensor]] = []
-
- if bid is not None and name == f"model.layers.{bid}.self_attn.W_pack.weight":
- logger.info(f"Unpacking and permuting layer {bid}")
- tensors = [
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid),
- self._reverse_hf_permute_part(data_torch, 0, head_count, head_count)),
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid),
- self._reverse_hf_permute_part(data_torch, 1, head_count, head_count_kv)),
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid),
- self._reverse_hf_part(data_torch, 2)),
- ]
- else:
- tensors = [(self.map_tensor_name(name), data_torch)]
-
- return tensors
-
- def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
- if n_kv_head is not None and n_head != n_kv_head:
- n_head //= n_kv_head
-
- return (
- weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
- .swapaxes(1, 2)
- .reshape(weights.shape)
- )
-
- def _reverse_hf_permute_part(
- self, weights: Tensor, n_part: int, n_head: int, n_head_kv: int | None = None,
- ) -> Tensor:
- r = weights.shape[0] // 3
- return self._reverse_hf_permute(weights[r * n_part:r * n_part + r, ...], n_head, n_head_kv)
-
- def _reverse_hf_part(self, weights: Tensor, n_part: int) -> Tensor:
- r = weights.shape[0] // 3
- return weights[r * n_part:r * n_part + r, ...]
-
-
-@Model.register("XverseForCausalLM")
-class XverseModel(Model):
- model_arch = gguf.MODEL_ARCH.XVERSE
-
- def set_vocab(self):
- assert (self.dir_model / "tokenizer.json").is_file()
- dir_model = self.dir_model
- hparams = self.hparams
-
- tokens: list[bytes] = []
- toktypes: list[int] = []
-
- from transformers import AutoTokenizer
- tokenizer = AutoTokenizer.from_pretrained(dir_model)
- vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
- # Since we are checking the maximum index, we need to ensure it's strictly less than vocab_size,
- # because vocab_size is the count of items, and indexes start at 0.
- max_vocab_index = max(tokenizer.get_vocab().values())
- if max_vocab_index >= vocab_size:
- raise ValueError("Vocabulary size exceeds expected maximum size.")
-
- reverse_vocab: dict[int, str] = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
- added_vocab = tokenizer.get_added_vocab()
-
- for token_id in range(vocab_size):
- token_text = reverse_vocab[token_id].encode('utf-8')
-            # replace "\x00" with a string of length > 0
- if token_text == b"\x00":
- toktype = gguf.TokenType.BYTE # special
- token_text = f"<{token_text}>".encode('utf-8')
- elif re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
- toktype = gguf.TokenType.BYTE # special
- elif reverse_vocab[token_id] in added_vocab:
- if tokenizer.added_tokens_decoder[token_id].special:
- toktype = gguf.TokenType.CONTROL
- else:
- toktype = gguf.TokenType.USER_DEFINED
- else:
- toktype = gguf.TokenType.NORMAL
-
- tokens.append(token_text)
- toktypes.append(toktype)
-
- self.gguf_writer.add_tokenizer_model("llama")
- self.gguf_writer.add_tokenizer_pre("default")
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_types(toktypes)
-
- special_vocab = gguf.SpecialVocab(dir_model, n_vocab=len(tokens))
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def set_gguf_parameters(self):
- block_count = self.hparams["num_hidden_layers"]
- head_count = self.hparams["num_attention_heads"]
- head_count_kv = self.hparams.get("num_key_value_heads", head_count)
- hf_repo = self.hparams.get("_name_or_path", "")
-
- ctx_length = 0
- if "max_sequence_length" in self.hparams:
- ctx_length = self.hparams["max_sequence_length"]
- elif "max_position_embeddings" in self.hparams:
- ctx_length = self.hparams["max_position_embeddings"]
- elif "model_max_length" in self.hparams:
- ctx_length = self.hparams["model_max_length"]
- else:
- raise ValueError("gguf: can not find ctx length parameter.")
-
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_source_hf_repo(hf_repo)
- self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
- self.gguf_writer.add_context_length(ctx_length)
- self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
- self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
- self.gguf_writer.add_head_count(head_count)
- self.gguf_writer.add_head_count_kv(head_count_kv)
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
- self.gguf_writer.add_file_type(self.ftype)
-
- if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
- if self.hparams["rope_scaling"].get("type") == "linear":
- self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
- self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- head_count = self.hparams["num_attention_heads"]
- head_count_kv = self.hparams.get("num_key_value_heads", head_count)
-
- # HF models permute some of the tensors, so we need to undo that
- if name.endswith("q_proj.weight"):
- data_torch = self._reverse_hf_permute(data_torch, head_count, head_count)
- if name.endswith("k_proj.weight"):
- data_torch = self._reverse_hf_permute(data_torch, head_count, head_count_kv)
-
- return [(self.map_tensor_name(name), data_torch)]
-
- def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
- if n_kv_head is not None and n_head != n_kv_head:
- n_head //= n_kv_head
-
- return (
- weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
- .swapaxes(1, 2)
- .reshape(weights.shape)
- )
-
-
-@Model.register("FalconForCausalLM", "RWForCausalLM")
-class FalconModel(Model):
- model_arch = gguf.MODEL_ARCH.FALCON
-
- def set_gguf_parameters(self):
- block_count = self.hparams.get("num_hidden_layers")
- if block_count is None:
- block_count = self.hparams["n_layer"] # old name
-
- n_head = self.hparams.get("num_attention_heads")
- if n_head is None:
- n_head = self.hparams["n_head"] # old name
-
- n_head_kv = self.hparams.get("num_kv_heads")
- if n_head_kv is None:
- n_head_kv = self.hparams.get("n_head_kv", 1) # old name
-
- self.gguf_writer.add_name("Falcon")
- self.gguf_writer.add_context_length(2048) # not in config.json
- self.gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
- self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
- self.gguf_writer.add_feed_forward_length(4 * self.hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_head_count(n_head)
- self.gguf_writer.add_head_count_kv(n_head_kv)
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- # QKV tensor transform
- # The original query_key_value tensor contains n_head_kv "kv groups",
- # each consisting of n_head/n_head_kv query weights followed by one key
- # and one value weight (shared by all query heads in the kv group).
- # This layout makes it a big pain to work with in GGML.
- # So we rearrange them here, so that we have n_head query weights
- # followed by n_head_kv key weights followed by n_head_kv value weights,
- # in contiguous fashion.
- # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py
-
- if "query_key_value" in name:
- n_head = self.find_hparam(["num_attention_heads", "n_head"])
- n_head_kv = self.find_hparam(["num_kv_heads", "n_head_kv"], optional=True) or 1
- head_dim = self.hparams["hidden_size"] // n_head
-
- qkv = data_torch.view(n_head_kv, n_head // n_head_kv + 2, head_dim, head_dim * n_head)
- q = qkv[:, :-2].reshape(n_head * head_dim, head_dim * n_head)
- k = qkv[:, [-2]].reshape(n_head_kv * head_dim, head_dim * n_head)
- v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head)
- data_torch = torch.cat((q, k, v)).reshape_as(data_torch)
-
- return [(self.map_tensor_name(name), data_torch)]
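- # Shape sketch (toy sizes, not from any real config.json): for n_head=4, n_head_kv=2 and
- # head_dim=2 the fused query_key_value tensor has shape (16, 8); the view above splits it into
- # (2, 4, 2, 8), where each of the 2 kv groups holds 2 query heads followed by one key and one
- # value head, and the cat produces q:(8, 8), k:(4, 8), v:(4, 8) in contiguous order.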
-
-
-@Model.register("GPTBigCodeForCausalLM")
-class StarCoderModel(Model):
- model_arch = gguf.MODEL_ARCH.STARCODER
-
- def set_gguf_parameters(self):
- block_count = self.hparams["n_layer"]
-
- self.gguf_writer.add_name("StarCoder")
- self.gguf_writer.add_context_length(self.hparams["n_positions"])
- self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
- self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_head_count(self.hparams["n_head"])
- self.gguf_writer.add_head_count_kv(1)
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_file_type(self.ftype)
-
-
-@Model.register("GPTRefactForCausalLM")
-class RefactModel(Model):
- model_arch = gguf.MODEL_ARCH.REFACT
-
- def set_vocab(self):
- super().set_vocab()
-
- # TODO: how to determine special FIM tokens automatically?
- special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
- special_token_types = ['prefix', 'suffix', 'middle', 'fsep', 'eot'])
- special_vocab._set_special_token("prefix", 1)
- special_vocab._set_special_token("suffix", 3)
- special_vocab._set_special_token("middle", 2)
- special_vocab._set_special_token("fsep", 4) # is this correct?
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def set_gguf_parameters(self):
- hidden_dim = self.hparams["n_embd"]
- inner_dim = 4 * hidden_dim
- hidden_dim = int(2 * inner_dim / 3)
- multiple_of = 256
- ff_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)
-
- block_count = self.hparams["n_layer"]
-
- self.gguf_writer.add_name("Refact")
- # Refact uses ALiBi, so this value from config.json likely reflects the training context length.
- self.gguf_writer.add_context_length(self.hparams["n_positions"])
- self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
-
- self.gguf_writer.add_feed_forward_length(ff_dim)
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_head_count(self.hparams["n_head"])
- self.gguf_writer.add_head_count_kv(1)
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_file_type(self.ftype)
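- # Worked example with a hypothetical n_embd of 4096: inner_dim = 16384,
- # hidden_dim = int(2 * 16384 / 3) = 10922, and ff_dim = 256 * ceil(10922 / 256) = 11008,
- # i.e. the usual SwiGLU "2/3 of 4*d, rounded up to a multiple of 256" rule.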
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- hidden_dim = self.hparams["n_embd"]
- inner_dim = 4 * hidden_dim
- hidden_dim = int(2 * inner_dim / 3)
- multiple_of = 256
- ff_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)
- n_head = self.hparams["n_head"]
- n_head_kv = 1
- head_dim = self.hparams["n_embd"] // n_head
-
- tensors: list[tuple[str, Tensor]] = []
-
- if bid is not None:
- if name == f"transformer.h.{bid}.attn.kv.weight":
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), data_torch[:n_head_kv * head_dim]))
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), data_torch[n_head_kv * head_dim:]))
- elif name == f"transformer.h.{bid}.attn.q.weight":
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), data_torch))
- elif name == f"transformer.h.{bid}.mlp.gate_up_proj.weight":
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), data_torch[:ff_dim]))
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), data_torch[ff_dim:]))
-
- if len(tensors) == 0:
- tensors.append((self.map_tensor_name(name), data_torch))
-
- return tensors
-
-
-@Model.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM")
-class StableLMModel(Model):
- model_arch = gguf.MODEL_ARCH.STABLELM
-
- def set_vocab(self):
- if (self.dir_model / "tokenizer.json").is_file():
- self._set_vocab_gpt2()
- else:
- # StableLM 2 1.6B uses a vocab in a similar format to Qwen's vocab
- self._set_vocab_qwen()
-
- def set_gguf_parameters(self):
- hparams = self.hparams
- block_count = hparams["num_hidden_layers"]
-
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
- self.gguf_writer.add_embedding_length(hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
- rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"])
- self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
- self.gguf_writer.add_head_count(hparams["num_attention_heads"])
- self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
- self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
- self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
- self.gguf_writer.add_file_type(self.ftype)
-
- _q_norms: list[dict[str, Tensor]] | None = None
- _k_norms: list[dict[str, Tensor]] | None = None
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- n_head = self.hparams["num_attention_heads"]
- n_kv_head = self.hparams["num_key_value_heads"]
-
- if name.find("q_layernorm.norms") != -1:
- assert bid is not None
-
- if self._q_norms is None:
- self._q_norms = [{} for _ in range(self.block_count)]
-
- self._q_norms[bid][name] = data_torch
-
- if len(self._q_norms[bid]) >= n_head:
- return self._stack_qk_norm(bid, n_head, self._q_norms[bid], "q_layernorm")
- else:
- return []
-
- if name.find("k_layernorm.norms") != -1:
- assert bid is not None
-
- if self._k_norms is None:
- self._k_norms = [{} for _ in range(self.block_count)]
-
- self._k_norms[bid][name] = data_torch
-
- if len(self._k_norms[bid]) >= n_kv_head:
- return self._stack_qk_norm(bid, n_kv_head, self._k_norms[bid], "k_layernorm")
- else:
- return []
-
- return [(self.map_tensor_name(name), data_torch)]
-
- def _stack_qk_norm(self, bid: int, n_head: int, norms: dict[str, Tensor], layer_name: str = "q_layernorm"):
- datas: list[Tensor] = []
- # extract the norms in order
- for xid in range(n_head):
- ename = f"model.layers.{bid}.self_attn.{layer_name}.norms.{xid}.weight"
- datas.append(norms[ename])
- del norms[ename]
- data_torch = torch.stack(datas, dim=0)
-
- merged_name = f"model.layers.{bid}.self_attn.{layer_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- return [(new_name, data_torch)]
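- # Illustrative shapes (toy values): with n_head=4 and per-head norm weights of shape (64,),
- # the stack above yields a single (4, 64) tensor under the merged name
- # model.layers.{bid}.self_attn.q_layernorm.weight (or k_layernorm for the kv heads)
- # before tensor-name mapping.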
-
- def write_tensors(self):
- super().write_tensors()
-
- if self._q_norms is not None or self._k_norms is not None:
- # flatten two `list[dict[str, Tensor]]` into a single `list[str]`
- norms = (
- [k for d in self._q_norms for k in d.keys()] if self._q_norms is not None else []
- ) + (
- [k for d in self._k_norms for k in d.keys()] if self._k_norms is not None else []
- )
- if len(norms) > 0:
- raise ValueError(f"Unprocessed norms: {norms}")
-
-
-@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
-class LlamaModel(Model):
- model_arch = gguf.MODEL_ARCH.LLAMA
-
- def set_vocab(self):
- try:
- self._set_vocab_sentencepiece()
- except FileNotFoundError:
- try:
- self._set_vocab_llama_hf()
- except (FileNotFoundError, TypeError):
- # Llama 3
- self._set_vocab_gpt2()
-
- # Apply to CodeLlama only (and ignore for Llama 3 with a vocab size of 128256)
- if self.hparams.get("vocab_size", 32000) == 32016:
- special_vocab = gguf.SpecialVocab(
- self.dir_model, load_merges=False,
- special_token_types = ['prefix', 'suffix', 'middle', 'eot']
- )
- special_vocab._set_special_token("prefix", 32007)
- special_vocab._set_special_token("suffix", 32008)
- special_vocab._set_special_token("middle", 32009)
- special_vocab._set_special_token("eot", 32010)
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- hparams = self.hparams
- self.gguf_writer.add_vocab_size(hparams["vocab_size"])
- self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
-
- if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
- if self.hparams["rope_scaling"].get("type") == "linear":
- self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
- self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-
- tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
- if tokenizer_config_file.is_file():
- with open(tokenizer_config_file, "r", encoding="utf-8") as f:
- tokenizer_config_json = json.load(f)
- if "add_prefix_space" in tokenizer_config_json:
- self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
-
- # Apply to granite small models only
- if self.hparams.get("vocab_size", 32000) == 49152:
- self.gguf_writer.add_add_bos_token(False)
-
- @staticmethod
- def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
- if n_head_kv is not None and n_head != n_head_kv:
- n_head = n_head_kv
- return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
- .swapaxes(1, 2)
- .reshape(weights.shape))
-
- _experts: list[dict[str, Tensor]] | None = None
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- n_head = self.hparams["num_attention_heads"]
- n_kv_head = self.hparams.get("num_key_value_heads")
-
- if name.endswith(("q_proj.weight", "q_proj.bias")):
- data_torch = LlamaModel.permute(data_torch, n_head, n_head)
- if name.endswith(("k_proj.weight", "k_proj.bias")):
- data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
-
- # process the experts separately
- if name.find("block_sparse_moe.experts") != -1:
- n_experts = self.hparams["num_local_experts"]
-
- assert bid is not None
-
- if self._experts is None:
- self._experts = [{} for _ in range(self.block_count)]
-
- self._experts[bid][name] = data_torch
-
- if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
- # merge the experts into a single 3d tensor
- for wid in ["w1", "w2", "w3"]:
- datas: list[Tensor] = []
-
- for xid in range(n_experts):
- ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid}.weight"
- datas.append(self._experts[bid][ename])
- del self._experts[bid][ename]
-
- data_torch = torch.stack(datas, dim=0)
-
- merged_name = f"layers.{bid}.feed_forward.experts.{wid}.weight"
-
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
- else:
- return []
-
- return [(self.map_tensor_name(name), data_torch)]
-
- def write_tensors(self):
- super().write_tensors()
-
- if self._experts is not None:
- # flatten `list[dict[str, Tensor]]` into `list[str]`
- experts = [k for d in self._experts for k in d.keys()]
- if len(experts) > 0:
- raise ValueError(f"Unprocessed experts: {experts}")
-
-
-@Model.register("BitnetForCausalLM")
-class BitnetModel(Model):
- model_arch = gguf.MODEL_ARCH.BITNET
-
- def set_vocab(self):
- self._set_vocab_sentencepiece()
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
- self.gguf_writer.add_rope_scaling_factor(1.0)
-
- def weight_quant(self, weight):
- dtype = weight.dtype
- weight = weight.float()
- s = 1 / weight.abs().mean().clamp(min=1e-5)
- weight = (weight * s).round().clamp(-1, 1) / s
- scale = weight.abs().max().unsqueeze(0)
- weight = torch.where(weight.abs().less(1e-6), 0, weight).type(dtype)
- weight = torch.sign(weight).type(dtype)
- return weight.type(dtype), scale.type(torch.float32)
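- # Numeric sketch (made-up values): for w = [0.4, -0.1, 0.0, 0.7] the mean absolute value is 0.3,
- # so s ≈ 3.33; (w * s).round().clamp(-1, 1) gives [1, 0, 0, 1], the returned scale is 0.3,
- # and the sign step leaves the ternary weights in {-1, 0, +1}.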
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- new_name = self.map_tensor_name(name)
-
- if any(self.match_model_tensor_name(new_name, key, bid) for key in [
- gguf.MODEL_TENSOR.ATTN_Q,
- gguf.MODEL_TENSOR.ATTN_K,
- gguf.MODEL_TENSOR.ATTN_V,
- gguf.MODEL_TENSOR.ATTN_OUT,
- gguf.MODEL_TENSOR.FFN_UP,
- gguf.MODEL_TENSOR.FFN_DOWN,
- gguf.MODEL_TENSOR.FFN_GATE,
- ]):
- # transform weight into 1/0/-1 (in fp32)
- weight_torch, scale_torch = self.weight_quant(data_torch)
- yield (new_name, weight_torch)
- yield (new_name.removesuffix(".weight") + ".scale", scale_torch)
- else:
- yield (new_name, data_torch)
-
-
-@Model.register("GrokForCausalLM")
-class GrokModel(Model):
- model_arch = gguf.MODEL_ARCH.GROK
-
- def set_vocab(self):
- self._set_vocab_sentencepiece()
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- self.gguf_writer.add_name("Grok")
-
- _experts: list[dict[str, Tensor]] | None = None
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- # process the experts separately
- if name.find(".moe.") != -1:
- n_experts = self.hparams["num_local_experts"]
-
- assert bid is not None
-
- if self._experts is None:
- self._experts = [{} for _ in range(self.block_count)]
-
- self._experts[bid][name] = data_torch
-
- if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
- # merge the experts into a single 3d tensor
- for wid in ["linear", "linear_1", "linear_v"]:
- datas: list[Tensor] = []
-
- for xid in range(n_experts):
- ename = f"transformer.decoder_layer.{bid}.moe.{xid}.{wid}.weight"
- datas.append(self._experts[bid][ename])
- del self._experts[bid][ename]
-
- data_torch = torch.stack(datas, dim=0)
-
- merged_name = f"transformer.decoder_layer.{bid}.moe.{wid}.weight"
-
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
- else:
- return []
-
- return [(self.map_tensor_name(name), data_torch)]
-
-
-@Model.register("DbrxForCausalLM")
-class DbrxModel(Model):
- model_arch = gguf.MODEL_ARCH.DBRX
-
- def set_gguf_parameters(self):
- ffn_config = self.hparams["ffn_config"]
- attn_config = self.hparams["attn_config"]
- self.gguf_writer.add_name(self.hparams["model_type"])
- self.gguf_writer.add_block_count(self.hparams["n_layers"])
-
- self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
- self.gguf_writer.add_embedding_length(self.hparams["d_model"])
- self.gguf_writer.add_feed_forward_length(ffn_config["ffn_hidden_size"])
-
- self.gguf_writer.add_head_count(self.hparams["n_heads"])
- self.gguf_writer.add_head_count_kv(attn_config["kv_n_heads"])
-
- self.gguf_writer.add_rope_freq_base(attn_config["rope_theta"])
-
- self.gguf_writer.add_clamp_kqv(attn_config["clip_qkv"])
- self.gguf_writer.add_file_type(self.ftype)
-
- self.gguf_writer.add_expert_count(ffn_config["moe_num_experts"])
- self.gguf_writer.add_expert_used_count(ffn_config["moe_top_k"])
-
- self.gguf_writer.add_layer_norm_eps(1e-5)
-
- logger.info(f"gguf: file type = {self.ftype}")
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- n_expert = self.hparams["ffn_config"]["moe_num_experts"]
- n_ff = self.hparams["ffn_config"]["ffn_hidden_size"]
- n_embd = self.hparams["d_model"]
-
- # Specific behavior for experts tensors: suffix .weight, view as 3D and transpose
- # original implementation expects (n_expert, n_ff, n_embd) for all experts weights
- # But llama.cpp moe graph works differently
- # AND the dimensions in ggml are typically in the reverse order of the pytorch dimensions
- # so (n_expert, n_ff, n_embd) in pytorch is {n_embd, n_ff, n_expert} in ggml_tensor
- exp_tensor_names = {"ffn.experts.mlp.w1": None, # LLM_TENSOR_FFN_GATE_EXPS ggml_tensor->ne{n_embd, n_ff, n_expert}
- "ffn.experts.mlp.w2": (0, 2, 1), # LLM_TENSOR_FFN_DOWN_EXPS ggml_tensor->ne{n_ff, n_embd, n_expert}
- "ffn.experts.mlp.v1": None} # LLM_TENSOR_FFN_UP_EXPS ggml_tensor->ne{n_embd, n_ff, n_expert}
- experts = False
-
- for exp_tensor_name in exp_tensor_names.keys():
- if name.find(exp_tensor_name) != -1 and name.find(".weight") == -1:
- experts = True
- data_torch = data_torch.view(n_expert, n_ff, n_embd)
- if (permute_tensor := exp_tensor_names[exp_tensor_name]) is not None:
- data_torch = data_torch.permute(*permute_tensor)
- break
-
- # map tensor names
- # In MoE models the FFN tensors are typically most of the model weights and need to be
- # quantizable; quantization expects tensor names suffixed with .weight. Every other model
- # follows that convention, but dbrx does not, so the suffix is appended here:
- # https://huggingface.co/databricks/dbrx-instruct/blob/main/model.safetensors.index.json#L15
- new_name = self.map_tensor_name(name if not experts else name + ".weight", try_suffixes=(".weight",))
-
- return [(new_name, data_torch)]
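- # Shape sketch (hypothetical sizes): with n_expert=4, n_ff=8 and n_embd=6, a fused expert tensor
- # of 4*8*6 elements is viewed as (4, 8, 6); w2 is additionally permuted to (4, 6, 8) because its
- # ggml layout wants {n_ff, n_embd, n_expert}, while w1/v1 stay {n_embd, n_ff, n_expert}.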
-
- def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
- del name, new_name, bid # unused
-
- return n_dims > 1
-
-
-@Model.register("MiniCPMForCausalLM")
-class MiniCPMModel(Model):
- model_arch = gguf.MODEL_ARCH.MINICPM
-
- def set_gguf_parameters(self):
- block_count = self.hparams["num_hidden_layers"]
- self.gguf_writer.add_name("MiniCPM")
- self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
- self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
- self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
- self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
- self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def set_vocab(self):
- self._set_vocab_llama_hf()
-
- def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
- if n_kv_head is not None and n_head != n_kv_head:
- n_head //= n_kv_head
-
- return (
- weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
- .swapaxes(1, 2)
- .reshape(weights.shape)
- )
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- n_head = self.hparams["num_attention_heads"]
- n_kv_head = self.hparams.get("num_key_value_heads")
-
- # HF models permute some of the tensors, so we need to undo that
- if name.endswith(("q_proj.weight")):
- data_torch = self._reverse_hf_permute(data_torch, n_head, n_head)
- if name.endswith(("k_proj.weight")):
- data_torch = self._reverse_hf_permute(data_torch, n_head, n_kv_head)
-
- return [(self.map_tensor_name(name), data_torch)]
-
-
-@Model.register("QWenLMHeadModel")
-class QwenModel(Model):
- model_arch = gguf.MODEL_ARCH.QWEN
-
- @staticmethod
- def token_bytes_to_string(b):
- from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
- byte_encoder = bytes_to_unicode()
- return ''.join([byte_encoder[ord(char)] for char in b.decode('latin-1')])
-
- @staticmethod
- def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
- parts = [bytes([b]) for b in token]
- while True:
- min_idx = None
- min_rank = None
- for i, pair in enumerate(zip(parts[:-1], parts[1:])):
- rank = mergeable_ranks.get(pair[0] + pair[1])
- if rank is not None and (min_rank is None or rank < min_rank):
- min_idx = i
- min_rank = rank
- if min_rank is None or (max_rank is not None and min_rank >= max_rank):
- break
- assert min_idx is not None
- parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
- return parts
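- # Usage sketch with a toy merge table (not Qwen's real ranks): with
- # mergeable_ranks = {b"ab": 0, b"abc": 1}, bpe(mergeable_ranks, b"abc") merges all the way to
- # [b"abc"], while bpe(mergeable_ranks, b"abc", max_rank=1) stops before the rank-1 merge and
- # returns [b"ab", b"c"].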
-
- def set_vocab(self):
- self._set_vocab_qwen()
-
- def set_gguf_parameters(self):
- self.gguf_writer.add_name("Qwen")
- self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
- self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
- self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
- self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
- self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
- self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
- self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_file_type(self.ftype)
-
-
-@Model.register("Qwen2ForCausalLM")
-class Qwen2Model(Model):
- model_arch = gguf.MODEL_ARCH.QWEN2
-
- def set_vocab(self):
- try:
- self._set_vocab_sentencepiece()
- except FileNotFoundError:
- self._set_vocab_gpt2()
-
-
-@Model.register("Qwen2MoeForCausalLM")
-class Qwen2MoeModel(Model):
- model_arch = gguf.MODEL_ARCH.QWEN2MOE
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- if (n_experts := self.hparams.get("num_experts")) is not None:
- self.gguf_writer.add_expert_count(n_experts)
- if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
- self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
- logger.info(f"gguf: expert feed forward length = {moe_intermediate_size}")
- if (shared_expert_intermediate_size := self.hparams.get('shared_expert_intermediate_size')) is not None:
- self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size)
- logger.info(f"gguf: expert shared feed forward length = {shared_expert_intermediate_size}")
-
- _experts: list[dict[str, Tensor]] | None = None
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- # process the experts separately
- if name.find("experts") != -1:
- n_experts = self.hparams["num_experts"]
- assert bid is not None
-
- if self._experts is None:
- self._experts = [{} for _ in range(self.block_count)]
-
- self._experts[bid][name] = data_torch
-
- if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
- # merge the experts into a single 3d tensor
- for w_name in ["down_proj", "gate_proj", "up_proj"]:
- datas: list[Tensor] = []
-
- for xid in range(n_experts):
- ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
- datas.append(self._experts[bid][ename])
- del self._experts[bid][ename]
-
- data_torch = torch.stack(datas, dim=0)
-
- merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
-
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
- else:
- return []
-
- return [(self.map_tensor_name(name), data_torch)]
-
- def write_tensors(self):
- super().write_tensors()
-
- if self._experts is not None:
- # flatten `list[dict[str, Tensor]]` into `list[str]`
- experts = [k for d in self._experts for k in d.keys()]
- if len(experts) > 0:
- raise ValueError(f"Unprocessed experts: {experts}")
-
-
-@Model.register("GPT2LMHeadModel")
-class GPT2Model(Model):
- model_arch = gguf.MODEL_ARCH.GPT2
-
- def set_gguf_parameters(self):
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_block_count(self.hparams["n_layer"])
- self.gguf_writer.add_context_length(self.hparams["n_ctx"])
- self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
- self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
- self.gguf_writer.add_head_count(self.hparams["n_head"])
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- tensors: list[tuple[str, Tensor]] = []
-
- # we don't need these
- if name.endswith((".attn.bias", ".attn.masked_bias")):
- return tensors
-
- if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_proj.weight")):
- data_torch = data_torch.transpose(1, 0)
-
- new_name = self.map_tensor_name(name)
-
- tensors.append((new_name, data_torch))
-
- # note: GPT2 output is tied to (same as) wte in original model
- if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
- return tensors
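- # Note: HF GPT-2 checkpoints store these projections as Conv1D weights of shape (n_in, n_out),
- # e.g. c_attn.weight as (n_embd, 3 * n_embd), so the transpose above puts them into the
- # (n_out, n_in) Linear layout used for the GGUF tensors.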
-
-
-@Model.register("PhiForCausalLM")
-class Phi2Model(Model):
- model_arch = gguf.MODEL_ARCH.PHI2
-
- def set_gguf_parameters(self):
- block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
-
- rot_pct = self.find_hparam(["partial_rotary_factor"])
- n_embd = self.find_hparam(["hidden_size", "n_embd"])
- n_head = self.find_hparam(["num_attention_heads", "n_head"])
-
- self.gguf_writer.add_name("Phi2")
- self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
-
- self.gguf_writer.add_embedding_length(n_embd)
- self.gguf_writer.add_feed_forward_length(4 * n_embd)
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_head_count(n_head)
- self.gguf_writer.add_head_count_kv(n_head)
- self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_epsilon", "layer_norm_eps"]))
- self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
- self.gguf_writer.add_file_type(self.ftype)
- self.gguf_writer.add_add_bos_token(False)
-
-
-@Model.register("Phi3ForCausalLM")
-class Phi3MiniModel(Model):
- model_arch = gguf.MODEL_ARCH.PHI3
-
- def set_vocab(self):
- from sentencepiece import SentencePieceProcessor
-
- tokenizer_path = self.dir_model / 'tokenizer.model'
-
- if not tokenizer_path.is_file():
- raise ValueError(f'Error: Missing {tokenizer_path}')
-
- tokenizer = SentencePieceProcessor()
- tokenizer.LoadFromFile(str(tokenizer_path))
-
- vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
-
- tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
- scores: list[float] = [-10000.0] * vocab_size
- toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
-
- for token_id in range(tokenizer.vocab_size()):
- piece = tokenizer.IdToPiece(token_id)
- text = piece.encode("utf-8")
- score = tokenizer.GetScore(token_id)
-
- toktype = SentencePieceTokenTypes.NORMAL
- if tokenizer.IsUnknown(token_id):
- toktype = SentencePieceTokenTypes.UNKNOWN
- elif tokenizer.IsControl(token_id):
- toktype = SentencePieceTokenTypes.CONTROL
- elif tokenizer.IsUnused(token_id):
- toktype = SentencePieceTokenTypes.UNUSED
- elif tokenizer.IsByte(token_id):
- toktype = SentencePieceTokenTypes.BYTE
-
- tokens[token_id] = text
- scores[token_id] = score
- toktypes[token_id] = toktype
-
- added_tokens_file = self.dir_model / 'added_tokens.json'
- if added_tokens_file.is_file():
- with open(added_tokens_file, "r", encoding="utf-8") as f:
- added_tokens_json = json.load(f)
-
- for key in added_tokens_json:
- token_id = added_tokens_json[key]
- if (token_id >= vocab_size):
- logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
- continue
-
- tokens[token_id] = key.encode("utf-8")
- scores[token_id] = -1000.0
- toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
-
- tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
- if tokenizer_config_file.is_file():
- with open(tokenizer_config_file, "r", encoding="utf-8") as f:
- tokenizer_config_json = json.load(f)
- added_tokens_decoder = tokenizer_config_json.get("added_tokens_decoder", {})
- for token_id, token_data in added_tokens_decoder.items():
- token_id = int(token_id)
- token = token_data["content"].encode("utf-8")
- if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
- assert tokens[token_id] == token
- tokens[token_id] = token
- scores[token_id] = -1000.0
- toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
- if token_data.get("special"):
- toktypes[token_id] = SentencePieceTokenTypes.CONTROL
-
- tokenizer_file = self.dir_model / 'tokenizer.json'
- if tokenizer_file.is_file():
- with open(tokenizer_file, "r", encoding="utf-8") as f:
- tokenizer_json = json.load(f)
- added_tokens = tokenizer_json.get("added_tokens", [])
- for token_data in added_tokens:
- token_id = int(token_data["id"])
- token = token_data["content"].encode("utf-8")
- if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
- assert tokens[token_id] == token
- tokens[token_id] = token
- scores[token_id] = -1000.0
- toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
- if token_data.get("special"):
- toktypes[token_id] = SentencePieceTokenTypes.CONTROL
-
- self.gguf_writer.add_tokenizer_model("llama")
- self.gguf_writer.add_tokenizer_pre("default")
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_scores(scores)
- self.gguf_writer.add_token_types(toktypes)
-
- special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def set_gguf_parameters(self):
- block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
-
- n_embd = self.find_hparam(["hidden_size", "n_embd"])
- n_head = self.find_hparam(["num_attention_heads", "n_head"])
- n_head_kv = self.find_hparam(["num_key_value_heads", "n_head_kv"])
- rms_eps = self.find_hparam(["rms_norm_eps"])
- max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
- orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
- rope_dims = n_embd // n_head
-
- self.gguf_writer.add_name("Phi3")
- self.gguf_writer.add_context_length(max_pos_embds)
- self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
- self.gguf_writer.add_embedding_length(n_embd)
- self.gguf_writer.add_feed_forward_length(self.find_hparam(["intermediate_size"]))
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_head_count(n_head)
- self.gguf_writer.add_head_count_kv(n_head_kv)
- self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
- self.gguf_writer.add_rope_dimension_count(rope_dims)
- self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
- self.gguf_writer.add_file_type(self.ftype)
-
- # write rope scaling for long context (128k) model
- rope_scaling = self.find_hparam(['rope_scaling'], True)
- if (rope_scaling is None):
- return
-
- scale = max_pos_embds / orig_max_pos_embds
-
- rope_scaling_type = rope_scaling.get('type', '').lower()
- if len(rope_scaling_type) == 0:
- raise KeyError('Missing the required key rope_scaling.type')
-
- if rope_scaling_type == 'su' or rope_scaling_type == 'longrope':
- attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
- elif rope_scaling_type == 'yarn':
- attn_factor = 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0
- else:
- raise NotImplementedError(f'The rope scaling type {rope_scaling_type} is not supported yet')
-
- self.gguf_writer.add_rope_scaling_attn_factors(attn_factor)
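- # Worked example (hypothetical 4k -> 128k long-context model): scale = 131072 / 4096 = 32,
- # so for the 'su'/'longrope' case attn_factor = sqrt(1 + log(32) / log(4096)) ≈ 1.19,
- # and for 'yarn' it would be 0.1 * log(32) + 1.0 ≈ 1.35.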
-
- long_factors = rope_scaling.get('long_factor', None)
- short_factors = rope_scaling.get('short_factor', None)
-
- if long_factors is None or short_factors is None:
- raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling.short_factor')
-
- if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
- raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
-
- self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
- self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
-
-
-@Model.register("PlamoForCausalLM")
-class PlamoModel(Model):
- model_arch = gguf.MODEL_ARCH.PLAMO
-
- def set_vocab(self):
- self._set_vocab_sentencepiece()
-
- def set_gguf_parameters(self):
- hparams = self.hparams
- block_count = hparams["num_hidden_layers"]
-
- self.gguf_writer.add_name("PLaMo")
- self.gguf_writer.add_context_length(4096) # not in config.json
- self.gguf_writer.add_embedding_length(hparams["hidden_size"])
- self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_head_count(hparams["num_attention_heads"])
- self.gguf_writer.add_head_count_kv(5) # hparams["num_key_value_heads"] is wrong
- self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def shuffle_attn_q_weight(self, data_torch):
- assert data_torch.size() == (5120, 5120)
- data_torch = data_torch.reshape(8, 5, 128, 5120)
- data_torch = torch.permute(data_torch, (1, 0, 2, 3))
- data_torch = torch.reshape(data_torch, (5120, 5120))
- return data_torch
-
- def shuffle_attn_output_weight(self, data_torch):
- assert data_torch.size() == (5120, 5120)
- data_torch = data_torch.reshape(5120, 8, 5, 128)
- data_torch = torch.permute(data_torch, (0, 2, 1, 3))
- data_torch = torch.reshape(data_torch, (5120, 5120))
- return data_torch
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- new_name = self.map_tensor_name(name)
-
- # shuffle for broadcasting of gqa in ggml_mul_mat
- if new_name.endswith("attn_q.weight"):
- data_torch = self.shuffle_attn_q_weight(data_torch)
- elif new_name.endswith("attn_output.weight"):
- data_torch = self.shuffle_attn_output_weight(data_torch)
-
- return [(new_name, data_torch)]
-
-
-@Model.register("CodeShellForCausalLM")
-class CodeShellModel(Model):
- model_arch = gguf.MODEL_ARCH.CODESHELL
-
- def set_gguf_parameters(self):
- block_count = self.hparams["n_layer"]
-
- self.gguf_writer.add_name("CodeShell")
- self.gguf_writer.add_context_length(self.hparams["n_positions"])
- self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
- self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_head_count(self.hparams["n_head"])
- self.gguf_writer.add_head_count_kv(self.hparams["num_query_groups"])
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_file_type(self.ftype)
- self.gguf_writer.add_rope_freq_base(10000.0)
- self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
- self.gguf_writer.add_rope_scaling_factor(1.0)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- new_name = self.map_tensor_name(name)
-
- tensors: list[tuple[str, Tensor]] = [(new_name, data_torch)]
-
- if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
- assert self.tensor_names is not None
-
- if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
- # copy tok_embd.weight to output.weight
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
- return tensors
-
-
-@Model.register("InternLM2ForCausalLM")
-class InternLM2Model(Model):
- model_arch = gguf.MODEL_ARCH.INTERNLM2
-
- def set_vocab(self):
- # (TODO): Is there a better way?
- # Copied from _set_vocab_sentencepiece; the only difference is that we treat the character
- # \x00 specially and convert it into an emoji character, to prevent it from being mistakenly
- # recognized as an empty string in C++.
- from sentencepiece import SentencePieceProcessor
- from sentencepiece import sentencepiece_model_pb2 as model
-
- tokenizer_path = self.dir_model / 'tokenizer.model'
-
- tokens: list[bytes] = []
- scores: list[float] = []
- toktypes: list[int] = []
-
- if not tokenizer_path.is_file():
- logger.error(f'Error: Missing {tokenizer_path}')
- sys.exit(1)
-
- sentencepiece_model = model.ModelProto()
- sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
- add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
-
- tokenizer = SentencePieceProcessor()
- tokenizer.LoadFromFile(str(tokenizer_path))
-
- vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
-
- for token_id in range(vocab_size):
- piece = tokenizer.IdToPiece(token_id)
- text = piece.encode("utf-8")
- score = tokenizer.GetScore(token_id)
- if text == b"\x00":
- # (TODO): fixme
- # Hack: replace the \x00 character so it is not mistaken for an empty string.
- logger.warning(f"InternLM2: converting token '{text}' to '🐉'!")
- text = "🐉".encode("utf-8")
-
- toktype = SentencePieceTokenTypes.NORMAL
- if tokenizer.IsUnknown(token_id):
- toktype = SentencePieceTokenTypes.UNKNOWN
- elif tokenizer.IsControl(token_id):
- toktype = SentencePieceTokenTypes.CONTROL
- elif tokenizer.IsUnused(token_id):
- toktype = SentencePieceTokenTypes.UNUSED
- elif tokenizer.IsByte(token_id):
- toktype = SentencePieceTokenTypes.BYTE
-
- tokens.append(text)
- scores.append(score)
- toktypes.append(toktype)
-
- added_tokens_file = self.dir_model / 'added_tokens.json'
- if added_tokens_file.is_file():
- with open(added_tokens_file, "r", encoding="utf-8") as f:
- added_tokens_json = json.load(f)
-
- for key in added_tokens_json:
- tokens.append(key.encode("utf-8"))
- scores.append(-1000.0)
- toktypes.append(SentencePieceTokenTypes.USER_DEFINED)
-
- self.gguf_writer.add_tokenizer_model("llama")
- self.gguf_writer.add_tokenizer_pre("default")
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_scores(scores)
- self.gguf_writer.add_token_types(toktypes)
- self.gguf_writer.add_add_space_prefix(add_prefix)
-
- special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
- old_eos = special_vocab.special_token_ids["eos"]
- if "chat" in os.path.basename(self.dir_model.absolute()):
- # For the chat model, we replace the eos with '<|im_end|>'.
- # TODO: this is a hack, should be fixed
- # https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048
- special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer)
- logger.warning(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \
-in chat mode so that the conversation can end normally.")
-
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def _try_get_sft_eos(self, tokenizer):
- unused_145_list = tokenizer.Encode('[UNUSED_TOKEN_145]')
- im_end_list = tokenizer.Encode('<|im_end|>')
- eos_token = None
- assert (len(unused_145_list) == 1) ^ (len(im_end_list) == 1)
- if len(unused_145_list) == 1:
- eos_token = unused_145_list[0]
- if len(im_end_list) == 1:
- eos_token = im_end_list[0]
- assert eos_token
- return eos_token
-
- def _hf_permute_qk(self, weights, n_head: int, n_head_kv: int):
- if n_head_kv is not None and n_head != n_head_kv:
- n_head = n_head_kv
- return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
- .swapaxes(1, 2)
- .reshape(weights.shape))
-
- def set_gguf_parameters(self):
- self.gguf_writer.add_name("InternLM2")
- self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
- self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
- self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
- self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
- self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
- self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
- self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- num_heads = self.hparams["num_attention_heads"]
- num_kv_heads = self.hparams["num_key_value_heads"]
- hidden_size = self.hparams["hidden_size"]
- q_per_kv = num_heads // num_kv_heads
- head_dim = hidden_size // num_heads
- num_groups = num_heads // q_per_kv
-
- qkv_pattern = r"model\.layers\.(\d+)\.attention\.wqkv"
-
- if re.match(qkv_pattern, name):
- bid = re.findall(qkv_pattern, name)[0]
- qkv = data_torch
- # qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
- qkv = qkv.T.reshape((-1, num_groups, q_per_kv + 2, head_dim))
- q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :]
- # The model weights of q and k require an additional reshape.
- # q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads)
- q = self._hf_permute_qk(q.reshape((q.shape[0], -1)).T, num_heads, num_heads)
- # k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads)
- k = self._hf_permute_qk(k.reshape((k.shape[0], -1)).T, num_heads, num_kv_heads)
- # v = rearrange(v, " o g n i -> o (g n i)").T
- v = v.reshape((v.shape[0], -1)).T
- return [
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), q),
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), k),
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v),
- ]
- else:
- return [(self.map_tensor_name(name), data_torch)]
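- # Shape sketch (hypothetical hparams): with num_heads=32, num_kv_heads=8 and hidden_size=4096,
- # q_per_kv=4, head_dim=128 and num_groups=8, so the transposed wqkv is reshaped to
- # (-1, 8, 6, 128): each group carries 4 query heads followed by one key and one value head.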
-
-
-@Model.register("BertModel", "CamembertModel")
-class BertModel(Model):
- model_arch = gguf.MODEL_ARCH.BERT
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.vocab_size = None
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- self.gguf_writer.add_causal_attention(False)
-
- # get pooling path
- pooling_path = None
- module_path = self.dir_model / "modules.json"
- if module_path.is_file():
- with open(module_path, encoding="utf-8") as f:
- modules = json.load(f)
- for mod in modules:
- if mod["type"] == "sentence_transformers.models.Pooling":
- pooling_path = mod["path"]
- break
-
- # get pooling type
- if pooling_path is not None:
- with open(self.dir_model / pooling_path / "config.json", encoding="utf-8") as f:
- pooling = json.load(f)
- if pooling["pooling_mode_mean_tokens"]:
- pooling_type = gguf.PoolingType.MEAN
- elif pooling["pooling_mode_cls_token"]:
- pooling_type = gguf.PoolingType.CLS
- else:
- raise NotImplementedError("Only MEAN and CLS pooling types supported")
- self.gguf_writer.add_pooling_type(pooling_type)
-
- def set_vocab(self):
- tokens, toktypes, tokpre = self.get_vocab_base()
- self.vocab_size = len(tokens)
-
- # we need this to validate the size of the token_type embeddings
- # though currently we are passing all zeros to the token_type embeddings
- self.gguf_writer.add_token_type_count(2) # "Sequence A" or "Sequence B"
-
- # convert to phantom space vocab
- def phantom(tok):
- if tok.startswith("[") and tok.endswith("]"):
- return tok
- if tok.startswith("##"):
- return tok[2:]
- return "\u2581" + tok
- tokens = list(map(phantom, tokens))
-
- # add vocab to gguf
- self.gguf_writer.add_tokenizer_model("bert")
- self.gguf_writer.add_tokenizer_pre(tokpre)
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_types(toktypes)
-
- # handle special tokens
- special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- # we are only using BERT for embeddings so we don't need the pooling layer
- if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"):
- return [] # we don't need these
-
- return [(self.map_tensor_name(name), data_torch)]
-
-
-@Model.register("NomicBertModel")
-class NomicBertModel(BertModel):
- model_arch = gguf.MODEL_ARCH.NOMIC_BERT
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- # the HF config claims n_ctx=8192, but it uses RoPE scaling
- self.hparams["n_ctx"] = 2048
-
- # SwiGLU activation
- assert self.hparams["activation_function"] == "swiglu"
- # this doesn't do anything in the HF version
- assert self.hparams["causal"] is False
- # no bias tensors
- assert self.hparams["qkv_proj_bias"] is False
- assert self.hparams["mlp_fc1_bias"] is False
- assert self.hparams["mlp_fc2_bias"] is False
- # norm at end of layer
- assert self.hparams["prenorm"] is False
- # standard RoPE
- assert self.hparams["rotary_emb_fraction"] == 1.0
- assert self.hparams["rotary_emb_interleaved"] is False
- assert self.hparams["rotary_emb_scale_base"] is None
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
-
-
-@Model.register("GemmaForCausalLM")
-class GemmaModel(Model):
- model_arch = gguf.MODEL_ARCH.GEMMA
-
- def set_vocab(self):
- self._set_vocab_sentencepiece()
-
- # TODO: these special tokens should be exported only for the CodeGemma family
- special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
- special_token_types = ['prefix', 'suffix', 'middle', 'fsep', 'eot'])
- special_vocab._set_special_token("prefix", 67)
- special_vocab._set_special_token("suffix", 69)
- special_vocab._set_special_token("middle", 68)
- special_vocab._set_special_token("fsep", 70)
- special_vocab._set_special_token("eot", 107)
- special_vocab.add_to_gguf(self.gguf_writer)
-
- self.gguf_writer.add_add_space_prefix(False)
-
- def set_gguf_parameters(self):
- hparams = self.hparams
- block_count = hparams["num_hidden_layers"]
-
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
- self.gguf_writer.add_embedding_length(hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
- self.gguf_writer.add_head_count(hparams["num_attention_heads"])
- self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
- self.gguf_writer.add_key_length(hparams["head_dim"])
- self.gguf_writer.add_value_length(hparams["head_dim"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- # lm_head is not used in llama.cpp, while autoawq will include this tensor in model
- # To prevent errors, skip loading lm_head.weight.
- if name == "lm_head.weight":
- logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
- return []
-
- # ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
- if name.endswith("norm.weight"):
- data_torch = data_torch + 1
-
- return [(self.map_tensor_name(name), data_torch)]
-
-
-@Model.register("Gemma2ForCausalLM")
-class Gemma2Model(Model):
- model_arch = gguf.MODEL_ARCH.GEMMA2
-
- def set_vocab(self):
- tokens, scores, toktypes = self._create_vocab_sentencepiece()
- # hack: this is required so that we can properly use the start/end-of-turn tokens with the chat template
- for i in range(108):
- # including <unusedX>, <start_of_turn>, <end_of_turn>
- toktypes[i] = SentencePieceTokenTypes.CONTROL
- self.gguf_writer.add_tokenizer_model("llama")
- self.gguf_writer.add_tokenizer_pre("default")
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_scores(scores)
- self.gguf_writer.add_token_types(toktypes)
-
- special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
- special_vocab.add_to_gguf(self.gguf_writer)
-
- self.gguf_writer.add_add_space_prefix(False)
-
- def set_gguf_parameters(self):
- hparams = self.hparams
- block_count = hparams["num_hidden_layers"]
-
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
- self.gguf_writer.add_embedding_length(hparams["hidden_size"])
- self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
- self.gguf_writer.add_head_count(hparams["num_attention_heads"])
- self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
- self.gguf_writer.add_key_length(hparams["head_dim"])
- self.gguf_writer.add_value_length(hparams["head_dim"])
- self.gguf_writer.add_file_type(self.ftype)
- self.gguf_writer.add_attn_logit_softcapping(
- self.hparams["attn_logit_softcapping"]
- )
- self.gguf_writer.add_final_logit_softcapping(
- self.hparams["final_logit_softcapping"]
- )
- self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
-
- # sanity check
- attn_scalar = self.hparams["query_pre_attn_scalar"]
- if attn_scalar != hparams["hidden_size"] / hparams["num_attention_heads"]:
- raise ValueError("query_pre_attn_scalar must be equal to n_embd / n_head")
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- # lm_head is not used in llama.cpp, while autoawq will include this tensor in model
- # To prevent errors, skip loading lm_head.weight.
- if name == "lm_head.weight":
- logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
- return []
-
- # ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
- if name.endswith("norm.weight"):
- data_torch = data_torch + 1
-
- return [(self.map_tensor_name(name), data_torch)]
-
-
-@Model.register("Starcoder2ForCausalLM")
-class StarCoder2Model(Model):
- model_arch = gguf.MODEL_ARCH.STARCODER2
-
-
-@Model.register("MambaForCausalLM", "MambaLMHeadModel")
-class MambaModel(Model):
- model_arch = gguf.MODEL_ARCH.MAMBA
-
- def set_vocab(self):
- vocab_size = self.hparams["vocab_size"]
- # Round vocab size up to the next multiple of pad_vocab_size_multiple (8 by default)
- pad_vocab = self.hparams.get("pad_vocab_size_multiple", 8)
- # pad using ceiling division
- # ref: https://stackoverflow.com/a/17511341/22827863
- vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
- self.hparams["vocab_size"] = vocab_size
-
- if (self.dir_model / "tokenizer.json").is_file():
- self._set_vocab_gpt2()
- elif (self.dir_model / "tokenizer.model").is_file():
- self._set_vocab_sentencepiece()
- else:
- # Use the GPT-NeoX tokenizer when no tokenizer files are present
- tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf"
- logger.warning(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
- neox_reader = gguf.GGUFReader(tokenizer_path, "r")
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
- self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]).decode("utf-8") if field else "gpt2")
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.PRE)
- self.gguf_writer.add_tokenizer_pre(bytes(field.parts[-1]).decode("utf-8") if field else "mpt")
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
- assert field
- self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
- assert field
- self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.MERGES)
- assert field
- self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
- self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0] if field else 1)
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)
- self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0] if field else 0)
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)
- self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0] if field else 0)
-
- field = neox_reader.get_field(gguf.Keys.Tokenizer.PAD_ID)
- self.gguf_writer.add_pad_token_id(field.parts[-1].tolist()[0] if field else 0)
-
- def set_gguf_parameters(self):
- d_model = self.find_hparam(["hidden_size", "d_model"])
- d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
- d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
- d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 16
- # ceiling division
- # ref: https://stackoverflow.com/a/17511341/22827863
- # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
- dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
- rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
-
- # Fail early for models which don't have a block expansion factor of 2
- assert d_inner == 2 * d_model
-
- self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
- self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
- self.gguf_writer.add_embedding_length(d_model)
- self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
- self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
- self.gguf_writer.add_block_count(self.hparams["n_layer"])
- self.gguf_writer.add_ssm_conv_kernel(d_conv)
- self.gguf_writer.add_ssm_inner_size(d_inner)
- self.gguf_writer.add_ssm_state_size(d_state)
- self.gguf_writer.add_ssm_time_step_rank(dt_rank)
- self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
- self.gguf_writer.add_file_type(self.ftype)
-
- _tok_embd = None
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
- tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
-
- new_name = self.map_tensor_name(name)
-
- if name.endswith(".A_log"):
- logger.debug("A_log --> A ==> " + new_name)
- data_torch = -torch.exp(data_torch)
-
- # assuming token_embd.weight is seen before output.weight
- if self._tok_embd is not None and new_name == output_name:
- if torch.equal(self._tok_embd, data_torch):
- logger.debug(f"{output_name} is equivalent to {tok_embd_name}, omitting")
- return []
- elif new_name == tok_embd_name:
- self._tok_embd = data_torch
-
- return [(new_name, data_torch)]
-
- def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
- del n_dims # unused
-
- return bid is not None and new_name in (
- self.format_tensor_name(n, bid, ".weight" if name.endswith(".weight") else "") for n in [
- gguf.MODEL_TENSOR.SSM_CONV1D,
- gguf.MODEL_TENSOR.SSM_X,
- gguf.MODEL_TENSOR.SSM_DT,
- gguf.MODEL_TENSOR.SSM_A,
- gguf.MODEL_TENSOR.SSM_D,
- ]
- )
-
-
-@Model.register("CohereForCausalLM")
-class CommandR2Model(Model):
- model_arch = gguf.MODEL_ARCH.COMMAND_R
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- # max_position_embeddings = 8192 in config.json, but the model was actually
- # trained on a 128k context length
- # aya-23 models don't have model_max_length specified
- self.hparams["max_position_embeddings"] = self.find_hparam(["model_max_length", "max_position_embeddings"])
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- self.gguf_writer.add_logit_scale(self.hparams["logit_scale"])
- self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
-
-
-@Model.register("OlmoForCausalLM")
-@Model.register("OLMoForCausalLM")
-class OlmoModel(Model):
- model_arch = gguf.MODEL_ARCH.OLMO
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- self.gguf_writer.add_layer_norm_eps(1e-5)
- clip_qkv = self.hparams.get("clip_qkv")
- if clip_qkv is not None:
- self.gguf_writer.add_clamp_kqv(clip_qkv)
-
- # Same as the superclass, but permutes q_proj and k_proj
- # Copied from: LlamaModel
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- n_head = self.hparams["num_attention_heads"]
- n_kv_head = self.hparams.get("num_key_value_heads")
-
- if name.endswith("q_proj.weight"):
- data_torch = LlamaModel.permute(data_torch, n_head, n_head)
- if name.endswith("k_proj.weight"):
- data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
-
- return [(self.map_tensor_name(name), data_torch)]
-
-
-@Model.register("JinaBertModel", "JinaBertForMaskedLM")
-class JinaBertV2Model(BertModel):
- model_arch = gguf.MODEL_ARCH.JINA_BERT_V2
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.intermediate_size = self.hparams["intermediate_size"]
-
- def get_tensors(self):
- for name, data in super().get_tensors():
- if 'gated_layer' in name:
- d1 = data[:self.intermediate_size, :]
- name1 = name.replace('gated_layers', 'gated_layers_w')
- name1 = name1.replace('up_gated_layer', 'gated_layers_v')
- d2 = data[self.intermediate_size:, :]
- name2 = name.replace('gated_layers', 'gated_layers_v')
- name2 = name2.replace('up_gated_layer', 'gated_layers_w')
- yield name1, d1
- yield name2, d2
- continue
-
- yield name, data
-
- def set_vocab(self, *args, **kwargs):
- tokenizer_class = 'BertTokenizer'
- with open(self.dir_model / "tokenizer_config.json", "r", encoding="utf-8") as f:
- tokenizer_class = json.load(f)['tokenizer_class']
-
- if tokenizer_class == 'BertTokenizer':
- super().set_vocab()
- elif tokenizer_class == 'RobertaTokenizer':
- self._set_vocab_gpt2()
- self.gguf_writer.add_token_type_count(2)
- else:
- raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel')
- self.gguf_writer.add_add_bos_token(True)
- self.gguf_writer.add_add_eos_token(True)
-
-
-@Model.register("ArcticForCausalLM")
-class ArcticModel(Model):
- model_arch = gguf.MODEL_ARCH.ARCTIC
-
- def set_vocab(self):
- # The reason for using a custom implementation here is that the
- # snowflake-arctic-instruct model redefined tokens 31998 and 31999 from
- # tokenizer.model and used them as BOS and EOS instead of adding new tokens.
- from sentencepiece import SentencePieceProcessor
-
- tokenizer_path = self.dir_model / 'tokenizer.model'
-
- if not tokenizer_path.is_file():
- logger.error(f'Error: Missing {tokenizer_path}')
- sys.exit(1)
-
- # Read the whole vocabulary from the tokenizer.model file
- tokenizer = SentencePieceProcessor()
- tokenizer.LoadFromFile(str(tokenizer_path))
-
- vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
-
- tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
- scores: list[float] = [-10000.0] * vocab_size
- toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
-
- for token_id in range(tokenizer.vocab_size()):
-
- piece = tokenizer.IdToPiece(token_id)
- text = piece.encode("utf-8")
- score = tokenizer.GetScore(token_id)
-
- toktype = SentencePieceTokenTypes.NORMAL
- if tokenizer.IsUnknown(token_id):
- toktype = SentencePieceTokenTypes.UNKNOWN
- elif tokenizer.IsControl(token_id):
- toktype = SentencePieceTokenTypes.CONTROL
- elif tokenizer.IsUnused(token_id):
- toktype = SentencePieceTokenTypes.UNUSED
- elif tokenizer.IsByte(token_id):
- toktype = SentencePieceTokenTypes.BYTE
-
- tokens[token_id] = text
- scores[token_id] = score
- toktypes[token_id] = toktype
-
- # Use the added_tokens_decoder field from tokenizer_config.json as the source
- # of information about added/redefined tokens and modify them accordingly.
- tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
- if tokenizer_config_file.is_file():
- with open(tokenizer_config_file, "r", encoding="utf-8") as f:
- tokenizer_config_json = json.load(f)
-
- if "added_tokens_decoder" in tokenizer_config_json:
- added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"]
- for token_id, token_json in added_tokens_decoder.items():
- token_id = int(token_id)
- if (token_id >= vocab_size):
- logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
- continue
-
- token_content = token_json["content"]
- token_type = SentencePieceTokenTypes.USER_DEFINED
- token_score = -10000.0
-
- # Map unk_token to UNKNOWN, other special tokens to CONTROL
- # Set the score to 0.0 as in the original tokenizer.model
- if ("special" in token_json) and token_json["special"]:
- if token_content == tokenizer_config_json["unk_token"]:
- token_type = SentencePieceTokenTypes.UNKNOWN
- else:
- token_type = SentencePieceTokenTypes.CONTROL
- token_score = 0.0
-
- logger.info(f"Setting added token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")
- tokens[token_id] = token_content.encode("utf-8")
- toktypes[token_id] = token_type
- scores[token_id] = token_score
-
- self.gguf_writer.add_tokenizer_model("llama")
- self.gguf_writer.add_tokenizer_pre("default")
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_scores(scores)
- self.gguf_writer.add_token_types(toktypes)
-
- special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
- special_vocab.add_to_gguf(self.gguf_writer)
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- hparams = self.hparams
- self.gguf_writer.add_vocab_size(hparams["vocab_size"])
- self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
-
- _experts: list[dict[str, Tensor]] | None = None
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- n_head = self.hparams["num_attention_heads"]
- n_kv_head = self.hparams.get("num_key_value_heads")
-
- if name.endswith("q_proj.weight"):
- data_torch = LlamaModel.permute(data_torch, n_head, n_head)
- if name.endswith("k_proj.weight"):
- data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
-
- # process the experts separately
- if name.find("block_sparse_moe.experts") != -1:
- n_experts = self.hparams["num_local_experts"]
-
- assert bid is not None
-
- if self._experts is None:
- self._experts = [{} for _ in range(self.block_count)]
-
- self._experts[bid][name] = data_torch
-
- if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
- # merge the experts into a single 3d tensor
- for wid in ["w1", "w2", "w3"]:
- datas: list[Tensor] = []
-
- for xid in range(n_experts):
- ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid}.weight"
- datas.append(self._experts[bid][ename])
- del self._experts[bid][ename]
-
- data_torch = torch.stack(datas, dim=0)
-
- merged_name = f"layers.{bid}.feed_forward.experts.{wid}.weight"
-
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
- else:
- return []
-
- return [(self.map_tensor_name(name), data_torch)]
-
- def write_tensors(self):
- super().write_tensors()
-
- if self._experts is not None:
- # flatten `list[dict[str, Tensor]]` into `list[str]`
- experts = [k for d in self._experts for k in d.keys()]
- if len(experts) > 0:
- raise ValueError(f"Unprocessed experts: {experts}")
-
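As a side note, the expert-merging branch above just stacks the per-expert 2D weight matrices into one 3D tensor per projection; a tiny sketch with arbitrary toy shapes:

import torch

n_experts, n_ff, n_embd = 4, 8, 6                        # toy sizes, for illustration only
experts = [torch.randn(n_ff, n_embd) for _ in range(n_experts)]
merged = torch.stack(experts, dim=0)                     # one 3D tensor: (n_experts, n_ff, n_embd)
assert merged.shape == (n_experts, n_ff, n_embd)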
-
-@Model.register("DeepseekV2ForCausalLM")
-class DeepseekV2Model(Model):
- model_arch = gguf.MODEL_ARCH.DEEPSEEK2
-
- def set_vocab(self):
- self._set_vocab_gpt2()
-
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- hparams = self.hparams
-
- self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
- self.gguf_writer.add_vocab_size(hparams["vocab_size"])
- if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
- self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
- self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
- self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
- self.gguf_writer.add_value_length(hparams["v_head_dim"])
- self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
- self.gguf_writer.add_expert_count(hparams["n_routed_experts"])
- self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"])
- self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
- self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
-
- if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
- if self.hparams["rope_scaling"].get("type") == "yarn":
- self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
- self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
- self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
- self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * hparams["rope_scaling"]["mscale_all_dim"])
-
- _experts: list[dict[str, Tensor]] | None = None
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- # process the experts separately
- if name.find("mlp.experts") != -1:
- n_experts = self.hparams["n_routed_experts"]
- assert bid is not None
-
- if self._experts is None:
- self._experts = [{} for _ in range(self.block_count)]
-
- self._experts[bid][name] = data_torch
-
- if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
- # merge the experts into a single 3d tensor
- for w_name in ["down_proj", "gate_proj", "up_proj"]:
- datas: list[Tensor] = []
-
- for xid in range(n_experts):
- ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
- datas.append(self._experts[bid][ename])
- del self._experts[bid][ename]
-
- data_torch = torch.stack(datas, dim=0)
-
- merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
-
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
- else:
- return []
-
- return [(self.map_tensor_name(name), data_torch)]
-
- def write_tensors(self):
- super().write_tensors()
-
- if self._experts is not None:
- # flatten `list[dict[str, Tensor]]` into `list[str]`
- experts = [k for d in self._experts for k in d.keys()]
- if len(experts) > 0:
- raise ValueError(f"Unprocessed experts: {experts}")
-
-
-@Model.register("T5WithLMHeadModel")
-@Model.register("T5ForConditionalGeneration")
-@Model.register("MT5ForConditionalGeneration")
-@Model.register("UMT5ForConditionalGeneration")
-class T5Model(Model):
- model_arch = gguf.MODEL_ARCH.T5
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.shared_token_embeddings_found = False
-
- def set_vocab(self):
- # avoid the "TypeError: Descriptors cannot be created directly"
- # exception raised when importing sentencepiece_model_pb2
- os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
- from sentencepiece import SentencePieceProcessor
- from sentencepiece import sentencepiece_model_pb2 as model
-
- tokenizer_path = self.dir_model / 'tokenizer.model'
-
- # many older models use 'spiece.model' as the tokenizer model filename
- if not tokenizer_path.is_file():
- tokenizer_path = self.dir_model / 'spiece.model'
-
- if not tokenizer_path.is_file():
- raise FileNotFoundError(f"File not found: {tokenizer_path}")
-
- sentencepiece_model = model.ModelProto()
- sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
-
- # some models like Pile-T5 family use BPE tokenizer instead of Unigram
- if sentencepiece_model.trainer_spec.model_type == 2: # BPE
- # ensure the tokenizer model file name is correct
- assert tokenizer_path.name == 'tokenizer.model'
- return self._set_vocab_sentencepiece()
- else:
- assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM
-
- add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
- remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
- precompiled_charsmap = sentencepiece_model.normalizer_spec.precompiled_charsmap
-
- tokenizer = SentencePieceProcessor()
- tokenizer.LoadFromFile(str(tokenizer_path))
-
- vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
-
- tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
- scores: list[float] = [-10000.0] * vocab_size
- toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
-
- for token_id in range(tokenizer.vocab_size()):
- piece = tokenizer.IdToPiece(token_id)
- text = piece.encode("utf-8")
- score = tokenizer.GetScore(token_id)
-
- toktype = SentencePieceTokenTypes.NORMAL
- if tokenizer.IsUnknown(token_id):
- toktype = SentencePieceTokenTypes.UNKNOWN
- elif tokenizer.IsControl(token_id):
- toktype = SentencePieceTokenTypes.CONTROL
- elif tokenizer.IsUnused(token_id):
- toktype = SentencePieceTokenTypes.UNUSED
- elif tokenizer.IsByte(token_id):
- toktype = SentencePieceTokenTypes.BYTE
-
- tokens[token_id] = text
- scores[token_id] = score
- toktypes[token_id] = toktype
-
- added_tokens_file = self.dir_model / 'added_tokens.json'
- if added_tokens_file.is_file():
- with open(added_tokens_file, "r", encoding="utf-8") as f:
- added_tokens_json = json.load(f)
- for key in added_tokens_json:
- token_id = added_tokens_json[key]
- if (token_id >= vocab_size):
- logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
- continue
-
- tokens[token_id] = key.encode("utf-8")
- scores[token_id] = -1000.0
- toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
-
- if vocab_size > len(tokens):
- pad_count = vocab_size - len(tokens)
- logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
- for i in range(1, pad_count + 1):
- tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
- scores.append(-1000.0)
- toktypes.append(SentencePieceTokenTypes.UNUSED)
-
- self.gguf_writer.add_tokenizer_model("t5")
- self.gguf_writer.add_tokenizer_pre("default")
- self.gguf_writer.add_token_list(tokens)
- self.gguf_writer.add_token_scores(scores)
- self.gguf_writer.add_token_types(toktypes)
- self.gguf_writer.add_add_space_prefix(add_prefix)
- self.gguf_writer.add_remove_extra_whitespaces(remove_whitespaces)
- if precompiled_charsmap:
- self.gguf_writer.add_precompiled_charsmap(precompiled_charsmap)
-
- special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
- special_vocab.add_to_gguf(self.gguf_writer)
-
- self.gguf_writer.add_add_bos_token(False)
- self.gguf_writer.add_add_eos_token(True)
-
- def set_gguf_parameters(self):
- self.gguf_writer.add_name("T5")
- if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
- logger.warning("Couldn't find context length in config.json, assuming default value of 512")
- n_ctx = 512
- self.gguf_writer.add_context_length(n_ctx)
- self.gguf_writer.add_embedding_length(self.hparams["d_model"])
- self.gguf_writer.add_feed_forward_length(self.hparams["d_ff"])
- self.gguf_writer.add_block_count(self.hparams["num_layers"])
- self.gguf_writer.add_head_count(self.hparams["num_heads"])
- self.gguf_writer.add_key_length(self.hparams["d_kv"])
- self.gguf_writer.add_value_length(self.hparams["d_kv"])
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_relative_attn_buckets_count(self.hparams["relative_attention_num_buckets"])
- self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_decoder_start_token_id(self.hparams["decoder_start_token_id"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- # T5-based models store the shared token embeddings tensor under any of the names "encoder.embed_tokens.weight",
- # "decoder.embed_tokens.weight" or "shared.weight". Some checkpoints even contain more than one of these tensors
- # in the safetensors files. We use the first one encountered as the token embeddings for both the encoder and
- # the decoder, and ignore the remaining ones.
- if name in ["decoder.embed_tokens.weight", "encoder.embed_tokens.weight", "shared.weight"]:
- if not self.shared_token_embeddings_found:
- name = "shared.weight"
- self.shared_token_embeddings_found = True
- else:
- logger.debug(f"Skipping shared tensor {name!r} in safetensors so that convert can end normally.")
- return []
-
- return [(self.map_tensor_name(name), data_torch)]
-
-
-@Model.register("JAISLMHeadModel")
-class JaisModel(Model):
- model_arch = gguf.MODEL_ARCH.JAIS
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- # SwiGLU activation
- assert self.hparams["activation_function"] == "swiglu"
- # ALiBi position embedding
- assert self.hparams["position_embedding_type"] == "alibi"
-
- # Embeddings scale
- self.embeddings_scale = 1.0
- # note: for some JAIS flavors, the output projection is tied to (the same as) wte in the original model
- self.output_is_wte = False
- if 'mup_embeddings_scale' in self.hparams:
- self.output_is_wte = True # Hack (?)
- self.embeddings_scale = self.hparams['mup_embeddings_scale']
- elif 'embeddings_scale' in self.hparams:
- self.embeddings_scale = self.hparams['embeddings_scale']
- else:
- assert False
-
- self.width_scale = 1.0
- if 'mup_output_alpha' in self.hparams:
- assert 'mup_width_scale' in self.hparams
- self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
- elif 'width_scale' in self.hparams:
- self.width_scale = self.hparams['width_scale']
- else:
- assert False
-
- self.max_alibi_bias = 8.0
-
- def set_vocab(self):
- self._set_vocab_gpt2()
-
- def set_gguf_parameters(self):
- self.gguf_writer.add_name(self.dir_model.name)
- self.gguf_writer.add_block_count(self.hparams["n_layer"])
- self.gguf_writer.add_context_length(self.hparams["n_positions"])
- self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
- self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
- self.gguf_writer.add_head_count(self.hparams["n_head"])
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_file_type(self.ftype)
-
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- tensors: list[tuple[str, Tensor]] = []
-
- # we don't need these
- if name.endswith(".attn.bias"):
- return tensors
-
- if name.endswith("relative_pe.slopes"):
- # Calculate max ALiBi bias (this is the inverse of the ALiBi calculation)
- # Some other models have max_alibi_bias spelled out explicitly in the hyperparams,
- # but Jais's PyTorch model simply precalculates the slope values and places them
- # in relative_pe.slopes
- n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
- first_val = float(data_torch._data[0])
- self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
-
- return tensors
-
- if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
- data_torch = data_torch.transpose(1, 0)
-
- new_name = self.map_tensor_name(name)
-
- if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
- tensors.append((new_name, data_torch * self.embeddings_scale))
- if self.output_is_wte:
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
- elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
- assert not self.output_is_wte
- tensors.append((new_name, data_torch * self.width_scale))
- else:
- tensors.append((new_name, data_torch))
-
- return tensors
-
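For context, the slope inversion in modify_tensors above can be sanity-checked against the usual ALiBi slope schedule; a small sketch assuming 8 heads and a maximum bias of 8 (assumed values, not taken from any checkpoint):

import math

n_head = 8                                        # assumed head count (a power of two)
max_bias = 8.0                                    # assumed maximum ALiBi bias
first_slope = 2 ** (-max_bias / n_head)           # first precomputed slope, 0.5 here
n_closest = 2 ** math.floor(math.log2(n_head))    # largest power of two <= n_head
assert -round(math.log2(first_slope) * n_closest) == 8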
- def write_tensors(self):
- super().write_tensors()
- self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
-
-
-###### CONVERSION LOGIC ######
-
-
-# tree of lazy tensors
-class LazyTorchTensor(gguf.LazyBase):
- _tensor_type = torch.Tensor
- # to keep the type-checker happy
- dtype: torch.dtype
- shape: torch.Size
-
- # only used when converting a torch.Tensor to a np.ndarray
- _dtype_map: dict[torch.dtype, type] = {
- torch.float16: np.float16,
- torch.float32: np.float32,
- }
-
- def numpy(self) -> gguf.LazyNumpyTensor:
- dtype = self._dtype_map[self.dtype]
- return gguf.LazyNumpyTensor(
- meta=gguf.LazyNumpyTensor.meta_with_dtype_and_shape(dtype, self.shape),
- lazy=self._lazy,
- args=(self,),
- func=(lambda s: s[0].numpy())
- )
-
- @classmethod
- def meta_with_dtype_and_shape(cls, dtype: torch.dtype, shape: torch.Size) -> Tensor:
- return torch.empty(size=shape, dtype=dtype, device="meta")
-
- @classmethod
- def __torch_function__(cls, func, types, args=(), kwargs=None):
- del types # unused
-
- if kwargs is None:
- kwargs = {}
-
- if func is torch.Tensor.numpy:
- return args[0].numpy()
-
- return LazyTorchTensor._wrap_fn(func)(*args, **kwargs)
-
-
-def parse_args() -> argparse.Namespace:
- parser = argparse.ArgumentParser(
- description="Convert a huggingface model to a GGML compatible file")
- parser.add_argument(
- "--vocab-only", action="store_true",
- help="extract only the vocab",
- )
- parser.add_argument(
- "--awq-path", type=Path, default=None,
- help="Path to scale awq cache file",
- )
- parser.add_argument(
- "--outfile", type=Path,
- help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
- )
- parser.add_argument(
- "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16",
- help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
- )
- parser.add_argument(
- "--bigendian", action="store_true",
- help="model is executed on big endian machine",
- )
- parser.add_argument(
- "model", type=Path,
- help="directory containing model file",
- )
- parser.add_argument(
- "--use-temp-file", action="store_true",
- help="use the tempfile library while processing (helpful if the process gets killed from running out of memory)",
- )
- parser.add_argument(
- "--no-lazy", action="store_true",
- help="use more RAM by computing all outputs before writing (use in case lazy evaluation is broken)",
- )
- parser.add_argument(
- "--model-name", type=str, default=None,
- help="name of the model",
- )
- parser.add_argument(
- "--verbose", action="store_true",
- help="increase output verbosity",
- )
- parser.add_argument(
- "--split-max-tensors", type=int, default=0,
- help="max tensors in each split",
- )
- parser.add_argument(
- "--split-max-size", type=str, default="0",
- help="max size per split N(M|G)",
- )
- parser.add_argument(
- "--dry-run", action="store_true",
- help="only print out a split plan and exit, without writing any new files",
- )
- parser.add_argument(
- "--no-tensor-first-split", action="store_true",
- help="do not add tensors to the first split (disabled by default)"
- )
-
- return parser.parse_args()
-
-
-def split_str_to_n_bytes(split_str: str) -> int:
- if split_str.endswith("K"):
- n = int(split_str[:-1]) * 1000
- elif split_str.endswith("M"):
- n = int(split_str[:-1]) * 1000 * 1000
- elif split_str.endswith("G"):
- n = int(split_str[:-1]) * 1000 * 1000 * 1000
- elif split_str.isnumeric():
- n = int(split_str)
- else:
- raise ValueError(f"Invalid split size: {split_str}, must be a number, optionally followed by K, M, or G")
-
- if n < 0:
- raise ValueError(f"Invalid split size: {split_str}, must be non-negative")
-
- return n
-
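The size suffixes above are decimal (SI), not binary, so for example:

assert split_str_to_n_bytes("300") == 300
assert split_str_to_n_bytes("300M") == 300_000_000    # decimal megabytes, not MiB
assert split_str_to_n_bytes("2G") == 2_000_000_000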
-
-def main() -> None:
- args = parse_args()
-
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
-
- dir_model = args.model
-
- if args.awq_path:
- sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
- from awq.apply_awq import add_scale_weights # type: ignore[import-not-found]
- tmp_model_path = args.model / "weighted_model"
- dir_model = tmp_model_path
- if tmp_model_path.is_dir():
- logger.info(f"{tmp_model_path} exists as a weighted model.")
- else:
- tmp_model_path.mkdir(parents=True, exist_ok=True)
- logger.info("Saving new weighted model ...")
- add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
- logger.info(f"Saved weighted model at {tmp_model_path}.")
-
- if not dir_model.is_dir():
- logger.error(f'Error: {args.model} is not a directory')
- sys.exit(1)
-
- ftype_map: dict[str, gguf.LlamaFileType] = {
- "f32": gguf.LlamaFileType.ALL_F32,
- "f16": gguf.LlamaFileType.MOSTLY_F16,
- "bf16": gguf.LlamaFileType.MOSTLY_BF16,
- "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
- "auto": gguf.LlamaFileType.GUESSED,
- }
-
- is_split = args.split_max_tensors > 0 or args.split_max_size != "0"
- if args.use_temp_file and is_split:
- logger.error("Error: Cannot use temp file when splitting")
- sys.exit(1)
-
- if args.outfile is not None:
- fname_out = args.outfile
- else:
- # output in the same directory as the model by default
- fname_out = dir_model / 'ggml-model-{ftype}.gguf'
-
- logger.info(f"Loading model: {dir_model.name}")
-
- hparams = Model.load_hparams(dir_model)
-
- with torch.inference_mode():
- try:
- model_class = Model.from_model_architecture(hparams["architectures"][0])
- except NotImplementedError:
- logger.error(f"Model {hparams['architectures'][0]} is not supported")
- sys.exit(1)
-
- model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file,
- args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors,
- split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
- small_first_shard=args.no_tensor_first_split)
-
- logger.info("Set model parameters")
- model_instance.set_gguf_parameters()
-
- logger.info("Set model tokenizer")
- model_instance.set_vocab()
-
- model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
-
- if args.vocab_only:
- logger.info("Exporting model vocab...")
- model_instance.write_vocab()
- logger.info(f"Model vocab successfully exported to {model_instance.fname_out}")
- else:
- logger.info("Exporting model...")
- model_instance.write()
- out_path = f"{model_instance.fname_out.parent}{os.sep}" if is_split else model_instance.fname_out
- logger.info(f"Model successfully exported to {out_path}")
-
-
-if __name__ == '__main__':
- main()
+++ /dev/null
-#!/usr/bin/env python3
-from __future__ import annotations
-
-import logging
-import argparse
-import os
-import struct
-import sys
-from enum import IntEnum
-from pathlib import Path
-
-import numpy as np
-
-if 'NO_LOCAL_GGUF' not in os.environ:
- sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
-import gguf
-
-logger = logging.getLogger("ggml-to-gguf")
-
-
-class GGMLFormat(IntEnum):
- GGML = 0
- GGMF = 1
- GGJT = 2
-
-
-class GGMLFType(IntEnum):
- ALL_F32 = 0
- MOSTLY_F16 = 1
- MOSTLY_Q4_0 = 2
- MOSTLY_Q4_1 = 3
- MOSTLY_Q4_1_SOME_F16 = 4
- MOSTLY_Q8_0 = 7
- MOSTLY_Q5_0 = 8
- MOSTLY_Q5_1 = 9
- MOSTLY_Q2_K = 10
- MOSTLY_Q3_K_S = 11
- MOSTLY_Q3_K_M = 12
- MOSTLY_Q3_K_L = 13
- MOSTLY_Q4_K_S = 14
- MOSTLY_Q4_K_M = 15
- MOSTLY_Q5_K_S = 16
- MOSTLY_Q5_K_M = 17
- MOSTLY_Q6_K = 18
-
-
-class Hyperparameters:
- def __init__(self):
- self.n_vocab = self.n_embd = self.n_mult = self.n_head = 0
- self.n_layer = self.n_rot = self.n_ff = 0
- self.ftype = GGMLFType.ALL_F32
-
- def set_n_ff(self, model):
- ff_tensor_idx = model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
- assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
- ff_tensor = model.tensors[ff_tensor_idx]
- self.n_ff = ff_tensor.dims[1]
-
- def load(self, data, offset):
- (
- self.n_vocab,
- self.n_embd,
- self.n_mult,
- self.n_head,
- self.n_layer,
- self.n_rot,
- ftype,
- ) = struct.unpack('<7I', data[offset:offset + (4 * 7)])
- try:
- self.ftype = GGMLFType(ftype)
- except ValueError:
- raise ValueError(f'Invalid ftype {ftype}')
- return 4 * 7
-
- def __str__(self):
- return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype.name}>'
-
-
-class Vocab:
- def __init__(self, load_scores = True):
- self.items = []
- self.load_scores = load_scores
-
- def load(self, data, offset, n_vocab):
- orig_offset = offset
- for _ in range(n_vocab):
- itemlen = struct.unpack('<I', data[offset:offset + 4])[0]
- assert itemlen < 4096, 'Absurd vocab item length'
- offset += 4
- item_text = bytes(data[offset:offset + itemlen])
- offset += itemlen
- if self.load_scores:
- item_score = struct.unpack('<f', data[offset:offset + 4])[0]
- offset += 4
- else:
- item_score = 0.0
- self.items.append((item_text, item_score))
- return offset - orig_offset
-
-
-class Tensor:
- def __init__(self, use_padding = True):
- self.name = None
- self.dims: tuple[int, ...] = ()
- self.dtype = None
- self.start_offset = 0
- self.len_bytes = np.int64(0)
- self.use_padding = use_padding
-
- def load(self, data, offset):
- orig_offset = offset
- (n_dims, name_len, dtype) = struct.unpack('<3I', data[offset:offset + 12])
- assert n_dims >= 0 and n_dims <= 4, f'Invalid tensor dimensions {n_dims}'
- assert name_len < 4096, 'Absurd tensor name length'
- quant = gguf.GGML_QUANT_SIZES.get(dtype)
- assert quant is not None, 'Unknown tensor type'
- (blksize, tysize) = quant
- offset += 12
- self.dtype = dtype
- self.dims = struct.unpack(f'<{n_dims}I', data[offset:offset + (4 * n_dims)])
- offset += 4 * n_dims
- self.name = bytes(data[offset:offset + name_len])
- offset += name_len
- pad = ((offset + 31) & ~31) - offset if self.use_padding else 0
- offset += pad
- n_elems = np.prod(self.dims)
- n_bytes = np.int64(np.int64(n_elems) * np.int64(tysize)) // np.int64(blksize)
- self.start_offset = offset
- self.len_bytes = n_bytes
- offset += n_bytes
- return offset - orig_offset
-
-
-class GGMLModel:
- def __init__(self):
- self.hyperparameters = None
- self.vocab = None
- self.tensor_map = {}
- self.tensors = []
-
- def validate_header(self, data, offset):
- magic = bytes(data[offset:offset + 4])
- if magic == b'GGUF':
- raise ValueError('File is already in GGUF format.')
- if magic == b'lmgg':
- self.file_format = GGMLFormat.GGML
- self.format_version = 1
- return 4
- version = struct.unpack('<I', data[offset + 4:offset + 8])[0]
- if magic == b'fmgg':
- if version != 1:
- raise ValueError(f'Cannot handle unexpected GGMF file version {version}')
- self.file_format = GGMLFormat.GGMF
- self.format_version = version
- return 8
- if magic == b'tjgg':
- if version < 1 or version > 3:
- raise ValueError(f'Cannot handle unexpected GGJT file version {version}')
- self.file_format = GGMLFormat.GGJT
- self.format_version = version
- return 8
- raise ValueError(f"Unexpected file magic {magic!r}! This doesn't look like a GGML format file.")
-
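The byte-reversed magics checked above ('lmgg', 'fmgg', 'tjgg') are simply the ASCII tags 'ggml', 'ggmf' and 'ggjt' stored as little-endian uint32 values; a quick illustration:

import struct

assert struct.pack('<I', 0x67676d6c) == b'lmgg'   # 'ggml'
assert struct.pack('<I', 0x67676d66) == b'fmgg'   # 'ggmf'
assert struct.pack('<I', 0x67676a74) == b'tjgg'   # 'ggjt'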
- def validate_conversion(self, ftype):
- err = ''
- if (self.file_format < GGMLFormat.GGJT or self.format_version < 2):
- if ftype not in (GGMLFType.ALL_F32, GGMLFType.MOSTLY_F16):
- err = 'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2.'
- elif (self.file_format == GGMLFormat.GGJT and self.format_version == 2):
- if ftype in (GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1,
- GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
- err = 'Q4 and Q8 quantizations changed in GGJTv3.'
- if len(err) > 0:
- raise ValueError(f'{err} Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')
-
- def load(self, data, offset):
- offset += self.validate_header(data, offset)
- hp = Hyperparameters()
- offset += hp.load(data, offset)
- logger.info(f'* File format: {self.file_format.name}v{self.format_version} with ftype {hp.ftype.name}')
- self.validate_conversion(hp.ftype)
- vocab = Vocab(load_scores = self.file_format > GGMLFormat.GGML)
- offset += vocab.load(data, offset, hp.n_vocab)
- tensors: list[Tensor] = []
- tensor_map = {}
- while offset < len(data):
- tensor = Tensor(use_padding = self.file_format > GGMLFormat.GGMF)
- offset += tensor.load(data, offset)
- tensor_map[tensor.name] = len(tensors)
- tensors.append(tensor)
- self.hyperparameters = hp
- self.vocab = vocab
- self.tensors = tensors
- self.tensor_map = tensor_map
- hp.set_n_ff(self)
- return offset
-
-
-class GGMLToGGUF:
- def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override = None, special_vocab = None):
- hp = ggml_model.hyperparameters
- self.model = ggml_model
- self.data = data
- self.cfg = cfg
- self.params_override = params_override
- self.vocab_override = vocab_override
- self.special_vocab = special_vocab
- if params_override is not None:
- n_kv_head = params_override.n_head_kv
- else:
- if cfg.gqa == 1:
- n_kv_head = hp.n_head
- else:
- gqa = float(cfg.gqa)
- n_kv_head = None
- for x in range(1, 256):
- if float(hp.n_head) / float(x) == gqa:
- n_kv_head = x
- assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
- logger.info(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
- self.n_kv_head = n_kv_head
- self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)
-
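To make the n_kv_head guess above concrete: for a hypothetical LLaMA2-70B-style model with 64 attention heads and --gqa 8, the search resolves to 8 key/value heads (a minimal equivalent of the loop):

n_head, gqa = 64, 8.0    # assumed values, for illustration only
n_kv_head = next(x for x in range(1, 256) if float(n_head) / float(x) == gqa)
assert n_kv_head == 8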
- def save(self):
- logger.info('* Preparing to save GGUF file')
- gguf_writer = gguf.GGUFWriter(
- self.cfg.output,
- gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
- use_temp_file = False)
- self.add_params(gguf_writer)
- self.add_vocab(gguf_writer)
- if self.special_vocab is not None:
- self.special_vocab.add_to_gguf(gguf_writer)
- self.add_tensors(gguf_writer)
- logger.info(" gguf: write header")
- gguf_writer.write_header_to_file()
- logger.info(" gguf: write metadata")
- gguf_writer.write_kv_data_to_file()
- logger.info(" gguf: write tensors")
- gguf_writer.write_tensors_to_file()
- gguf_writer.close()
-
- def add_params(self, gguf_writer):
- hp = self.model.hyperparameters
- cfg = self.cfg
- if cfg.desc is not None:
- desc = cfg.desc
- else:
- desc = f'converted from legacy {self.model.file_format.name}v{self.model.format_version} {hp.ftype.name} format'
- try:
- # Filenames aren't necessarily valid UTF-8.
- name = cfg.name if cfg.name is not None else cfg.input.name
- except UnicodeDecodeError:
- name = None
- logger.info('* Adding model parameters and KV items')
- if name is not None:
- gguf_writer.add_name(name)
- gguf_writer.add_description(desc)
- gguf_writer.add_file_type(int(hp.ftype))
- if self.params_override is not None:
- po = self.params_override
- assert po.n_embd == hp.n_embd, 'Model hyperparams mismatch'
- assert po.n_layer == hp.n_layer, 'Model hyperparams mismatch'
- assert po.n_head == hp.n_head, 'Model hyperparams mismatch'
- gguf_writer.add_context_length (po.n_ctx)
- gguf_writer.add_embedding_length (po.n_embd)
- gguf_writer.add_block_count (po.n_layer)
- gguf_writer.add_feed_forward_length (po.n_ff)
- gguf_writer.add_rope_dimension_count(po.n_embd // po.n_head)
- gguf_writer.add_head_count (po.n_head)
- gguf_writer.add_head_count_kv (po.n_head_kv)
- gguf_writer.add_layer_norm_rms_eps (po.f_norm_eps)
- return
- gguf_writer.add_context_length(cfg.context_length)
- gguf_writer.add_embedding_length(hp.n_embd)
- gguf_writer.add_block_count(hp.n_layer)
- gguf_writer.add_feed_forward_length(hp.n_ff)
- gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
- gguf_writer.add_head_count(hp.n_head)
- gguf_writer.add_head_count_kv(self.n_kv_head)
- gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
-
- def add_vocab(self, gguf_writer):
- hp = self.model.hyperparameters
- gguf_writer.add_tokenizer_model('llama')
- gguf_writer.add_tokenizer_pre('default')
- tokens = []
- scores = []
- toktypes = []
- if self.vocab_override is not None:
- vo = self.vocab_override
- logger.info('* Adding vocab item(s)')
- for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
- tokens.append(vbytes)
- scores.append(score)
- toktypes.append(ttype)
- assert len(tokens) == hp.n_vocab, \
- f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
- gguf_writer.add_token_list(tokens)
- gguf_writer.add_token_scores(scores)
- if len(toktypes) > 0:
- gguf_writer.add_token_types(toktypes)
- return
- logger.info(f'* Adding {hp.n_vocab} vocab item(s)')
- assert len(self.model.vocab.items) >= 3, 'Cannot handle unexpectedly short model vocab'
- for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
- tt = 1 # Normal
- # Special handling for UNK, BOS, EOS tokens.
- if tokid <= 2:
- if tokid == 0:
- vbytes = b'<unk>'
- tt = 2
- elif tokid == 1:
- vbytes = b'<s>'
- tt = 3
- else:
- vbytes = b'</s>'
- tt = 3
- elif len(vbytes) == 0:
- tt = 3 # Control
- elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
- vbytes = bytes(f'<0x{vbytes[0]:02X}>', encoding = 'UTF-8')
- tt = 6 # Byte
- else:
- vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
- toktypes.append(tt)
- tokens.append(vbytes)
- scores.append(vscore)
- gguf_writer.add_token_list(tokens)
- gguf_writer.add_token_scores(scores)
- gguf_writer.add_token_types(toktypes)
- gguf_writer.add_unk_token_id(0)
- gguf_writer.add_bos_token_id(1)
- gguf_writer.add_eos_token_id(2)
-
- def add_tensors(self, gguf_writer):
- tensor_map = self.name_map
- data = self.data
- logger.info(f'* Adding {len(self.model.tensors)} tensor(s)')
- for tensor in self.model.tensors:
- name = str(tensor.name, 'UTF-8')
- mapped_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
- assert mapped_name is not None, f'Bad name {name}'
- tempdims = list(tensor.dims[:])
- if len(tempdims) > 1:
- temp = tempdims[1]
- tempdims[1] = tempdims[0]
- tempdims[0] = temp
- gguf_writer.add_tensor(
- mapped_name,
- data[tensor.start_offset:tensor.start_offset + tensor.len_bytes],
- raw_shape = tempdims,
- raw_dtype = tensor.dtype)
-
-
-def handle_metadata(cfg, hp):
- import convert
- assert cfg.model_metadata_dir.is_dir(), 'Metadata dir is not a directory'
- hf_config_path = cfg.model_metadata_dir / "config.json"
- orig_config_path = cfg.model_metadata_dir / "params.json"
- # We pass a fake model here. "original" mode will check the shapes of some
- # tensors if information is missing in the .json file: other than that, the
- # model data isn't used so this should be safe (at least for now).
- fakemodel = {
- 'tok_embeddings.weight': convert.LazyTensor.__new__(convert.LazyTensor),
- 'layers.0.feed_forward.w1.weight': convert.LazyTensor.__new__(convert.LazyTensor),
- }
- fakemodel['tok_embeddings.weight'].shape = [hp.n_vocab]
- fakemodel['layers.0.feed_forward.w1.weight'].shape = [hp.n_ff]
- if hf_config_path.exists():
- params = convert.Params.loadHFTransformerJson(fakemodel, hf_config_path)
- elif orig_config_path.exists():
- params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
- else:
- raise ValueError('Unable to load metadata')
- vocab_path = Path(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir)
- vocab_factory = convert.VocabFactory(vocab_path)
- vocab, special_vocab = vocab_factory.load_vocab(cfg.vocabtype.split(","), cfg.model_metadata_dir)
- convert.check_vocab_size(params, vocab)
- return params, vocab, special_vocab
-
-
-def handle_args():
- parser = argparse.ArgumentParser(description = 'Convert GGML models to GGUF')
- parser.add_argument('--input', '-i', type = Path, required = True,
- help = 'Input GGMLv3 filename')
- parser.add_argument('--output', '-o', type = Path, required = True,
- help = 'Output GGUF filename')
- parser.add_argument('--name',
- help = 'Set model name')
- parser.add_argument('--desc',
- help = 'Set model description')
- parser.add_argument('--gqa', type = int, default = 1,
- help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
- parser.add_argument('--eps', default = '5.0e-06',
- help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
- parser.add_argument('--context-length', '-c', type=int, default = 2048,
- help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
- parser.add_argument('--model-metadata-dir', '-m', type = Path,
- help = 'Load HuggingFace/.pth vocab and metadata from the specified directory')
- parser.add_argument("--vocab-dir", type=Path,
- help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
- parser.add_argument("--vocabtype", default="spm,hfft",
- help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm,hfft)")
- parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
- return parser.parse_args()
-
-
-def main():
- cfg = handle_args()
- logging.basicConfig(level=logging.DEBUG if cfg.verbose else logging.INFO)
- logger.info(f'* Using config: {cfg}')
- logger.warning('=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===')
- if cfg.model_metadata_dir is None and (cfg.gqa == 1 or cfg.eps == '5.0e-06'):
- logger.info('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".')
- data = np.memmap(cfg.input, mode = 'r')
- model = GGMLModel()
- logger.info('* Scanning GGML input file')
- offset = model.load(data, 0) # noqa
- logger.info(f'* GGML model hyperparameters: {model.hyperparameters}')
- vocab_override = None
- params_override = None
- special_vocab = None
- if cfg.model_metadata_dir is not None:
- (params_override, vocab_override, special_vocab) = handle_metadata(cfg, model.hyperparameters)
- logger.info('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.')
- logger.info(f'* Overriding params: {params_override}')
- logger.info(f'* Overriding vocab: {vocab_override}')
- logger.info(f'* Special vocab: {special_vocab}')
- else:
- logger.warning('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
- if model.file_format == GGMLFormat.GGML:
- logger.info('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!')
- converter = GGMLToGGUF(
- model, data, cfg,
- params_override = params_override,
- vocab_override = vocab_override,
- special_vocab = special_vocab
- )
- converter.save()
- logger.info(f'* Successful completion. Output saved to: {cfg.output}')
-
-
-if __name__ == '__main__':
- main()
--- /dev/null
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from __future__ import annotations
+
+import logging
+import argparse
+import contextlib
+import json
+import os
+import re
+import sys
+from enum import IntEnum
+from pathlib import Path
+from hashlib import sha256
+from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Sequence, TypeVar, cast
+
+import math
+import numpy as np
+import torch
+
+if TYPE_CHECKING:
+ from torch import Tensor
+
+if 'NO_LOCAL_GGUF' not in os.environ:
+ sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
+import gguf
+
+logger = logging.getLogger("hf-to-gguf")
+
+
+###### MODEL DEFINITIONS ######
+
+class SentencePieceTokenTypes(IntEnum):
+ NORMAL = 1
+ UNKNOWN = 2
+ CONTROL = 3
+ USER_DEFINED = 4
+ UNUSED = 5
+ BYTE = 6
+
+
+AnyModel = TypeVar("AnyModel", bound="type[Model]")
+
+
+class Model:
+ _model_classes: dict[str, type[Model]] = {}
+
+ dir_model: Path
+ ftype: gguf.LlamaFileType
+ is_big_endian: bool
+ endianess: gguf.GGUFEndian
+ use_temp_file: bool
+ lazy: bool
+ model_name: str | None
+ part_names: list[str]
+ is_safetensors: bool
+ hparams: dict[str, Any]
+ block_count: int
+ tensor_map: gguf.TensorNameMap
+ tensor_names: set[str] | None
+ fname_out: Path
+ gguf_writer: gguf.GGUFWriter
+
+ # subclasses should define this!
+ model_arch: gguf.MODEL_ARCH
+
+ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool,
+ model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
+ if type(self) is Model:
+ raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
+ self.dir_model = dir_model
+ self.ftype = ftype
+ self.is_big_endian = is_big_endian
+ self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
+ self.use_temp_file = use_temp_file
+ self.lazy = not eager
+ self.model_name = model_name
+ self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
+ self.is_safetensors = len(self.part_names) > 0
+ if not self.is_safetensors:
+ self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
+ self.hparams = Model.load_hparams(self.dir_model)
+ self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
+ self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
+ self.tensor_names = None
+ if self.ftype == gguf.LlamaFileType.GUESSED:
+ # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
+ _, first_tensor = next(self.get_tensors())
+ if first_tensor.dtype == torch.float16:
+ logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
+ self.ftype = gguf.LlamaFileType.MOSTLY_F16
+ else:
+ logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
+ self.ftype = gguf.LlamaFileType.MOSTLY_BF16
+ ftype_up: str = self.ftype.name.partition("_")[2].upper()
+ ftype_lw: str = ftype_up.lower()
+ # allow templating the file name with the output ftype, useful with the "auto" ftype
+ self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
+ self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
+ split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
+
+ @classmethod
+ def __init_subclass__(cls):
+ # can't use an abstract property, because overriding it without type errors
+ # would require using decorated functions instead of simply defining the property
+ if "model_arch" not in cls.__dict__:
+ raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
+
+ def find_hparam(self, keys: Iterable[str], optional: bool = False) -> Any:
+ key = next((k for k in keys if k in self.hparams), None)
+ if key is not None:
+ return self.hparams[key]
+ if optional:
+ return None
+ raise KeyError(f"could not find any of: {keys}")
+
+ def set_vocab(self):
+ self._set_vocab_gpt2()
+
+ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
+ tensor_names_from_parts: set[str] = set()
+
+ if len(self.part_names) > 1:
+ self.tensor_names = set()
+ index_name = "model.safetensors" if self.is_safetensors else "pytorch_model.bin"
+ index_name += ".index.json"
+ logger.info(f"gguf: loading model weight map from '{index_name}'")
+ with open(self.dir_model / index_name, "r", encoding="utf-8") as f:
+ index: dict[str, Any] = json.load(f)
+ weight_map = index.get("weight_map")
+ if weight_map is None or not isinstance(weight_map, dict):
+ raise ValueError(f"Can't load 'weight_map' from {index_name!r}")
+ self.tensor_names.update(weight_map.keys())
+ else:
+ self.tensor_names = tensor_names_from_parts
+
+ for part_name in self.part_names:
+ logger.info(f"gguf: loading model part '{part_name}'")
+ ctx: ContextManager[Any]
+ if self.is_safetensors:
+ from safetensors import safe_open
+ ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu"))
+ else:
+ ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
+
+ with ctx as model_part:
+ tensor_names_from_parts.update(model_part.keys())
+
+ for name in model_part.keys():
+ data = model_part.get_tensor(name) if self.is_safetensors else model_part[name]
+ if self.lazy:
+ data = LazyTorchTensor.from_eager(data)
+ yield name, data
+
+ # only verify tensor name presence; it doesn't matter if they are not in the right files
+ if len(sym_diff := tensor_names_from_parts.symmetric_difference(self.tensor_names)) > 0:
+ raise ValueError(f"Mismatch between weight map and model parts for tensor names: {sym_diff}")
+
+ def format_tensor_name(self, key: gguf.MODEL_TENSOR, bid: int | None = None, suffix: str = ".weight") -> str:
+ if key not in gguf.MODEL_TENSORS[self.model_arch]:
+ raise ValueError(f"Missing {key!r} for MODEL_TENSORS of {self.model_arch!r}")
+ name: str = gguf.TENSOR_NAMES[key]
+ if "{bid}" in name:
+ assert bid is not None
+ name = name.format(bid=bid)
+ return name + suffix
+
+ def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, bid: int | None, suffix: str = ".weight") -> bool:
+ if key not in gguf.MODEL_TENSORS[self.model_arch]:
+ return False
+ key_name: str = gguf.TENSOR_NAMES[key]
+ if "{bid}" in key_name:
+ if bid is None:
+ return False
+ key_name = key_name.format(bid=bid)
+ else:
+ if bid is not None:
+ return False
+ return name == (key_name + suffix)
+
+ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
+ new_name = self.tensor_map.get_name(key=name, try_suffixes=try_suffixes)
+ if new_name is None:
+ raise ValueError(f"Can not map tensor {name!r}")
+ return new_name
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_block_count(self.block_count)
+
+ if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
+ self.gguf_writer.add_context_length(n_ctx)
+ logger.info(f"gguf: context length = {n_ctx}")
+
+ n_embd = self.find_hparam(["hidden_size", "n_embd"])
+ self.gguf_writer.add_embedding_length(n_embd)
+ logger.info(f"gguf: embedding length = {n_embd}")
+
+ if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
+ self.gguf_writer.add_feed_forward_length(n_ff)
+ logger.info(f"gguf: feed forward length = {n_ff}")
+
+ n_head = self.find_hparam(["num_attention_heads", "n_head"])
+ self.gguf_writer.add_head_count(n_head)
+ logger.info(f"gguf: head count = {n_head}")
+
+ if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
+ self.gguf_writer.add_head_count_kv(n_head_kv)
+ logger.info(f"gguf: key-value head count = {n_head_kv}")
+
+ if (rope_theta := self.hparams.get("rope_theta")) is not None:
+ self.gguf_writer.add_rope_freq_base(rope_theta)
+ logger.info(f"gguf: rope theta = {rope_theta}")
+ if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
+ self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
+ logger.info(f"gguf: rms norm epsilon = {f_rms_eps}")
+ if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
+ self.gguf_writer.add_layer_norm_eps(f_norm_eps)
+ logger.info(f"gguf: layer norm epsilon = {f_norm_eps}")
+ if (n_experts := self.hparams.get("num_local_experts")) is not None:
+ self.gguf_writer.add_expert_count(n_experts)
+ logger.info(f"gguf: expert count = {n_experts}")
+ if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
+ self.gguf_writer.add_expert_used_count(n_experts_used)
+ logger.info(f"gguf: experts used count = {n_experts_used}")
+
+ self.gguf_writer.add_file_type(self.ftype)
+ logger.info(f"gguf: file type = {self.ftype}")
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+ def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
+ del name, new_name, bid, n_dims # unused
+
+ return False
+
+ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
+ del name, new_name, bid, n_dims # unused
+
+ return False
+
+ def write_tensors(self):
+ max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
+
+ for name, data_torch in self.get_tensors():
+ # we don't need these
+ if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+ continue
+
+ old_dtype = data_torch.dtype
+
+ # convert any unsupported data types to float32
+ if data_torch.dtype not in (torch.float16, torch.float32):
+ data_torch = data_torch.to(torch.float32)
+
+ # use the first number-like part of the tensor name as the block id
+ bid = None
+ for part in name.split("."):
+ if part.isdecimal():
+ bid = int(part)
+ break
+
+ for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)):
+ data: np.ndarray = data # type hint
+ n_dims = len(data.shape)
+ data_dtype = data.dtype
+ data_qtype: gguf.GGMLQuantizationType | None = None
+
+ # when both are True, f32 should win
+ extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
+ extra_f16 = self.extra_f16_tensors(name, new_name, bid, n_dims)
+
+ # Most of the codebase that takes in 1D tensors or norms only handles F32 tensors
+ # Conditions should closely match those in llama_model_quantize_internal in llama.cpp
+ extra_f32 = any(cond for cond in (
+ extra_f32,
+ n_dims == 1,
+ new_name.endswith("_norm.weight"),
+ ))
+
+ # Some tensor types are always in float32
+ extra_f32 = extra_f32 or any(self.match_model_tensor_name(new_name, key, bid) for key in (
+ gguf.MODEL_TENSOR.FFN_GATE_INP,
+ gguf.MODEL_TENSOR.POS_EMBD,
+ gguf.MODEL_TENSOR.TOKEN_TYPES,
+ ))
+
+ # if f16 desired, convert any float32 2-dim weight tensors to float16
+ extra_f16 = any(cond for cond in (
+ extra_f16,
+ (name.endswith(".weight") and n_dims >= 2),
+ ))
+
+ if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
+ if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
+ data = gguf.quantize_bf16(data)
+ assert data.dtype == np.int16
+ data_qtype = gguf.GGMLQuantizationType.BF16
+
+ elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):
+ data = gguf.quantize_q8_0(data)
+ assert data.dtype == np.uint8
+ data_qtype = gguf.GGMLQuantizationType.Q8_0
+
+ else: # default to float16 for quantized tensors
+ if data_dtype != np.float16:
+ data = data.astype(np.float16)
+ data_qtype = gguf.GGMLQuantizationType.F16
+
+ if data_qtype is None: # by default, convert to float32
+ if data_dtype != np.float32:
+ data = data.astype(np.float32)
+ data_qtype = gguf.GGMLQuantizationType.F32
+
+ shape = gguf.quant_shape_from_byte_shape(data.shape, data_qtype) if data.dtype == np.uint8 else data.shape
+
+ # reverse shape to make it similar to the internal ggml dimension order
+ shape_str = f"{{{', '.join(str(n) for n in reversed(shape))}}}"
+
+ # n_dims is implicit in the shape
+ logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
+
+ self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)
+
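Note that the per-tensor log line above prints shapes in ggml's dimension order (fastest-varying dimension first), i.e. the numpy shape reversed; for a hypothetical (n_vocab, n_embd) token-embedding tensor:

shape = (32000, 4096)    # assumed numpy shape, not taken from any real model
shape_str = f"{{{', '.join(str(n) for n in reversed(shape))}}}"
assert shape_str == "{4096, 32000}"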
+ def write(self):
+ self.write_tensors()
+ self.gguf_writer.write_header_to_file(self.fname_out)
+ self.gguf_writer.write_kv_data_to_file()
+ self.gguf_writer.write_tensors_to_file(progress=True)
+ self.gguf_writer.close()
+
+ def write_vocab(self):
+ if len(self.gguf_writer.tensors) != 1:
+ raise ValueError('Splitting the vocabulary is not supported')
+ self.gguf_writer.write_header_to_file(self.fname_out)
+ self.gguf_writer.write_kv_data_to_file()
+ self.gguf_writer.close()
+
+ @staticmethod
+ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
+ part_names: list[str] = []
+ for filename in os.listdir(dir_model):
+ if filename.startswith(prefix) and filename.endswith(suffix):
+ part_names.append(filename)
+
+ part_names.sort()
+
+ return part_names
+
+ @staticmethod
+ def load_hparams(dir_model: Path):
+ with open(dir_model / "config.json", "r", encoding="utf-8") as f:
+ return json.load(f)
+
+ @classmethod
+ def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
+ assert names
+
+ def func(modelcls: AnyModel) -> AnyModel:
+ for name in names:
+ cls._model_classes[name] = modelcls
+ return modelcls
+ return func
+
+ @classmethod
+ def from_model_architecture(cls, arch: str) -> type[Model]:
+ try:
+ return cls._model_classes[arch]
+ except KeyError:
+ raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
+
+ # used for GPT-2 BPE and WordPiece vocabs
+ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
+ tokens: list[str] = []
+ toktypes: list[int] = []
+
+ from transformers import AutoTokenizer
+ tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+ vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
+ assert max(tokenizer.vocab.values()) < vocab_size
+
+ tokpre = self.get_vocab_base_pre(tokenizer)
+
+ reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
+ added_vocab = tokenizer.get_added_vocab()
+
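+ # emit tokens in id order; ids missing from the tokenizer vocab become "[PAD{i}]" placeholders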
+ for i in range(vocab_size):
+ if i not in reverse_vocab:
+ tokens.append(f"[PAD{i}]")
+ toktypes.append(gguf.TokenType.USER_DEFINED)
+ elif reverse_vocab[i] in added_vocab:
+ tokens.append(reverse_vocab[i])
+ if tokenizer.added_tokens_decoder[i].special:
+ toktypes.append(gguf.TokenType.CONTROL)
+ else:
+ toktypes.append(gguf.TokenType.USER_DEFINED)
+ else:
+ tokens.append(reverse_vocab[i])
+ toktypes.append(gguf.TokenType.NORMAL)
+
+ return tokens, toktypes, tokpre
+
+ # NOTE: this function is generated by convert-hf-to-gguf-update.py
+ # do not modify it manually!
+ # ref: https://github.com/ggerganov/llama.cpp/pull/6920
+ # Marker: Start get_vocab_base_pre
+ def get_vocab_base_pre(self, tokenizer) -> str:
+ # encoding this string and hashing the resulting tokens would (hopefully) give us a unique identifier that
+ # is specific for the BPE pre-tokenizer used by the model
+ # we will use this unique identifier to write a "tokenizer.ggml.pre" entry in the GGUF file which we can
+ # use in llama.cpp to implement the same pre-tokenizer
+
+ chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶\u200d🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````""""......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
+
+ chktok = tokenizer.encode(chktxt)
+ chkhsh = sha256(str(chktok).encode()).hexdigest()
+
+ logger.debug(f"chktok: {chktok}")
+ logger.debug(f"chkhsh: {chkhsh}")
+
+ res = None
+
+ # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
+ # or pull the latest version of the model from Huggingface
+ # don't edit the hashes manually!
+ if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
+ # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
+ res = "llama-bpe"
+ if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
+ # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
+ res = "deepseek-llm"
+ if chkhsh == "347715f544604f9118bb75ed199f68779f423cabb20db6de6f31b908d04d7821":
+ # ref: https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base
+ res = "deepseek-coder"
+ if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
+ # ref: https://huggingface.co/tiiuae/falcon-7b
+ res = "falcon"
+ if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
+ # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
+ res = "bert-bge"
+ if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
+ # ref: https://huggingface.co/mosaicml/mpt-7b
+ res = "mpt"
+ if chkhsh == "35d91631860c815f952d711435f48d356ebac988362536bed955d43bfa436e34":
+ # ref: https://huggingface.co/bigcode/starcoder2-3b
+ res = "starcoder"
+ if chkhsh == "3ce83efda5659b07b1ad37ca97ca5797ea4285d9b9ab0dc679e4a720c9da7454":
+ # ref: https://huggingface.co/openai-community/gpt2
+ res = "gpt-2"
+ if chkhsh == "32d85c31273f8019248f2559fed492d929ea28b17e51d81d3bb36fff23ca72b3":
+ # ref: https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b
+ res = "stablelm2"
+ if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff":
+ # ref: https://huggingface.co/smallcloudai/Refact-1_6-base
+ res = "refact"
+ if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
+ # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
+ res = "command-r"
+ if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
+ # ref: https://huggingface.co/Qwen/Qwen1.5-7B
+ res = "qwen2"
+ if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
+ # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
+ res = "olmo"
+ if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
+ # ref: https://huggingface.co/databricks/dbrx-base
+ res = "dbrx"
+ if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
+ # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
+ res = "jina-v2-en"
+ if chkhsh == "171aeeedd6fb548d418a7461d053f11b6f1f1fc9b387bd66640d28a4b9f5c643":
+ # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-es
+ res = "jina-v2-es"
+ if chkhsh == "27949a2493fc4a9f53f5b9b029c82689cfbe5d3a1929bb25e043089e28466de6":
+ # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-de
+ res = "jina-v2-de"
+ if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
+ # ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
+ res = "smaug-bpe"
+ if chkhsh == "c7ea5862a53e4272c035c8238367063e2b270d51faa48c0f09e9d5b54746c360":
+ # ref: https://huggingface.co/LumiOpen/Poro-34B-chat
+ res = "poro-chat"
+ if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
+ # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
+ res = "jina-v2-code"
+ if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
+ # ref: https://huggingface.co/LumiOpen/Viking-7B
+ res = "viking"
+ if chkhsh == "b53802fb28e26d645c3a310b34bfe07da813026ec7c7716883404d5e0f8b1901":
+ # ref: https://huggingface.co/core42/jais-13b
+ res = "jais"
+
+ if res is None:
+ logger.warning("\n")
+ logger.warning("**************************************************************************************")
+ logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!")
+ logger.warning("** There are 2 possible reasons for this:")
+ logger.warning("** - the model has not been added to convert-hf-to-gguf-update.py yet")
+ logger.warning("** - the pre-tokenization config has changed upstream")
+ logger.warning("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.")
+ logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
+ logger.warning("**")
+ logger.warning(f"** chkhsh: {chkhsh}")
+ logger.warning("**************************************************************************************")
+ logger.warning("\n")
+ raise NotImplementedError("BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
+
+ logger.debug(f"tokenizer.ggml.pre: {repr(res)}")
+ logger.debug(f"chkhsh: {chkhsh}")
+
+ return res
+ # Marker: End get_vocab_base_pre
+
+ def _set_vocab_gpt2(self) -> None:
+ tokens, toktypes, tokpre = self.get_vocab_base()
+ self.gguf_writer.add_tokenizer_model("gpt2")
+ self.gguf_writer.add_tokenizer_pre(tokpre)
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def _set_vocab_qwen(self):
+ dir_model = self.dir_model
+ hparams = self.hparams
+ tokens: list[str] = []
+ toktypes: list[int] = []
+
+ from transformers import AutoTokenizer
+ tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
+ vocab_size = hparams["vocab_size"]
+ assert max(tokenizer.get_vocab().values()) < vocab_size
+
+ tokpre = self.get_vocab_base_pre(tokenizer)
+
+ merges = []
+ vocab = {}
+ mergeable_ranks = tokenizer.mergeable_ranks
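+ # rebuild the BPE merge list: re-split each multi-byte token with lower-ranked merges until exactly two parts remain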
+ for token, rank in mergeable_ranks.items():
+ vocab[QwenModel.token_bytes_to_string(token)] = rank
+ if len(token) == 1:
+ continue
+ merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+ assert len(merged) == 2
+ merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+ # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
+ added_vocab = tokenizer.special_tokens
+ reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **added_vocab}.items()}
+
+ for i in range(vocab_size):
+ if i not in reverse_vocab:
+ tokens.append(f"[PAD{i}]")
+ toktypes.append(gguf.TokenType.USER_DEFINED)
+ elif reverse_vocab[i] in added_vocab:
+ tokens.append(reverse_vocab[i])
+ toktypes.append(gguf.TokenType.CONTROL)
+ else:
+ tokens.append(reverse_vocab[i])
+ toktypes.append(gguf.TokenType.NORMAL)
+
+ self.gguf_writer.add_tokenizer_model("gpt2")
+ self.gguf_writer.add_tokenizer_pre(tokpre)
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
+ special_vocab.merges = merges
+ # only add special tokens when they were not already loaded from config.json
+ if len(special_vocab.special_token_ids) == 0:
+ special_vocab._set_special_token("bos", tokenizer.special_tokens["<|endoftext|>"])
+ special_vocab._set_special_token("eos", tokenizer.special_tokens["<|endoftext|>"])
+ # this one is usually not in config.json anyway
+ special_vocab._set_special_token("unk", tokenizer.special_tokens["<|endoftext|>"])
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def _set_vocab_sentencepiece(self, add_to_gguf=True):
+ tokens, scores, toktypes = self._create_vocab_sentencepiece()
+
+ self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_scores(scores)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def _create_vocab_sentencepiece(self):
+ from sentencepiece import SentencePieceProcessor
+
+ tokenizer_path = self.dir_model / 'tokenizer.model'
+
+ if not tokenizer_path.is_file():
+ raise FileNotFoundError(f"File not found: {tokenizer_path}")
+
+ tokenizer = SentencePieceProcessor()
+ tokenizer.LoadFromFile(str(tokenizer_path))
+
+ vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+ tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+ scores: list[float] = [-10000.0] * vocab_size
+ toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+ for token_id in range(tokenizer.vocab_size()):
+ piece = tokenizer.IdToPiece(token_id)
+ text = piece.encode("utf-8")
+ score = tokenizer.GetScore(token_id)
+
+ toktype = SentencePieceTokenTypes.NORMAL
+ if tokenizer.IsUnknown(token_id):
+ toktype = SentencePieceTokenTypes.UNKNOWN
+ elif tokenizer.IsControl(token_id):
+ toktype = SentencePieceTokenTypes.CONTROL
+ elif tokenizer.IsUnused(token_id):
+ toktype = SentencePieceTokenTypes.UNUSED
+ elif tokenizer.IsByte(token_id):
+ toktype = SentencePieceTokenTypes.BYTE
+
+ tokens[token_id] = text
+ scores[token_id] = score
+ toktypes[token_id] = toktype
+
+ added_tokens_file = self.dir_model / 'added_tokens.json'
+ if added_tokens_file.is_file():
+ with open(added_tokens_file, "r", encoding="utf-8") as f:
+ added_tokens_json = json.load(f)
+ for key in added_tokens_json:
+ token_id = added_tokens_json[key]
+ if (token_id >= vocab_size):
+ logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+ continue
+
+ tokens[token_id] = key.encode("utf-8")
+ scores[token_id] = -1000.0
+ toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+
+ if vocab_size > len(tokens):
+ pad_count = vocab_size - len(tokens)
+ logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
+ for i in range(1, pad_count + 1):
+ tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
+ scores.append(-1000.0)
+ toktypes.append(SentencePieceTokenTypes.UNUSED)
+
+ return tokens, scores, toktypes
+
+ def _set_vocab_llama_hf(self):
+ vocab = gguf.LlamaHfVocab(self.dir_model)
+ tokens = []
+ scores = []
+ toktypes = []
+
+ for text, score, toktype in vocab.all_tokens():
+ tokens.append(text)
+ scores.append(score)
+ toktypes.append(toktype)
+
+ assert len(tokens) == vocab.vocab_size
+
+ self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_scores(scores)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+
+@Model.register("GPTNeoXForCausalLM")
+class GPTNeoXModel(Model):
+ model_arch = gguf.MODEL_ARCH.GPTNEOX
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams["num_hidden_layers"]
+
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
+ self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+ self.gguf_writer.add_rope_dimension_count(
+ int(self.hparams["rotary_pct"] * (self.hparams["hidden_size"] // self.hparams["num_attention_heads"])),
+ )
+ self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
+ self.gguf_writer.add_parallel_residual(self.hparams.get("use_parallel_residual", True))
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
+ n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
+
+ tensors: list[tuple[str, Tensor]] = []
+
+ if re.match(r"gpt_neox\.layers\.\d+\.attention\.query_key_value\.weight", name):
+ # Map bloom-style qkv_linear to gpt-style qkv_linear
+ # bloom: https://github.com/huggingface/transformers/blob/main/src/transformers/models/bloom/modeling_bloom.py#L238-L252 # noqa
+ # gpt-2: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/modeling_gpt2.py#L312 # noqa
+ qkv_weights = data_torch.reshape((n_head, 3, n_embed // n_head, n_embed))
+ data_torch = torch.cat(
+ (
+ qkv_weights[:, 0, :, :].reshape((-1, n_embed)),
+ qkv_weights[:, 1, :, :].reshape((-1, n_embed)),
+ qkv_weights[:, 2, :, :].reshape((-1, n_embed)),
+ ),
+ dim=0,
+ )
+ logger.info("re-format attention.linear_qkv.weight")
+ elif re.match(r"gpt_neox\.layers\.\d+\.attention\.query_key_value\.bias", name):
+ qkv_bias = data_torch.reshape((n_head, 3, n_embed // n_head))
+ data_torch = torch.cat(
+ (
+ qkv_bias[:, 0, :].reshape((n_embed,)),
+ qkv_bias[:, 1, :].reshape((n_embed,)),
+ qkv_bias[:, 2, :].reshape((n_embed,)),
+ ),
+ dim=0,
+ )
+ logger.info("re-format attention.linear_qkv.bias")
+
+ tensors.append((self.map_tensor_name(name), data_torch))
+
+ return tensors
+
+
+@Model.register("BloomForCausalLM")
+class BloomModel(Model):
+ model_arch = gguf.MODEL_ARCH.BLOOM
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_name("Bloom")
+ n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
+ n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
+ self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed))
+ self.gguf_writer.add_embedding_length(n_embed)
+ self.gguf_writer.add_feed_forward_length(4 * n_embed)
+ self.gguf_writer.add_block_count(self.hparams["n_layer"])
+ self.gguf_writer.add_head_count(n_head)
+ self.gguf_writer.add_head_count_kv(n_head)
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
+ n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
+
+ name = re.sub(r'transformer\.', '', name)
+
+ tensors: list[tuple[str, Tensor]] = []
+
+ if re.match(r"h\.\d+\.self_attention\.query_key_value\.weight", name):
+ # Map bloom-style qkv_linear to gpt-style qkv_linear
+ # bloom: https://github.com/huggingface/transformers/blob/main/src/transformers/models/bloom/modeling_bloom.py#L238-L252 # noqa
+ # gpt-2: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/modeling_gpt2.py#L312 # noqa
+ qkv_weights = data_torch.reshape((n_head, 3, n_embed // n_head, n_embed))
+ data_torch = torch.cat(
+ (
+ qkv_weights[:, 0, :, :].reshape((-1, n_embed)),
+ qkv_weights[:, 1, :, :].reshape((-1, n_embed)),
+ qkv_weights[:, 2, :, :].reshape((-1, n_embed)),
+ ),
+ dim=0,
+ )
+ logger.info("re-format attention.linear_qkv.weight")
+ elif re.match(r"h\.\d+\.self_attention\.query_key_value\.bias", name):
+ qkv_bias = data_torch.reshape((n_head, 3, n_embed // n_head))
+ data_torch = torch.cat(
+ (
+ qkv_bias[:, 0, :].reshape((n_embed,)),
+ qkv_bias[:, 1, :].reshape((n_embed,)),
+ qkv_bias[:, 2, :].reshape((n_embed,)),
+ ),
+ dim=0,
+ )
+ logger.info("re-format attention.linear_qkv.bias")
+
+ tensors.append((self.map_tensor_name(name), data_torch))
+
+ if name == "word_embeddings.weight":
+ assert self.tensor_names is not None
+
+ # TODO: tie them at runtime, don't duplicate in the model file
+ if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
+
+ return tensors
+
+
+@Model.register("MPTForCausalLM")
+class MPTModel(Model):
+ model_arch = gguf.MODEL_ARCH.MPT
+
+ def set_vocab(self):
+ try:
+ self._set_vocab_gpt2()
+ except Exception:
+ # Fallback for SEA-LION model
+ self._set_vocab_sentencepiece()
+ self.gguf_writer.add_add_bos_token(False)
+ self.gguf_writer.add_pad_token_id(3)
+ self.gguf_writer.add_eos_token_id(1)
+ self.gguf_writer.add_unk_token_id(0)
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams["n_layers"]
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
+ self.gguf_writer.add_embedding_length(self.hparams["d_model"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(4 * self.hparams["d_model"])
+ self.gguf_writer.add_head_count(self.hparams["n_heads"])
+ if kv_n_heads := self.hparams["attn_config"].get("kv_n_heads"):
+ self.gguf_writer.add_head_count_kv(kv_n_heads)
+ self.gguf_writer.add_layer_norm_eps(1e-5)
+ if self.hparams["attn_config"]["clip_qkv"] is not None:
+ self.gguf_writer.add_clamp_kqv(self.hparams["attn_config"]["clip_qkv"])
+ if self.hparams["attn_config"]["alibi"]:
+ self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+ else:
+ self.gguf_writer.add_max_alibi_bias(0.0)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ if "scales" in name:
+ new_name = self.map_tensor_name(name, try_suffixes=(".weight", ".bias", ".scales"))
+ new_name = new_name.replace("scales", "act.scales")
+ else:
+ new_name = self.map_tensor_name(name, try_suffixes=(".weight", ".bias"))
+
+ return [(new_name, data_torch)]
+
+
+@Model.register("OrionForCausalLM")
+class OrionModel(Model):
+ model_arch = gguf.MODEL_ARCH.ORION
+
+ def set_vocab(self):
+ self._set_vocab_sentencepiece()
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams["num_hidden_layers"]
+ head_count = self.hparams["num_attention_heads"]
+ head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+ hf_repo = self.hparams.get("_name_or_path", "")
+
+ ctx_length = 0
+ if "max_sequence_length" in self.hparams:
+ ctx_length = self.hparams["max_sequence_length"]
+ elif "max_position_embeddings" in self.hparams:
+ ctx_length = self.hparams["max_position_embeddings"]
+ elif "model_max_length" in self.hparams:
+ ctx_length = self.hparams["model_max_length"]
+ else:
+ raise ValueError("gguf: can not find ctx length parameter.")
+
+ self.gguf_writer.add_file_type(self.ftype)
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_source_hf_repo(hf_repo)
+ self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
+ self.gguf_writer.add_context_length(ctx_length)
+ self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+ self.gguf_writer.add_head_count(head_count)
+ self.gguf_writer.add_head_count_kv(head_count_kv)
+ # note: config provides rms norm but it is actually layer norm
+ # ref: https://huggingface.co/OrionStarAI/Orion-14B-Chat/blob/276a17221ce42beb45f66fac657a41540e71f4f5/modeling_orion.py#L570-L571
+ self.gguf_writer.add_layer_norm_eps(self.hparams["rms_norm_eps"])
+
+
+@Model.register("BaichuanForCausalLM", "BaiChuanForCausalLM")
+class BaichuanModel(Model):
+ model_arch = gguf.MODEL_ARCH.BAICHUAN
+
+ def set_vocab(self):
+ self._set_vocab_sentencepiece()
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams["num_hidden_layers"]
+ head_count = self.hparams["num_attention_heads"]
+ head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+ hf_repo = self.hparams.get("_name_or_path", "")
+
+ ctx_length = 0
+ if "max_sequence_length" in self.hparams:
+ ctx_length = self.hparams["max_sequence_length"]
+ elif "max_position_embeddings" in self.hparams:
+ ctx_length = self.hparams["max_position_embeddings"]
+ elif "model_max_length" in self.hparams:
+ ctx_length = self.hparams["model_max_length"]
+ else:
+ raise ValueError("gguf: can not find ctx length parameter.")
+
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_source_hf_repo(hf_repo)
+ self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
+ self.gguf_writer.add_context_length(ctx_length)
+ self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+ self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count(head_count)
+ self.gguf_writer.add_head_count_kv(head_count_kv)
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+ if self.hparams["rope_scaling"].get("type") == "linear":
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+ self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ head_count = self.hparams["num_attention_heads"]
+ head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+
+ tensors: list[tuple[str, Tensor]] = []
+
+ if bid is not None and name == f"model.layers.{bid}.self_attn.W_pack.weight":
+ logger.info(f"Unpacking and permuting layer {bid}")
+ tensors = [
+ (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid),
+ self._reverse_hf_permute_part(data_torch, 0, head_count, head_count)),
+ (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid),
+ self._reverse_hf_permute_part(data_torch, 1, head_count, head_count_kv)),
+ (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid),
+ self._reverse_hf_part(data_torch, 2)),
+ ]
+ else:
+ tensors = [(self.map_tensor_name(name), data_torch)]
+
+ return tensors
+
+ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+ if n_kv_head is not None and n_head != n_kv_head:
+ n_head //= n_kv_head
+
+ return (
+ weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+ .swapaxes(1, 2)
+ .reshape(weights.shape)
+ )
+
+ def _reverse_hf_permute_part(
+ self, weights: Tensor, n_part: int, n_head: int, n_head_kv: int | None = None,
+ ) -> Tensor:
+ r = weights.shape[0] // 3
+ return self._reverse_hf_permute(weights[r * n_part:r * n_part + r, ...], n_head, n_head_kv)
+
+ def _reverse_hf_part(self, weights: Tensor, n_part: int) -> Tensor:
+ r = weights.shape[0] // 3
+ return weights[r * n_part:r * n_part + r, ...]
+
+
+@Model.register("XverseForCausalLM")
+class XverseModel(Model):
+ model_arch = gguf.MODEL_ARCH.XVERSE
+
+ def set_vocab(self):
+ assert (self.dir_model / "tokenizer.json").is_file()
+ dir_model = self.dir_model
+ hparams = self.hparams
+
+ tokens: list[bytes] = []
+ toktypes: list[int] = []
+
+ from transformers import AutoTokenizer
+ tokenizer = AutoTokenizer.from_pretrained(dir_model)
+ vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
+ # Since we are checking the maximum index, we need to ensure it's strictly less than vocab_size,
+ # because vocab_size is the count of items, and indexes start at 0.
+ max_vocab_index = max(tokenizer.get_vocab().values())
+ if max_vocab_index >= vocab_size:
+ raise ValueError("Vocabulary size exceeds expected maximum size.")
+
+ reverse_vocab: dict[int, str] = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
+ added_vocab = tokenizer.get_added_vocab()
+
+ for token_id in range(vocab_size):
+ token_text = reverse_vocab[token_id].encode('utf-8')
+ # replace "\x00" with a string of length > 0
+ if token_text == b"\x00":
+ toktype = gguf.TokenType.BYTE # special
+ token_text = f"<{token_text}>".encode('utf-8')
+ elif re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
+ toktype = gguf.TokenType.BYTE # special
+ elif reverse_vocab[token_id] in added_vocab:
+ if tokenizer.added_tokens_decoder[token_id].special:
+ toktype = gguf.TokenType.CONTROL
+ else:
+ toktype = gguf.TokenType.USER_DEFINED
+ else:
+ toktype = gguf.TokenType.NORMAL
+
+ tokens.append(token_text)
+ toktypes.append(toktype)
+
+ self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams["num_hidden_layers"]
+ head_count = self.hparams["num_attention_heads"]
+ head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+ hf_repo = self.hparams.get("_name_or_path", "")
+
+ ctx_length = 0
+ if "max_sequence_length" in self.hparams:
+ ctx_length = self.hparams["max_sequence_length"]
+ elif "max_position_embeddings" in self.hparams:
+ ctx_length = self.hparams["max_position_embeddings"]
+ elif "model_max_length" in self.hparams:
+ ctx_length = self.hparams["model_max_length"]
+ else:
+ raise ValueError("gguf: can not find ctx length parameter.")
+
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_source_hf_repo(hf_repo)
+ self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
+ self.gguf_writer.add_context_length(ctx_length)
+ self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+ self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count(head_count)
+ self.gguf_writer.add_head_count_kv(head_count_kv)
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+ if self.hparams["rope_scaling"].get("type") == "linear":
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+ self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ head_count = self.hparams["num_attention_heads"]
+ head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+
+ # HF models permute some of the tensors, so we need to undo that
+ if name.endswith("q_proj.weight"):
+ data_torch = self._reverse_hf_permute(data_torch, head_count, head_count)
+ if name.endswith("k_proj.weight"):
+ data_torch = self._reverse_hf_permute(data_torch, head_count, head_count_kv)
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+ if n_kv_head is not None and n_head != n_kv_head:
+ n_head //= n_kv_head
+
+ return (
+ weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+ .swapaxes(1, 2)
+ .reshape(weights.shape)
+ )
+
+
+@Model.register("FalconForCausalLM", "RWForCausalLM")
+class FalconModel(Model):
+ model_arch = gguf.MODEL_ARCH.FALCON
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams.get("num_hidden_layers")
+ if block_count is None:
+ block_count = self.hparams["n_layer"] # old name
+
+ n_head = self.hparams.get("num_attention_heads")
+ if n_head is None:
+ n_head = self.hparams["n_head"] # old name
+
+ n_head_kv = self.hparams.get("num_kv_heads")
+ if n_head_kv is None:
+ n_head_kv = self.hparams.get("n_head_kv", 1) # old name
+
+ self.gguf_writer.add_name("Falcon")
+ self.gguf_writer.add_context_length(2048) # not in config.json
+ self.gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
+ self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+ self.gguf_writer.add_feed_forward_length(4 * self.hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(n_head)
+ self.gguf_writer.add_head_count_kv(n_head_kv)
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ # QKV tensor transform
+ # The original query_key_value tensor contains n_head_kv "kv groups",
+ # each consisting of n_head/n_head_kv query weights followed by one key
+ # and one value weight (shared by all query heads in the kv group).
+ # This layout makes it a big pain to work with in GGML.
+ # So we rearrange them here, so that we have n_head query weights
+ # followed by n_head_kv key weights followed by n_head_kv value weights,
+ # in contiguous fashion.
+ # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py
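+ # e.g. with n_head = 4, n_head_kv = 2 the source row order q0 q1 k0 v0 q2 q3 k1 v1 becomes q0 q1 q2 q3 k0 k1 v0 v1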
+
+ if "query_key_value" in name:
+ n_head = self.find_hparam(["num_attention_heads", "n_head"])
+ n_head_kv = self.find_hparam(["num_kv_heads", "n_head_kv"], optional=True) or 1
+ head_dim = self.hparams["hidden_size"] // n_head
+
+ qkv = data_torch.view(n_head_kv, n_head // n_head_kv + 2, head_dim, head_dim * n_head)
+ q = qkv[:, :-2].reshape(n_head * head_dim, head_dim * n_head)
+ k = qkv[:, [-2]].reshape(n_head_kv * head_dim, head_dim * n_head)
+ v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head)
+ data_torch = torch.cat((q, k, v)).reshape_as(data_torch)
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("GPTBigCodeForCausalLM")
+class StarCoderModel(Model):
+ model_arch = gguf.MODEL_ARCH.STARCODER
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams["n_layer"]
+
+ self.gguf_writer.add_name("StarCoder")
+ self.gguf_writer.add_context_length(self.hparams["n_positions"])
+ self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+ self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(self.hparams["n_head"])
+ self.gguf_writer.add_head_count_kv(1)
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+
+@Model.register("GPTRefactForCausalLM")
+class RefactModel(Model):
+ model_arch = gguf.MODEL_ARCH.REFACT
+
+ def set_vocab(self):
+ super().set_vocab()
+
+ # TODO: how to determine special FIM tokens automatically?
+ special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
+ special_token_types = ['prefix', 'suffix', 'middle', 'fsep', 'eot'])
+ special_vocab._set_special_token("prefix", 1)
+ special_vocab._set_special_token("suffix", 3)
+ special_vocab._set_special_token("middle", 2)
+ special_vocab._set_special_token("fsep", 4) # is this correct?
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def set_gguf_parameters(self):
+ hidden_dim = self.hparams["n_embd"]
+ inner_dim = 4 * hidden_dim
+ hidden_dim = int(2 * inner_dim / 3)
+ multiple_of = 256
+ ff_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)
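+ # e.g. n_embd = 2048 -> inner_dim = 8192 -> hidden_dim = 5461 -> ff_dim = 5632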
+
+ block_count = self.hparams["n_layer"]
+
+ self.gguf_writer.add_name("Refact")
+ # Refact uses ALiBi, so this context length comes from config.json and may only reflect the training setup.
+ self.gguf_writer.add_context_length(self.hparams["n_positions"])
+ self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+
+ self.gguf_writer.add_feed_forward_length(ff_dim)
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(self.hparams["n_head"])
+ self.gguf_writer.add_head_count_kv(1)
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ hidden_dim = self.hparams["n_embd"]
+ inner_dim = 4 * hidden_dim
+ hidden_dim = int(2 * inner_dim / 3)
+ multiple_of = 256
+ ff_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)
+ n_head = self.hparams["n_head"]
+ n_head_kv = 1
+ head_dim = self.hparams["n_embd"] // n_head
+
+ tensors: list[tuple[str, Tensor]] = []
+
+ if bid is not None:
+ if name == f"transformer.h.{bid}.attn.kv.weight":
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), data_torch[:n_head_kv * head_dim]))
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), data_torch[n_head_kv * head_dim:]))
+ elif name == f"transformer.h.{bid}.attn.q.weight":
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), data_torch))
+ elif name == f"transformer.h.{bid}.mlp.gate_up_proj.weight":
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), data_torch[:ff_dim]))
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), data_torch[ff_dim:]))
+
+ if len(tensors) == 0:
+ tensors.append((self.map_tensor_name(name), data_torch))
+
+ return tensors
+
+
+@Model.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM")
+class StableLMModel(Model):
+ model_arch = gguf.MODEL_ARCH.STABLELM
+
+ def set_vocab(self):
+ if (self.dir_model / "tokenizer.json").is_file():
+ self._set_vocab_gpt2()
+ else:
+ # StableLM 2 1.6B uses a vocab in a similar format to Qwen's vocab
+ self._set_vocab_qwen()
+
+ def set_gguf_parameters(self):
+ hparams = self.hparams
+ block_count = hparams["num_hidden_layers"]
+
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+ self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+ rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"])
+ self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
+ self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
+ self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
+ self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
+ self.gguf_writer.add_file_type(self.ftype)
+
+ _q_norms: list[dict[str, Tensor]] | None = None
+ _k_norms: list[dict[str, Tensor]] | None = None
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ n_head = self.hparams["num_attention_heads"]
+ n_kv_head = self.hparams["num_key_value_heads"]
+
+ if name.find("q_layernorm.norms") != -1:
+ assert bid is not None
+
+ if self._q_norms is None:
+ self._q_norms = [{} for _ in range(self.block_count)]
+
+ self._q_norms[bid][name] = data_torch
+
+ if len(self._q_norms[bid]) >= n_head:
+ return self._stack_qk_norm(bid, n_head, self._q_norms[bid], "q_layernorm")
+ else:
+ return []
+
+ if name.find("k_layernorm.norms") != -1:
+ assert bid is not None
+
+ if self._k_norms is None:
+ self._k_norms = [{} for _ in range(self.block_count)]
+
+ self._k_norms[bid][name] = data_torch
+
+ if len(self._k_norms[bid]) >= n_kv_head:
+ return self._stack_qk_norm(bid, n_kv_head, self._k_norms[bid], "k_layernorm")
+ else:
+ return []
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+ def _stack_qk_norm(self, bid: int, n_head: int, norms: dict[str, Tensor], layer_name: str = "q_layernorm"):
+ datas: list[Tensor] = []
+ # extract the norms in order
+ for xid in range(n_head):
+ ename = f"model.layers.{bid}.self_attn.{layer_name}.norms.{xid}.weight"
+ datas.append(norms[ename])
+ del norms[ename]
+ data_torch = torch.stack(datas, dim=0)
+
+ merged_name = f"model.layers.{bid}.self_attn.{layer_name}.weight"
+ new_name = self.map_tensor_name(merged_name)
+
+ return [(new_name, data_torch)]
+
+ def write_tensors(self):
+ super().write_tensors()
+
+ if self._q_norms is not None or self._k_norms is not None:
+ # flatten two `list[dict[str, Tensor]]` into a single `list[str]`
+ norms = (
+ [k for d in self._q_norms for k in d.keys()] if self._q_norms is not None else []
+ ) + (
+ [k for d in self._k_norms for k in d.keys()] if self._k_norms is not None else []
+ )
+ if len(norms) > 0:
+ raise ValueError(f"Unprocessed norms: {norms}")
+
+
+@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
+class LlamaModel(Model):
+ model_arch = gguf.MODEL_ARCH.LLAMA
+
+ def set_vocab(self):
+ try:
+ self._set_vocab_sentencepiece()
+ except FileNotFoundError:
+ try:
+ self._set_vocab_llama_hf()
+ except (FileNotFoundError, TypeError):
+ # Llama 3
+ self._set_vocab_gpt2()
+
+ # Apply to CodeLlama only (and ignore for Llama 3 with a vocab size of 128256)
+ if self.hparams.get("vocab_size", 32000) == 32016:
+ special_vocab = gguf.SpecialVocab(
+ self.dir_model, load_merges=False,
+ special_token_types = ['prefix', 'suffix', 'middle', 'eot']
+ )
+ special_vocab._set_special_token("prefix", 32007)
+ special_vocab._set_special_token("suffix", 32008)
+ special_vocab._set_special_token("middle", 32009)
+ special_vocab._set_special_token("eot", 32010)
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ hparams = self.hparams
+ self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+ self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+
+ if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+ if self.hparams["rope_scaling"].get("type") == "linear":
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+ self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+
+ tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+ if tokenizer_config_file.is_file():
+ with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+ tokenizer_config_json = json.load(f)
+ if "add_prefix_space" in tokenizer_config_json:
+ self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
+
+ # Apply to granite small models only
+ if self.hparams.get("vocab_size", 32000) == 49152:
+ self.gguf_writer.add_add_bos_token(False)
+
+ @staticmethod
+ def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
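+ # undo the q/k head permutation applied by the Hugging Face checkpoint conversion so the rotary layout matches what llama.cpp expects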
+ if n_head_kv is not None and n_head != n_head_kv:
+ n_head = n_head_kv
+ return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+ .swapaxes(1, 2)
+ .reshape(weights.shape))
+
+ _experts: list[dict[str, Tensor]] | None = None
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ n_head = self.hparams["num_attention_heads"]
+ n_kv_head = self.hparams.get("num_key_value_heads")
+
+ if name.endswith(("q_proj.weight", "q_proj.bias")):
+ data_torch = LlamaModel.permute(data_torch, n_head, n_head)
+ if name.endswith(("k_proj.weight", "k_proj.bias")):
+ data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
+
+ # process the experts separately
+ if name.find("block_sparse_moe.experts") != -1:
+ n_experts = self.hparams["num_local_experts"]
+
+ assert bid is not None
+
+ if self._experts is None:
+ self._experts = [{} for _ in range(self.block_count)]
+
+ self._experts[bid][name] = data_torch
+
+ if len(self._experts[bid]) >= n_experts * 3:
+ tensors: list[tuple[str, Tensor]] = []
+
+ # merge the experts into a single 3d tensor
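+ # (Mixtral naming: w1 = FFN gate, w2 = FFN down, w3 = FFN up)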
+ for wid in ["w1", "w2", "w3"]:
+ datas: list[Tensor] = []
+
+ for xid in range(n_experts):
+ ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid}.weight"
+ datas.append(self._experts[bid][ename])
+ del self._experts[bid][ename]
+
+ data_torch = torch.stack(datas, dim=0)
+
+ merged_name = f"layers.{bid}.feed_forward.experts.{wid}.weight"
+
+ new_name = self.map_tensor_name(merged_name)
+
+ tensors.append((new_name, data_torch))
+ return tensors
+ else:
+ return []
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+ def write_tensors(self):
+ super().write_tensors()
+
+ if self._experts is not None:
+ # flatten `list[dict[str, Tensor]]` into `list[str]`
+ experts = [k for d in self._experts for k in d.keys()]
+ if len(experts) > 0:
+ raise ValueError(f"Unprocessed experts: {experts}")
+
+
+@Model.register("BitnetForCausalLM")
+class BitnetModel(Model):
+ model_arch = gguf.MODEL_ARCH.BITNET
+
+ def set_vocab(self):
+ self._set_vocab_sentencepiece()
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+ self.gguf_writer.add_rope_scaling_factor(1.0)
+
+ def weight_quant(self, weight):
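+ # BitNet-style ternary quantization: round each weight to {-1, 0, +1} and return a single per-tensor scale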
+ dtype = weight.dtype
+ weight = weight.float()
+ s = 1 / weight.abs().mean().clamp(min=1e-5)
+ weight = (weight * s).round().clamp(-1, 1) / s
+ scale = weight.abs().max().unsqueeze(0)
+ weight = torch.where(weight.abs().less(1e-6), 0, weight).type(dtype)
+ weight = torch.sign(weight).type(dtype)
+ return weight.type(dtype), scale.type(torch.float32)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ new_name = self.map_tensor_name(name)
+
+ if any(self.match_model_tensor_name(new_name, key, bid) for key in [
+ gguf.MODEL_TENSOR.ATTN_Q,
+ gguf.MODEL_TENSOR.ATTN_K,
+ gguf.MODEL_TENSOR.ATTN_V,
+ gguf.MODEL_TENSOR.ATTN_OUT,
+ gguf.MODEL_TENSOR.FFN_UP,
+ gguf.MODEL_TENSOR.FFN_DOWN,
+ gguf.MODEL_TENSOR.FFN_GATE,
+ ]):
+ # transform weight into 1/0/-1 (in fp32)
+ weight_torch, scale_torch = self.weight_quant(data_torch)
+ yield (new_name, weight_torch)
+ yield (new_name.removesuffix(".weight") + ".scale", scale_torch)
+ else:
+ yield (new_name, data_torch)
+
+
+@Model.register("GrokForCausalLM")
+class GrokModel(Model):
+ model_arch = gguf.MODEL_ARCH.GROK
+
+ def set_vocab(self):
+ self._set_vocab_sentencepiece()
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ self.gguf_writer.add_name("Grok")
+
+ _experts: list[dict[str, Tensor]] | None = None
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ # process the experts separately
+ if name.find(".moe.") != -1:
+ n_experts = self.hparams["num_local_experts"]
+
+ assert bid is not None
+
+ if self._experts is None:
+ self._experts = [{} for _ in range(self.block_count)]
+
+ self._experts[bid][name] = data_torch
+
+ if len(self._experts[bid]) >= n_experts * 3:
+ tensors: list[tuple[str, Tensor]] = []
+
+ # merge the experts into a single 3d tensor
+ for wid in ["linear", "linear_1", "linear_v"]:
+ datas: list[Tensor] = []
+
+ for xid in range(n_experts):
+ ename = f"transformer.decoder_layer.{bid}.moe.{xid}.{wid}.weight"
+ datas.append(self._experts[bid][ename])
+ del self._experts[bid][ename]
+
+ data_torch = torch.stack(datas, dim=0)
+
+ merged_name = f"transformer.decoder_layer.{bid}.moe.{wid}.weight"
+
+ new_name = self.map_tensor_name(merged_name)
+
+ tensors.append((new_name, data_torch))
+ return tensors
+ else:
+ return []
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("DbrxForCausalLM")
+class DbrxModel(Model):
+ model_arch = gguf.MODEL_ARCH.DBRX
+
+ def set_gguf_parameters(self):
+ ffn_config = self.hparams["ffn_config"]
+ attn_config = self.hparams["attn_config"]
+ self.gguf_writer.add_name(self.hparams["model_type"])
+ self.gguf_writer.add_block_count(self.hparams["n_layers"])
+
+ self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
+ self.gguf_writer.add_embedding_length(self.hparams["d_model"])
+ self.gguf_writer.add_feed_forward_length(ffn_config["ffn_hidden_size"])
+
+ self.gguf_writer.add_head_count(self.hparams["n_heads"])
+ self.gguf_writer.add_head_count_kv(attn_config["kv_n_heads"])
+
+ self.gguf_writer.add_rope_freq_base(attn_config["rope_theta"])
+
+ self.gguf_writer.add_clamp_kqv(attn_config["clip_qkv"])
+
+ self.gguf_writer.add_expert_count(ffn_config["moe_num_experts"])
+ self.gguf_writer.add_expert_used_count(ffn_config["moe_top_k"])
+
+ self.gguf_writer.add_layer_norm_eps(1e-5)
+
+ self.gguf_writer.add_file_type(self.ftype)
+ logger.info(f"gguf: file type = {self.ftype}")
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ n_expert = self.hparams["ffn_config"]["moe_num_experts"]
+ n_ff = self.hparams["ffn_config"]["ffn_hidden_size"]
+ n_embd = self.hparams["d_model"]
+
+ # Specific behavior for experts tensors: suffix .weight, view as 3D and transpose
+ # original implementation expects (n_expert, n_ff, n_embd) for all experts weights
+ # But llama.cpp moe graph works differently
+ # AND the dimensions in ggml are typically in the reverse order of the pytorch dimensions
+ # so (n_expert, n_ff, n_embd) in pytorch is {n_embd, n_ff, n_expert} in ggml_tensor
+ exp_tensor_names = {"ffn.experts.mlp.w1": None, # LLM_TENSOR_FFN_GATE_EXPS ggml_tensor->ne{n_embd, n_ff, n_expert}
+ "ffn.experts.mlp.w2": (0, 2, 1), # LLM_TENSOR_FFN_DOWN_EXPS ggml_tensor->ne{n_ff, n_embd, n_expert}
+ "ffn.experts.mlp.v1": None} # LLM_TENSOR_FFN_UP_EXPS ggml_tensor->ne{n_embd, n_ff, n_expert}
+ experts = False
+
+ for exp_tensor_name in exp_tensor_names.keys():
+ if name.find(exp_tensor_name) != -1 and name.find(".weight") == -1:
+ experts = True
+ data_torch = data_torch.view(n_expert, n_ff, n_embd)
+ if (permute_tensor := exp_tensor_names[exp_tensor_name]) is not None:
+ data_torch = data_torch.permute(*permute_tensor)
+ break
+
+ # map tensor names
+ # In MoE models the ffn tensors are typically most of the model weights,
+ # and need to be quantizable. Quantize expects tensor names to be suffixed by .weight.
+ # Every other model has weight names ending in .weight, so assume that convention here;
+ # dbrx does not follow it:
+ # https://huggingface.co/databricks/dbrx-instruct/blob/main/model.safetensors.index.json#L15
+ new_name = self.map_tensor_name(name if not experts else name + ".weight", try_suffixes=(".weight",))
+
+ return [(new_name, data_torch)]
+
+ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
+ del name, new_name, bid # unused
+
+ return n_dims > 1
+
+
+@Model.register("MiniCPMForCausalLM")
+class MiniCPMModel(Model):
+ model_arch = gguf.MODEL_ARCH.MINICPM
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams["num_hidden_layers"]
+ self.gguf_writer.add_name("MiniCPM")
+ self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
+ self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+ self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def set_vocab(self):
+ self._set_vocab_llama_hf()
+
+ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+ if n_kv_head is not None and n_head != n_kv_head:
+ n_head //= n_kv_head
+
+ return (
+ weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+ .swapaxes(1, 2)
+ .reshape(weights.shape)
+ )
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ n_head = self.hparams["num_attention_heads"]
+ n_kv_head = self.hparams.get("num_key_value_heads")
+
+ # HF models permute some of the tensors, so we need to undo that
+ if name.endswith("q_proj.weight"):
+ data_torch = self._reverse_hf_permute(data_torch, n_head, n_head)
+ if name.endswith("k_proj.weight"):
+ data_torch = self._reverse_hf_permute(data_torch, n_head, n_kv_head)
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("QWenLMHeadModel")
+class QwenModel(Model):
+ model_arch = gguf.MODEL_ARCH.QWEN
+
+ @staticmethod
+ def token_bytes_to_string(b):
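+ # map raw token bytes to the printable GPT-2 byte-to-unicode representation used by BPE vocabs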
+ from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
+ byte_encoder = bytes_to_unicode()
+ return ''.join([byte_encoder[ord(char)] for char in b.decode('latin-1')])
+
+ @staticmethod
+ def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
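+ # greedily apply merges with rank < max_rank; with max_rank set to a token's own rank this recovers the two parts of its final merge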
+ parts = [bytes([b]) for b in token]
+ while True:
+ min_idx = None
+ min_rank = None
+ for i, pair in enumerate(zip(parts[:-1], parts[1:])):
+ rank = mergeable_ranks.get(pair[0] + pair[1])
+ if rank is not None and (min_rank is None or rank < min_rank):
+ min_idx = i
+ min_rank = rank
+ if min_rank is None or (max_rank is not None and min_rank >= max_rank):
+ break
+ assert min_idx is not None
+ parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
+ return parts
+
+ def set_vocab(self):
+ self._set_vocab_qwen()
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_name("Qwen")
+ self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
+ self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
+ self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+ self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+ self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
+ self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+
+@Model.register("Qwen2ForCausalLM")
+class Qwen2Model(Model):
+ model_arch = gguf.MODEL_ARCH.QWEN2
+
+ def set_vocab(self):
+ try:
+ self._set_vocab_sentencepiece()
+ except FileNotFoundError:
+ self._set_vocab_gpt2()
+
+
+@Model.register("Qwen2MoeForCausalLM")
+class Qwen2MoeModel(Model):
+ model_arch = gguf.MODEL_ARCH.QWEN2MOE
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ if (n_experts := self.hparams.get("num_experts")) is not None:
+ self.gguf_writer.add_expert_count(n_experts)
+ if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+ self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+ logger.info(f"gguf: expert feed forward length = {moe_intermediate_size}")
+ if (shared_expert_intermediate_size := self.hparams.get('shared_expert_intermediate_size')) is not None:
+ self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size)
+ logger.info(f"gguf: expert shared feed forward length = {shared_expert_intermediate_size}")
+
+ _experts: list[dict[str, Tensor]] | None = None
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ # process the experts separately
+ if name.find("experts") != -1:
+ n_experts = self.hparams["num_experts"]
+ assert bid is not None
+
+ if self._experts is None:
+ self._experts = [{} for _ in range(self.block_count)]
+
+ self._experts[bid][name] = data_torch
+
+ if len(self._experts[bid]) >= n_experts * 3:
+ tensors: list[tuple[str, Tensor]] = []
+
+ # merge the experts into a single 3d tensor
+ for w_name in ["down_proj", "gate_proj", "up_proj"]:
+ datas: list[Tensor] = []
+
+ for xid in range(n_experts):
+ ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+ datas.append(self._experts[bid][ename])
+ del self._experts[bid][ename]
+
+ data_torch = torch.stack(datas, dim=0)
+
+ merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+
+ new_name = self.map_tensor_name(merged_name)
+
+ tensors.append((new_name, data_torch))
+ return tensors
+ else:
+ return []
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+ def write_tensors(self):
+ super().write_tensors()
+
+ if self._experts is not None:
+ # flatten `list[dict[str, Tensor]]` into `list[str]`
+ experts = [k for d in self._experts for k in d.keys()]
+ if len(experts) > 0:
+ raise ValueError(f"Unprocessed experts: {experts}")
+
+
+@Model.register("GPT2LMHeadModel")
+class GPT2Model(Model):
+ model_arch = gguf.MODEL_ARCH.GPT2
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_block_count(self.hparams["n_layer"])
+ self.gguf_writer.add_context_length(self.hparams["n_ctx"])
+ self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+ self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
+ self.gguf_writer.add_head_count(self.hparams["n_head"])
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ tensors: list[tuple[str, Tensor]] = []
+
+ # we don't need these
+ if name.endswith((".attn.bias", ".attn.masked_bias")):
+ return tensors
+
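+ # HF GPT-2 uses Conv1D modules whose weights are transposed relative to nn.Linear, so transpose them back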
+ if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight")):
+ data_torch = data_torch.transpose(1, 0)
+
+ new_name = self.map_tensor_name(name)
+
+ tensors.append((new_name, data_torch))
+
+ # note: GPT2 output is tied to (same as) wte in original model
+ if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
+
+ return tensors
+
+
+@Model.register("PhiForCausalLM")
+class Phi2Model(Model):
+ model_arch = gguf.MODEL_ARCH.PHI2
+
+ def set_gguf_parameters(self):
+ block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
+
+ rot_pct = self.find_hparam(["partial_rotary_factor"])
+ n_embd = self.find_hparam(["hidden_size", "n_embd"])
+ n_head = self.find_hparam(["num_attention_heads", "n_head"])
+
+ self.gguf_writer.add_name("Phi2")
+ self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
+
+ self.gguf_writer.add_embedding_length(n_embd)
+ self.gguf_writer.add_feed_forward_length(4 * n_embd)
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(n_head)
+ self.gguf_writer.add_head_count_kv(n_head)
+ self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_epsilon", "layer_norm_eps"]))
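+ # e.g. (illustrative) with partial_rotary_factor = 0.4, n_embd = 2560 and n_head = 32,
+ # the rotary dimension per head is int(0.4 * 2560) // 32 = 32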
+ self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
+ self.gguf_writer.add_file_type(self.ftype)
+ self.gguf_writer.add_add_bos_token(False)
+
+
+@Model.register("Phi3ForCausalLM")
+class Phi3MiniModel(Model):
+ model_arch = gguf.MODEL_ARCH.PHI3
+
+ def set_vocab(self):
+ from sentencepiece import SentencePieceProcessor
+
+ tokenizer_path = self.dir_model / 'tokenizer.model'
+
+ if not tokenizer_path.is_file():
+ raise ValueError(f'Error: Missing {tokenizer_path}')
+
+ tokenizer = SentencePieceProcessor()
+ tokenizer.LoadFromFile(str(tokenizer_path))
+
+ vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+ tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+ scores: list[float] = [-10000.0] * vocab_size
+ toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+ for token_id in range(tokenizer.vocab_size()):
+
+ piece = tokenizer.IdToPiece(token_id)
+ text = piece.encode("utf-8")
+ score = tokenizer.GetScore(token_id)
+
+ toktype = SentencePieceTokenTypes.NORMAL
+ if tokenizer.IsUnknown(token_id):
+ toktype = SentencePieceTokenTypes.UNKNOWN
+ elif tokenizer.IsControl(token_id):
+ toktype = SentencePieceTokenTypes.CONTROL
+ elif tokenizer.IsUnused(token_id):
+ toktype = SentencePieceTokenTypes.UNUSED
+ elif tokenizer.IsByte(token_id):
+ toktype = SentencePieceTokenTypes.BYTE
+
+ tokens[token_id] = text
+ scores[token_id] = score
+ toktypes[token_id] = toktype
+
+ added_tokens_file = self.dir_model / 'added_tokens.json'
+ if added_tokens_file.is_file():
+ with open(added_tokens_file, "r", encoding="utf-8") as f:
+ added_tokens_json = json.load(f)
+
+ for key in added_tokens_json:
+ token_id = added_tokens_json[key]
+ if (token_id >= vocab_size):
+ logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+ continue
+
+ tokens[token_id] = key.encode("utf-8")
+ scores[token_id] = -1000.0
+ toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+
+ tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+ if tokenizer_config_file.is_file():
+ with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+ tokenizer_config_json = json.load(f)
+ added_tokens_decoder = tokenizer_config_json.get("added_tokens_decoder", {})
+ for token_id, token_data in added_tokens_decoder.items():
+ token_id = int(token_id)
+ token = token_data["content"].encode("utf-8")
+ if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
+ assert tokens[token_id] == token
+ tokens[token_id] = token
+ scores[token_id] = -1000.0
+ toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+ if token_data.get("special"):
+ toktypes[token_id] = SentencePieceTokenTypes.CONTROL
+
+ tokenizer_file = self.dir_model / 'tokenizer.json'
+ if tokenizer_file.is_file():
+ with open(tokenizer_file, "r", encoding="utf-8") as f:
+ tokenizer_json = json.load(f)
+ added_tokens = tokenizer_json.get("added_tokens", [])
+ for token_data in added_tokens:
+ token_id = int(token_data["id"])
+ token = token_data["content"].encode("utf-8")
+ if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
+ assert tokens[token_id] == token
+ tokens[token_id] = token
+ scores[token_id] = -1000.0
+ toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+ if token_data.get("special"):
+ toktypes[token_id] = SentencePieceTokenTypes.CONTROL
+
+ self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_scores(scores)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def set_gguf_parameters(self):
+ block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
+
+ n_embd = self.find_hparam(["hidden_size", "n_embd"])
+ n_head = self.find_hparam(["num_attention_heads", "n_head"])
+ n_head_kv = self.find_hparam(["num_key_value_heads", "n_head_kv"])
+ rms_eps = self.find_hparam(["rms_norm_eps"])
+ max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
+ orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
+ rope_dims = n_embd // n_head
+
+ self.gguf_writer.add_name("Phi3")
+ self.gguf_writer.add_context_length(max_pos_embds)
+ self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
+ self.gguf_writer.add_embedding_length(n_embd)
+ self.gguf_writer.add_feed_forward_length(self.find_hparam(["intermediate_size"]))
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(n_head)
+ self.gguf_writer.add_head_count_kv(n_head_kv)
+ self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
+ self.gguf_writer.add_rope_dimension_count(rope_dims)
+ self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
+ self.gguf_writer.add_file_type(self.ftype)
+
+ # write rope scaling for long context (128k) model
+ rope_scaling = self.find_hparam(['rope_scaling'], True)
+ if (rope_scaling is None):
+ return
+
+ scale = max_pos_embds / orig_max_pos_embds
+
+ rope_scaling_type = rope_scaling.get('type', '').lower()
+ if len(rope_scaling_type) == 0:
+ raise KeyError('Missing the required key rope_scaling.type')
+
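+ # e.g. (illustrative) a model extended from a 4k to a 128k context has scale = 32,
+ # so the 'su'/'longrope' attention factor is sqrt(1 + ln(32) / ln(4096)) ~= 1.19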
+ if rope_scaling_type == 'su' or rope_scaling_type == 'longrope':
+ attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
+ elif rope_scaling_type == 'yarn':
+ attn_factor = 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0
+ else:
+ raise NotImplementedError(f'The rope scaling type {rope_scaling_type} is not supported yet')
+
+ self.gguf_writer.add_rope_scaling_attn_factors(attn_factor)
+
+ long_factors = rope_scaling.get('long_factor', None)
+ short_factors = rope_scaling.get('short_factor', None)
+
+ if long_factors is None or short_factors is None:
+ raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling.short_factor')
+
+ if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+ raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+ self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
+ self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+
+
+@Model.register("PlamoForCausalLM")
+class PlamoModel(Model):
+ model_arch = gguf.MODEL_ARCH.PLAMO
+
+ def set_vocab(self):
+ self._set_vocab_sentencepiece()
+
+ def set_gguf_parameters(self):
+ hparams = self.hparams
+ block_count = hparams["num_hidden_layers"]
+
+ self.gguf_writer.add_name("PLaMo")
+ self.gguf_writer.add_context_length(4096) # not in config.json
+ self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+ self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count_kv(5) # hparams["num_key_value_heads"] is wrong for this model
+ self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def shuffle_attn_q_weight(self, data_torch):
+ assert data_torch.size() == (5120, 5120)
+ data_torch = data_torch.reshape(8, 5, 128, 5120)
+ data_torch = torch.permute(data_torch, (1, 0, 2, 3))
+ data_torch = torch.reshape(data_torch, (5120, 5120))
+ return data_torch
+
+ def shuffle_attn_output_weight(self, data_torch):
+ assert data_torch.size() == (5120, 5120)
+ data_torch = data_torch.reshape(5120, 8, 5, 128)
+ data_torch = torch.permute(data_torch, (0, 2, 1, 3))
+ data_torch = torch.reshape(data_torch, (5120, 5120))
+ return data_torch
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ new_name = self.map_tensor_name(name)
+
+ # shuffle for broadcasting of GQA in ggml_mul_mat
+ if new_name.endswith("attn_q.weight"):
+ data_torch = self.shuffle_attn_q_weight(data_torch)
+ elif new_name.endswith("attn_output.weight"):
+ data_torch = self.shuffle_attn_output_weight(data_torch)
+
+ return [(new_name, data_torch)]
+
+
+@Model.register("CodeShellForCausalLM")
+class CodeShellModel(Model):
+ model_arch = gguf.MODEL_ARCH.CODESHELL
+
+ def set_gguf_parameters(self):
+ block_count = self.hparams["n_layer"]
+
+ self.gguf_writer.add_name("CodeShell")
+ self.gguf_writer.add_context_length(self.hparams["n_positions"])
+ self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+ self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(self.hparams["n_head"])
+ self.gguf_writer.add_head_count_kv(self.hparams["num_query_groups"])
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+ self.gguf_writer.add_rope_freq_base(10000.0)
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+ self.gguf_writer.add_rope_scaling_factor(1.0)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ new_name = self.map_tensor_name(name)
+
+ tensors: list[tuple[str, Tensor]] = [(new_name, data_torch)]
+
+ if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
+ assert self.tensor_names is not None
+
+ if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
+ # copy tok_embd.weight to output.weight
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
+
+ return tensors
+
+
+@Model.register("InternLM2ForCausalLM")
+class InternLM2Model(Model):
+ model_arch = gguf.MODEL_ARCH.INTERNLM2
+
+ def set_vocab(self):
+ # TODO: Is there a better way?
+ # Copied from _set_vocab_sentencepiece; the only difference is that the character
+ # \x00 is treated specially and converted into an emoji so that it is not mistakenly
+ # recognized as an empty string in C++.
+ from sentencepiece import SentencePieceProcessor
+ from sentencepiece import sentencepiece_model_pb2 as model
+
+ tokenizer_path = self.dir_model / 'tokenizer.model'
+
+ tokens: list[bytes] = []
+ scores: list[float] = []
+ toktypes: list[int] = []
+
+ if not tokenizer_path.is_file():
+ logger.error(f'Error: Missing {tokenizer_path}')
+ sys.exit(1)
+
+ sentencepiece_model = model.ModelProto()
+ sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
+ add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
+
+ tokenizer = SentencePieceProcessor()
+ tokenizer.LoadFromFile(str(tokenizer_path))
+
+ vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+ for token_id in range(vocab_size):
+ piece = tokenizer.IdToPiece(token_id)
+ text = piece.encode("utf-8")
+ score = tokenizer.GetScore(token_id)
+ if text == b"\x00":
+ # TODO: fix me
+ # Hack: replace the \x00 characters.
+ logger.warning(f"InternLM2: converting token '{text}' to '🐉'!")
+ text = "🐉".encode("utf-8")
+
+ toktype = SentencePieceTokenTypes.NORMAL
+ if tokenizer.IsUnknown(token_id):
+ toktype = SentencePieceTokenTypes.UNKNOWN
+ elif tokenizer.IsControl(token_id):
+ toktype = SentencePieceTokenTypes.CONTROL
+ elif tokenizer.IsUnused(token_id):
+ toktype = SentencePieceTokenTypes.UNUSED
+ elif tokenizer.IsByte(token_id):
+ toktype = SentencePieceTokenTypes.BYTE
+
+ tokens.append(text)
+ scores.append(score)
+ toktypes.append(toktype)
+
+ added_tokens_file = self.dir_model / 'added_tokens.json'
+ if added_tokens_file.is_file():
+ with open(added_tokens_file, "r", encoding="utf-8") as f:
+ added_tokens_json = json.load(f)
+
+ for key in added_tokens_json:
+ tokens.append(key.encode("utf-8"))
+ scores.append(-1000.0)
+ toktypes.append(SentencePieceTokenTypes.USER_DEFINED)
+
+ self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_scores(scores)
+ self.gguf_writer.add_token_types(toktypes)
+ self.gguf_writer.add_add_space_prefix(add_prefix)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ old_eos = special_vocab.special_token_ids["eos"]
+ if "chat" in os.path.basename(self.dir_model.absolute()):
+ # For the chat model, we replace the eos with '<|im_end|>'.
+ # TODO: this is a hack, should be fixed
+ # https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048
+ special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer)
+ logger.warning(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \
+in chat mode so that the conversation can end normally.")
+
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def _try_get_sft_eos(self, tokenizer):
+ unused_145_list = tokenizer.Encode('[UNUSED_TOKEN_145]')
+ im_end_list = tokenizer.Encode('<|im_end|>')
+ eos_token = None
+ assert (len(unused_145_list) == 1) ^ (len(im_end_list) == 1)
+ if len(unused_145_list) == 1:
+ eos_token = unused_145_list[0]
+ if len(im_end_list) == 1:
+ eos_token = im_end_list[0]
+ assert eos_token
+ return eos_token
+
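+ # note: same permutation as LlamaModel.permute - converts Q/K weights from the HF
+ # rotary layout into the layout llama.cpp's RoPE expects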
+ def _hf_permute_qk(self, weights, n_head: int, n_head_kv: int):
+ if n_head_kv is not None and n_head != n_head_kv:
+ n_head = n_head_kv
+ return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+ .swapaxes(1, 2)
+ .reshape(weights.shape))
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_name("InternLM2")
+ self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
+ self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
+ self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+ self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+ self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
+ self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+ self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ num_heads = self.hparams["num_attention_heads"]
+ num_kv_heads = self.hparams["num_key_value_heads"]
+ hidden_size = self.hparams["hidden_size"]
+ q_per_kv = num_heads // num_kv_heads
+ head_dim = hidden_size // num_heads
+ num_groups = num_heads // q_per_kv
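+ # e.g. (illustrative) num_heads = 32 and num_kv_heads = 8 give q_per_kv = 4 and num_groups = 8:
+ # each group packs 4 query heads followed by one key head and one value head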
+
+ qkv_pattern = r"model\.layers\.(\d+)\.attention\.wqkv"
+
+ if re.match(qkv_pattern, name):
+ bid = re.findall(qkv_pattern, name)[0]
+ qkv = data_torch
+ # qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
+ qkv = qkv.T.reshape((-1, num_groups, q_per_kv + 2, head_dim))
+ q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :]
+ # The model weights of q and k require additional reshaping.
+ # q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads)
+ q = self._hf_permute_qk(q.reshape((q.shape[0], -1)).T, num_heads, num_heads)
+ # k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads)
+ k = self._hf_permute_qk(k.reshape((k.shape[0], -1)).T, num_heads, num_kv_heads)
+ # v = rearrange(v, " o g n i -> o (g n i)").T
+ v = v.reshape((v.shape[0], -1)).T
+ return [
+ (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), q),
+ (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), k),
+ (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v),
+ ]
+ else:
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("BertModel", "CamembertModel")
+class BertModel(Model):
+ model_arch = gguf.MODEL_ARCH.BERT
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.vocab_size = None
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ self.gguf_writer.add_causal_attention(False)
+
+ # get pooling path
+ pooling_path = None
+ module_path = self.dir_model / "modules.json"
+ if module_path.is_file():
+ with open(module_path, encoding="utf-8") as f:
+ modules = json.load(f)
+ for mod in modules:
+ if mod["type"] == "sentence_transformers.models.Pooling":
+ pooling_path = mod["path"]
+ break
+
+ # get pooling type
+ if pooling_path is not None:
+ with open(self.dir_model / pooling_path / "config.json", encoding="utf-8") as f:
+ pooling = json.load(f)
+ if pooling["pooling_mode_mean_tokens"]:
+ pooling_type = gguf.PoolingType.MEAN
+ elif pooling["pooling_mode_cls_token"]:
+ pooling_type = gguf.PoolingType.CLS
+ else:
+ raise NotImplementedError("Only MEAN and CLS pooling types supported")
+ self.gguf_writer.add_pooling_type(pooling_type)
+
+ def set_vocab(self):
+ tokens, toktypes, tokpre = self.get_vocab_base()
+ self.vocab_size = len(tokens)
+
+ # we need this to validate the size of the token_type embeddings
+ # though currently we are passing all zeros to the token_type embeddings
+ self.gguf_writer.add_token_type_count(2) # "Sequence A" or "Sequence B"
+
+ # convert to phantom space vocab
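+ # e.g. (illustrative) the WordPiece pieces "play", "##ing" become "▁play", "ing":
+ # continuation markers are dropped and word-initial pieces get a U+2581 prefix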
+ def phantom(tok):
+ if tok.startswith("[") and tok.endswith("]"):
+ return tok
+ if tok.startswith("##"):
+ return tok[2:]
+ return "\u2581" + tok
+ tokens = list(map(phantom, tokens))
+
+ # add vocab to gguf
+ self.gguf_writer.add_tokenizer_model("bert")
+ self.gguf_writer.add_tokenizer_pre(tokpre)
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_types(toktypes)
+
+ # handle special tokens
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ # we are only using BERT for embeddings so we don't need the pooling layer
+ if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"):
+ return [] # we don't need these
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("NomicBertModel")
+class NomicBertModel(BertModel):
+ model_arch = gguf.MODEL_ARCH.NOMIC_BERT
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ # the HF config claims n_ctx=8192, but it uses RoPE scaling
+ self.hparams["n_ctx"] = 2048
+
+ # SwiGLU activation
+ assert self.hparams["activation_function"] == "swiglu"
+ # this doesn't do anything in the HF version
+ assert self.hparams["causal"] is False
+ # no bias tensors
+ assert self.hparams["qkv_proj_bias"] is False
+ assert self.hparams["mlp_fc1_bias"] is False
+ assert self.hparams["mlp_fc2_bias"] is False
+ # norm at end of layer
+ assert self.hparams["prenorm"] is False
+ # standard RoPE
+ assert self.hparams["rotary_emb_fraction"] == 1.0
+ assert self.hparams["rotary_emb_interleaved"] is False
+ assert self.hparams["rotary_emb_scale_base"] is None
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
+
+
+@Model.register("GemmaForCausalLM")
+class GemmaModel(Model):
+ model_arch = gguf.MODEL_ARCH.GEMMA
+
+ def set_vocab(self):
+ self._set_vocab_sentencepiece()
+
+ # TODO: these special tokens should be exported only for the CodeGemma family
+ special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
+ special_token_types = ['prefix', 'suffix', 'middle', 'fsep', 'eot'])
+ special_vocab._set_special_token("prefix", 67)
+ special_vocab._set_special_token("suffix", 69)
+ special_vocab._set_special_token("middle", 68)
+ special_vocab._set_special_token("fsep", 70)
+ special_vocab._set_special_token("eot", 107)
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ self.gguf_writer.add_add_space_prefix(False)
+
+ def set_gguf_parameters(self):
+ hparams = self.hparams
+ block_count = hparams["num_hidden_layers"]
+
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+ self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+ self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+ self.gguf_writer.add_key_length(hparams["head_dim"])
+ self.gguf_writer.add_value_length(hparams["head_dim"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ # lm_head is not used in llama.cpp, but autoawq includes this tensor in the model.
+ # To prevent errors, skip loading lm_head.weight.
+ if name == "lm_head.weight":
+ logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
+ return []
+
+ # ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
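+ # note: Gemma's HF RMSNorm applies (1 + weight), so the +1 is baked into the exported
+ # norm weights here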
+ if name.endswith("norm.weight"):
+ data_torch = data_torch + 1
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("Gemma2ForCausalLM")
+class Gemma2Model(Model):
+ model_arch = gguf.MODEL_ARCH.GEMMA2
+
+ def set_vocab(self):
+ tokens, scores, toktypes = self._create_vocab_sentencepiece()
+ # hack: This is required so that we can properly use start/end-of-turn for chat template
+ for i in range(108):
+ # including <unusedX>, <start_of_turn>, <end_of_turn>
+ toktypes[i] = SentencePieceTokenTypes.CONTROL
+ self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_scores(scores)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ self.gguf_writer.add_add_space_prefix(False)
+
+ def set_gguf_parameters(self):
+ hparams = self.hparams
+ block_count = hparams["num_hidden_layers"]
+
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+ self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+ self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+ self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+ self.gguf_writer.add_key_length(hparams["head_dim"])
+ self.gguf_writer.add_value_length(hparams["head_dim"])
+ self.gguf_writer.add_file_type(self.ftype)
+ self.gguf_writer.add_attn_logit_softcapping(
+ self.hparams["attn_logit_softcapping"]
+ )
+ self.gguf_writer.add_final_logit_softcapping(
+ self.hparams["final_logit_softcapping"]
+ )
+ self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
+
+ # sanity check
+ attn_scalar = self.hparams["query_pre_attn_scalar"]
+ if attn_scalar != hparams["hidden_size"] / hparams["num_attention_heads"]:
+ raise ValueError("query_pre_attn_scalar must be equal to n_embd / n_head")
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ # lm_head is not used in llama.cpp, but autoawq includes this tensor in the model.
+ # To prevent errors, skip loading lm_head.weight.
+ if name == "lm_head.weight":
+ logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
+ return []
+
+ # ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
+ if name.endswith("norm.weight"):
+ data_torch = data_torch + 1
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("Starcoder2ForCausalLM")
+class StarCoder2Model(Model):
+ model_arch = gguf.MODEL_ARCH.STARCODER2
+
+
+@Model.register("MambaForCausalLM", "MambaLMHeadModel")
+class MambaModel(Model):
+ model_arch = gguf.MODEL_ARCH.MAMBA
+
+ def set_vocab(self):
+ vocab_size = self.hparams["vocab_size"]
+ # Round vocab size up to the next multiple of pad_vocab_size_multiple (defaults to 8)
+ pad_vocab = self.hparams.get("pad_vocab_size_multiple", 8)
+ # pad using ceiling division
+ # ref: https://stackoverflow.com/a/17511341/22827863
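+ # e.g. (illustrative) vocab_size = 50277 with pad_vocab = 8 becomes -(50277 // -8) * 8 = 50280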
+ vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
+ self.hparams["vocab_size"] = vocab_size
+
+ if (self.dir_model / "tokenizer.json").is_file():
+ self._set_vocab_gpt2()
+ elif (self.dir_model / "tokenizer.model").is_file():
+ self._set_vocab_sentencepiece()
+ else:
+ # Use the GPT-NeoX tokenizer when no tokenizer files are present
+ tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf"
+ logger.warning(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
+ neox_reader = gguf.GGUFReader(tokenizer_path, "r")
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
+ self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]).decode("utf-8") if field else "gpt2")
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.PRE)
+ self.gguf_writer.add_tokenizer_pre(bytes(field.parts[-1]).decode("utf-8") if field else "mpt")
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
+ assert field
+ self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
+ assert field
+ self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.MERGES)
+ assert field
+ self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
+ self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0] if field else 1)
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)
+ self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0] if field else 0)
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)
+ self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0] if field else 0)
+
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.PAD_ID)
+ self.gguf_writer.add_pad_token_id(field.parts[-1].tolist()[0] if field else 0)
+
+ def set_gguf_parameters(self):
+ d_model = self.find_hparam(["hidden_size", "d_model"])
+ d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
+ d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
+ d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 16
+ # ceiling division
+ # ref: https://stackoverflow.com/a/17511341/22827863
+ # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
+ dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
+ rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
+
+ # Fail early for models which don't have a block expansion factor of 2
+ assert d_inner == 2 * d_model
+
+ self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+ self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
+ self.gguf_writer.add_embedding_length(d_model)
+ self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
+ self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
+ self.gguf_writer.add_block_count(self.hparams["n_layer"])
+ self.gguf_writer.add_ssm_conv_kernel(d_conv)
+ self.gguf_writer.add_ssm_inner_size(d_inner)
+ self.gguf_writer.add_ssm_state_size(d_state)
+ self.gguf_writer.add_ssm_time_step_rank(dt_rank)
+ self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
+ self.gguf_writer.add_file_type(self.ftype)
+
+ _tok_embd = None
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+ tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
+
+ new_name = self.map_tensor_name(name)
+
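+ # note: Mamba checkpoints store A_log rather than A; the actual A is -exp(A_log),
+ # which is what gets written to the GGUF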
+ if name.endswith(".A_log"):
+ logger.debug("A_log --> A ==> " + new_name)
+ data_torch = -torch.exp(data_torch)
+
+ # assuming token_embd.weight is seen before output.weight
+ if self._tok_embd is not None and new_name == output_name:
+ if torch.equal(self._tok_embd, data_torch):
+ logger.debug(f"{output_name} is equivalent to {tok_embd_name}, omitting")
+ return []
+ elif new_name == tok_embd_name:
+ self._tok_embd = data_torch
+
+ return [(new_name, data_torch)]
+
+ def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
+ del n_dims # unused
+
+ return bid is not None and new_name in (
+ self.format_tensor_name(n, bid, ".weight" if name.endswith(".weight") else "") for n in [
+ gguf.MODEL_TENSOR.SSM_CONV1D,
+ gguf.MODEL_TENSOR.SSM_X,
+ gguf.MODEL_TENSOR.SSM_DT,
+ gguf.MODEL_TENSOR.SSM_A,
+ gguf.MODEL_TENSOR.SSM_D,
+ ]
+ )
+
+
+@Model.register("CohereForCausalLM")
+class CommandR2Model(Model):
+ model_arch = gguf.MODEL_ARCH.COMMAND_R
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ # max_position_embeddings = 8192 in config.json, but the model was actually
+ # trained on a 128k context length
+ # aya-23 models don't have model_max_length specified
+ self.hparams["max_position_embeddings"] = self.find_hparam(["model_max_length", "max_position_embeddings"])
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ self.gguf_writer.add_logit_scale(self.hparams["logit_scale"])
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+
+
+@Model.register("OlmoForCausalLM")
+@Model.register("OLMoForCausalLM")
+class OlmoModel(Model):
+ model_arch = gguf.MODEL_ARCH.OLMO
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ self.gguf_writer.add_layer_norm_eps(1e-5)
+ clip_qkv = self.hparams.get("clip_qkv")
+ if clip_qkv is not None:
+ self.gguf_writer.add_clamp_kqv(clip_qkv)
+
+ # Same as the superclass, but permutes q_proj and k_proj
+ # Copied from: LlamaModel
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ n_head = self.hparams["num_attention_heads"]
+ n_kv_head = self.hparams.get("num_key_value_heads")
+
+ if name.endswith("q_proj.weight"):
+ data_torch = LlamaModel.permute(data_torch, n_head, n_head)
+ if name.endswith("k_proj.weight"):
+ data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("JinaBertModel", "JinaBertForMaskedLM")
+class JinaBertV2Model(BertModel):
+ model_arch = gguf.MODEL_ARCH.JINA_BERT_V2
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.intermediate_size = self.hparams["intermediate_size"]
+
+ def get_tensors(self):
+ for name, data in super().get_tensors():
+ if 'gated_layer' in name:
+ d1 = data[:self.intermediate_size, :]
+ name1 = name.replace('gated_layers', 'gated_layers_w')
+ name1 = name1.replace('up_gated_layer', 'gated_layers_v')
+ d2 = data[self.intermediate_size:, :]
+ name2 = name.replace('gated_layers', 'gated_layers_v')
+ name2 = name2.replace('up_gated_layer', 'gated_layers_w')
+ yield name1, d1
+ yield name2, d2
+ continue
+
+ yield name, data
+
+ def set_vocab(self, *args, **kwargs):
+ tokenizer_class = 'BertTokenizer'
+ with open(self.dir_model / "tokenizer_config.json", "r", encoding="utf-8") as f:
+ tokenizer_class = json.load(f)['tokenizer_class']
+
+ if tokenizer_class == 'BertTokenizer':
+ super().set_vocab()
+ elif tokenizer_class == 'RobertaTokenizer':
+ self._set_vocab_gpt2()
+ self.gguf_writer.add_token_type_count(2)
+ else:
+ raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel')
+ self.gguf_writer.add_add_bos_token(True)
+ self.gguf_writer.add_add_eos_token(True)
+
+
+@Model.register("ArcticForCausalLM")
+class ArcticModel(Model):
+ model_arch = gguf.MODEL_ARCH.ARCTIC
+
+ def set_vocab(self):
+ # The reason for using a custom implementation here is that the
+ # snowflake-arctic-instruct model redefined tokens 31998 and 31999 from
+ # tokenizer.model and used them as BOS and EOS instead of adding new tokens.
+ from sentencepiece import SentencePieceProcessor
+
+ tokenizer_path = self.dir_model / 'tokenizer.model'
+
+ if not tokenizer_path.is_file():
+ logger.error(f'Error: Missing {tokenizer_path}')
+ sys.exit(1)
+
+ # Read the whole vocabulary from the tokenizer.model file
+ tokenizer = SentencePieceProcessor()
+ tokenizer.LoadFromFile(str(tokenizer_path))
+
+ vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+ tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+ scores: list[float] = [-10000.0] * vocab_size
+ toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+ for token_id in range(tokenizer.vocab_size()):
+
+ piece = tokenizer.IdToPiece(token_id)
+ text = piece.encode("utf-8")
+ score = tokenizer.GetScore(token_id)
+
+ toktype = SentencePieceTokenTypes.NORMAL
+ if tokenizer.IsUnknown(token_id):
+ toktype = SentencePieceTokenTypes.UNKNOWN
+ elif tokenizer.IsControl(token_id):
+ toktype = SentencePieceTokenTypes.CONTROL
+ elif tokenizer.IsUnused(token_id):
+ toktype = SentencePieceTokenTypes.UNUSED
+ elif tokenizer.IsByte(token_id):
+ toktype = SentencePieceTokenTypes.BYTE
+
+ tokens[token_id] = text
+ scores[token_id] = score
+ toktypes[token_id] = toktype
+
+ # Use the added_tokens_decoder field from tokenizer_config.json as the source
+ # of information about added/redefined tokens and modify them accordingly.
+ tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+ if tokenizer_config_file.is_file():
+ with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+ tokenizer_config_json = json.load(f)
+
+ if "added_tokens_decoder" in tokenizer_config_json:
+ added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"]
+ for token_id, token_json in added_tokens_decoder.items():
+ token_id = int(token_id)
+ if (token_id >= vocab_size):
+ logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+ continue
+
+ token_content = token_json["content"]
+ token_type = SentencePieceTokenTypes.USER_DEFINED
+ token_score = -10000.0
+
+ # Map unk_token to UNKNOWN, other special tokens to CONTROL
+ # Set the score to 0.0 as in the original tokenizer.model
+ if ("special" in token_json) and token_json["special"]:
+ if token_content == tokenizer_config_json["unk_token"]:
+ token_type = SentencePieceTokenTypes.UNKNOWN
+ else:
+ token_type = SentencePieceTokenTypes.CONTROL
+ token_score = 0.0
+
+ logger.info(f"Setting added token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")
+ tokens[token_id] = token_content.encode("utf-8")
+ toktypes[token_id] = token_type
+ scores[token_id] = token_score
+
+ self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_scores(scores)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ hparams = self.hparams
+ self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+ self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+
+ _experts: list[dict[str, Tensor]] | None = None
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ n_head = self.hparams["num_attention_heads"]
+ n_kv_head = self.hparams.get("num_key_value_heads")
+
+ if name.endswith("q_proj.weight"):
+ data_torch = LlamaModel.permute(data_torch, n_head, n_head)
+ if name.endswith("k_proj.weight"):
+ data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
+
+ # process the experts separately
+ if name.find("block_sparse_moe.experts") != -1:
+ n_experts = self.hparams["num_local_experts"]
+
+ assert bid is not None
+
+ if self._experts is None:
+ self._experts = [{} for _ in range(self.block_count)]
+
+ self._experts[bid][name] = data_torch
+
+ if len(self._experts[bid]) >= n_experts * 3:
+ tensors: list[tuple[str, Tensor]] = []
+
+ # merge the experts into a single 3d tensor
+ for wid in ["w1", "w2", "w3"]:
+ datas: list[Tensor] = []
+
+ for xid in range(n_experts):
+ ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid}.weight"
+ datas.append(self._experts[bid][ename])
+ del self._experts[bid][ename]
+
+ data_torch = torch.stack(datas, dim=0)
+
+ merged_name = f"layers.{bid}.feed_forward.experts.{wid}.weight"
+
+ new_name = self.map_tensor_name(merged_name)
+
+ tensors.append((new_name, data_torch))
+ return tensors
+ else:
+ return []
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+ def write_tensors(self):
+ super().write_tensors()
+
+ if self._experts is not None:
+ # flatten `list[dict[str, Tensor]]` into `list[str]`
+ experts = [k for d in self._experts for k in d.keys()]
+ if len(experts) > 0:
+ raise ValueError(f"Unprocessed experts: {experts}")
+
+
+@Model.register("DeepseekV2ForCausalLM")
+class DeepseekV2Model(Model):
+ model_arch = gguf.MODEL_ARCH.DEEPSEEK2
+
+ def set_vocab(self):
+ self._set_vocab_gpt2()
+
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ hparams = self.hparams
+
+ self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
+ self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+ if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
+ self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
+ self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
+ self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
+ self.gguf_writer.add_value_length(hparams["v_head_dim"])
+ self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
+ self.gguf_writer.add_expert_count(hparams["n_routed_experts"])
+ self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"])
+ self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
+ self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
+
+ if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+ if self.hparams["rope_scaling"].get("type") == "yarn":
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+ self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+ self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
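+ # note (assumption): the value stored here is the multiplier applied to ln(scale) in YaRN's
+ # attention-scale formula (mscale = mul * ln(s) + 1), hence 0.1 * mscale_all_dim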
+ self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * hparams["rope_scaling"]["mscale_all_dim"])
+
+ _experts: list[dict[str, Tensor]] | None = None
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ # process the experts separately
+ if name.find("mlp.experts") != -1:
+ n_experts = self.hparams["n_routed_experts"]
+ assert bid is not None
+
+ if self._experts is None:
+ self._experts = [{} for _ in range(self.block_count)]
+
+ self._experts[bid][name] = data_torch
+
+ if len(self._experts[bid]) >= n_experts * 3:
+ tensors: list[tuple[str, Tensor]] = []
+
+ # merge the experts into a single 3d tensor
+ for w_name in ["down_proj", "gate_proj", "up_proj"]:
+ datas: list[Tensor] = []
+
+ for xid in range(n_experts):
+ ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+ datas.append(self._experts[bid][ename])
+ del self._experts[bid][ename]
+
+ data_torch = torch.stack(datas, dim=0)
+
+ merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+
+ new_name = self.map_tensor_name(merged_name)
+
+ tensors.append((new_name, data_torch))
+ return tensors
+ else:
+ return []
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+ def write_tensors(self):
+ super().write_tensors()
+
+ if self._experts is not None:
+ # flatten `list[dict[str, Tensor]]` into `list[str]`
+ experts = [k for d in self._experts for k in d.keys()]
+ if len(experts) > 0:
+ raise ValueError(f"Unprocessed experts: {experts}")
+
+
+@Model.register("T5WithLMHeadModel")
+@Model.register("T5ForConditionalGeneration")
+@Model.register("MT5ForConditionalGeneration")
+@Model.register("UMT5ForConditionalGeneration")
+class T5Model(Model):
+ model_arch = gguf.MODEL_ARCH.T5
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.shared_token_embeddings_found = False
+
+ def set_vocab(self):
+ # to avoid TypeError: Descriptors cannot be created directly
+ # exception when importing sentencepiece_model_pb2
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
+ from sentencepiece import SentencePieceProcessor
+ from sentencepiece import sentencepiece_model_pb2 as model
+
+ tokenizer_path = self.dir_model / 'tokenizer.model'
+
+ # many older models use the filename spiece.model for the tokenizer model
+ if not tokenizer_path.is_file():
+ tokenizer_path = self.dir_model / 'spiece.model'
+
+ if not tokenizer_path.is_file():
+ raise FileNotFoundError(f"File not found: {tokenizer_path}")
+
+ sentencepiece_model = model.ModelProto()
+ sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
+
+ # some models like the Pile-T5 family use a BPE tokenizer instead of Unigram
+ if sentencepiece_model.trainer_spec.model_type == 2: # BPE
+ # ensure the tokenizer model file name is correct
+ assert tokenizer_path.name == 'tokenizer.model'
+ return self._set_vocab_sentencepiece()
+ else:
+ assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM
+
+ add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
+ remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
+ precompiled_charsmap = sentencepiece_model.normalizer_spec.precompiled_charsmap
+
+ tokenizer = SentencePieceProcessor()
+ tokenizer.LoadFromFile(str(tokenizer_path))
+
+ vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+ tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+ scores: list[float] = [-10000.0] * vocab_size
+ toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+ for token_id in range(tokenizer.vocab_size()):
+ piece = tokenizer.IdToPiece(token_id)
+ text = piece.encode("utf-8")
+ score = tokenizer.GetScore(token_id)
+
+ toktype = SentencePieceTokenTypes.NORMAL
+ if tokenizer.IsUnknown(token_id):
+ toktype = SentencePieceTokenTypes.UNKNOWN
+ elif tokenizer.IsControl(token_id):
+ toktype = SentencePieceTokenTypes.CONTROL
+ elif tokenizer.IsUnused(token_id):
+ toktype = SentencePieceTokenTypes.UNUSED
+ elif tokenizer.IsByte(token_id):
+ toktype = SentencePieceTokenTypes.BYTE
+
+ tokens[token_id] = text
+ scores[token_id] = score
+ toktypes[token_id] = toktype
+
+ added_tokens_file = self.dir_model / 'added_tokens.json'
+ if added_tokens_file.is_file():
+ with open(added_tokens_file, "r", encoding="utf-8") as f:
+ added_tokens_json = json.load(f)
+ for key in added_tokens_json:
+ token_id = added_tokens_json[key]
+ if (token_id >= vocab_size):
+ logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+ continue
+
+ tokens[token_id] = key.encode("utf-8")
+ scores[token_id] = -1000.0
+ toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+
+ if vocab_size > len(tokens):
+ pad_count = vocab_size - len(tokens)
+ logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
+ for i in range(1, pad_count + 1):
+ tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
+ scores.append(-1000.0)
+ toktypes.append(SentencePieceTokenTypes.UNUSED)
+
+ self.gguf_writer.add_tokenizer_model("t5")
+ self.gguf_writer.add_tokenizer_pre("default")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_scores(scores)
+ self.gguf_writer.add_token_types(toktypes)
+ self.gguf_writer.add_add_space_prefix(add_prefix)
+ self.gguf_writer.add_remove_extra_whitespaces(remove_whitespaces)
+ if precompiled_charsmap:
+ self.gguf_writer.add_precompiled_charsmap(precompiled_charsmap)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ self.gguf_writer.add_add_bos_token(False)
+ self.gguf_writer.add_add_eos_token(True)
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_name("T5")
+ if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
+ logger.warning("Couldn't find context length in config.json, assuming default value of 512")
+ n_ctx = 512
+ self.gguf_writer.add_context_length(n_ctx)
+ self.gguf_writer.add_embedding_length(self.hparams["d_model"])
+ self.gguf_writer.add_feed_forward_length(self.hparams["d_ff"])
+ self.gguf_writer.add_block_count(self.hparams["num_layers"])
+ self.gguf_writer.add_head_count(self.hparams["num_heads"])
+ self.gguf_writer.add_key_length(self.hparams["d_kv"])
+ self.gguf_writer.add_value_length(self.hparams["d_kv"])
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_relative_attn_buckets_count(self.hparams["relative_attention_num_buckets"])
+ self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_decoder_start_token_id(self.hparams["decoder_start_token_id"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ # T5-based models store the shared token embeddings tensor under "encoder.embed_tokens.weight",
+ # "decoder.embed_tokens.weight" or "shared.weight", depending on the model; some checkpoints even
+ # contain multiple copies in their safetensors files. We use the first of these tensors as the
+ # token embeddings for both the encoder and the decoder, and ignore the remaining ones.
+ if name in ["decoder.embed_tokens.weight", "encoder.embed_tokens.weight", "shared.weight"]:
+ if not self.shared_token_embeddings_found:
+ name = "shared.weight"
+ self.shared_token_embeddings_found = True
+ else:
+ logger.debug(f"Skipping shared tensor {name!r} in safetensors so that convert can end normally.")
+ return []
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+
+@Model.register("JAISLMHeadModel")
+class JaisModel(Model):
+ model_arch = gguf.MODEL_ARCH.JAIS
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ # SwiGLU activation
+ assert self.hparams["activation_function"] == "swiglu"
+ # ALiBi position embedding
+ assert self.hparams["position_embedding_type"] == "alibi"
+
+ # Embeddings scale
+ self.embeddings_scale = 1.0
+ # note: For some JAIS flavors, output is tied to (same as) wte in original model
+ self.output_is_wte = False
+ if 'mup_embeddings_scale' in self.hparams:
+ self.output_is_wte = True # Hack (?)
+ self.embeddings_scale = self.hparams['mup_embeddings_scale']
+ elif 'embeddings_scale' in self.hparams:
+ self.embeddings_scale = self.hparams['embeddings_scale']
+ else:
+ assert False
+
+ self.width_scale = 1.0
+ if 'mup_output_alpha' in self.hparams:
+ assert 'mup_width_scale' in self.hparams
+ self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
+ elif 'width_scale' in self.hparams:
+ self.width_scale = self.hparams['width_scale']
+ else:
+ assert False
+
+ self.max_alibi_bias = 8.0
+
+ def set_vocab(self):
+ self._set_vocab_gpt2()
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_name(self.dir_model.name)
+ self.gguf_writer.add_block_count(self.hparams["n_layer"])
+ self.gguf_writer.add_context_length(self.hparams["n_positions"])
+ self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+ self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
+ self.gguf_writer.add_head_count(self.hparams["n_head"])
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ tensors: list[tuple[str, Tensor]] = []
+
+ # we don't need these
+ if name.endswith((".attn.bias")):
+ return tensors
+
+ if name.endswith(("relative_pe.slopes")):
+ # Calculate the max ALiBi bias (the inverse of the ALiBi slope calculation).
+ # Some other models have max_alibi_bias spelled out explicitly in the hparams,
+ # but JAIS's PyTorch model simply precalculates the slope values and places them
+ # in relative_pe.slopes
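+ # e.g. (illustrative) with n_head = 32 the standard ALiBi slopes start at 2**(-8/32),
+ # so -round(log2(2**(-8/32)) * 32) recovers max_alibi_bias = 8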
+ n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
+ first_val = float(data_torch._data[0])
+ self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
+
+ return tensors
+
+ if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
+ data_torch = data_torch.transpose(1, 0)
+
+ new_name = self.map_tensor_name(name)
+
+ if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
+ tensors.append((new_name, data_torch * self.embeddings_scale))
+ if self.output_is_wte:
+ tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
+ elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+ assert not self.output_is_wte
+ tensors.append((new_name, data_torch * self.width_scale))
+ else:
+ tensors.append((new_name, data_torch))
+
+ return tensors
+
+ def write_tensors(self):
+ super().write_tensors()
+ self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
+
+
+###### CONVERSION LOGIC ######
+
+
+# tree of lazy tensors
+class LazyTorchTensor(gguf.LazyBase):
+ _tensor_type = torch.Tensor
+ # to keep the type-checker happy
+ dtype: torch.dtype
+ shape: torch.Size
+
+ # only used when converting a torch.Tensor to a np.ndarray
+ _dtype_map: dict[torch.dtype, type] = {
+ torch.float16: np.float16,
+ torch.float32: np.float32,
+ }
+
+ def numpy(self) -> gguf.LazyNumpyTensor:
+ dtype = self._dtype_map[self.dtype]
+ return gguf.LazyNumpyTensor(
+ meta=gguf.LazyNumpyTensor.meta_with_dtype_and_shape(dtype, self.shape),
+ lazy=self._lazy,
+ args=(self,),
+ func=(lambda s: s[0].numpy())
+ )
+
+ @classmethod
+ def meta_with_dtype_and_shape(cls, dtype: torch.dtype, shape: torch.Size) -> Tensor:
+ return torch.empty(size=shape, dtype=dtype, device="meta")
+
+ @classmethod
+ def __torch_function__(cls, func, types, args=(), kwargs=None):
+ del types # unused
+
+ if kwargs is None:
+ kwargs = {}
+
+ if func is torch.Tensor.numpy:
+ return args[0].numpy()
+
+ return LazyTorchTensor._wrap_fn(func)(*args, **kwargs)
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(
+ description="Convert a huggingface model to a GGML compatible file")
+ parser.add_argument(
+ "--vocab-only", action="store_true",
+ help="extract only the vocab",
+ )
+ parser.add_argument(
+ "--awq-path", type=Path, default=None,
+ help="Path to scale awq cache file",
+ )
+ parser.add_argument(
+ "--outfile", type=Path,
+ help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
+ )
+ parser.add_argument(
+ "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16",
+ help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
+ )
+ parser.add_argument(
+ "--bigendian", action="store_true",
+ help="model is executed on big endian machine",
+ )
+ parser.add_argument(
+ "model", type=Path,
+ help="directory containing model file",
+ )
+ parser.add_argument(
+ "--use-temp-file", action="store_true",
+ help="use the tempfile library while processing (helpful when running out of memory, process killed)",
+ )
+ parser.add_argument(
+ "--no-lazy", action="store_true",
+ help="use more RAM by computing all outputs before writing (use in case lazy evaluation is broken)",
+ )
+ parser.add_argument(
+ "--model-name", type=str, default=None,
+ help="name of the model",
+ )
+ parser.add_argument(
+ "--verbose", action="store_true",
+ help="increase output verbosity",
+ )
+ parser.add_argument(
+ "--split-max-tensors", type=int, default=0,
+ help="max tensors in each split",
+ )
+ parser.add_argument(
+ "--split-max-size", type=str, default="0",
+ help="max size per split N(M|G)",
+ )
+ parser.add_argument(
+ "--dry-run", action="store_true",
+ help="only print out a split plan and exit, without writing any new files",
+ )
+ parser.add_argument(
+ "--no-tensor-first-split", action="store_true",
+ help="do not add tensors to the first split (disabled by default)"
+ )
+
+ return parser.parse_args()
+
+
+def split_str_to_n_bytes(split_str: str) -> int:
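+ # e.g. (illustrative) "500K" -> 500_000 and "2G" -> 2_000_000_000; multipliers are decimal (SI), not binary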
+ if split_str.endswith("K"):
+ n = int(split_str[:-1]) * 1000
+ elif split_str.endswith("M"):
+ n = int(split_str[:-1]) * 1000 * 1000
+ elif split_str.endswith("G"):
+ n = int(split_str[:-1]) * 1000 * 1000 * 1000
+ elif split_str.isnumeric():
+ n = int(split_str)
+ else:
+ raise ValueError(f"Invalid split size: {split_str}, must be a number, optionally followed by K, M, or G")
+
+ if n < 0:
+ raise ValueError(f"Invalid split size: {split_str}, must be positive")
+
+ return n
+
+
+def main() -> None:
+ args = parse_args()
+
+ logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+ dir_model = args.model
+
+ if args.awq_path:
+ sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
+ from awq.apply_awq import add_scale_weights # type: ignore[import-not-found]
+ tmp_model_path = args.model / "weighted_model"
+ dir_model = tmp_model_path
+ if tmp_model_path.is_dir():
+ logger.info(f"{tmp_model_path} exists as a weighted model.")
+ else:
+ tmp_model_path.mkdir(parents=True, exist_ok=True)
+ logger.info("Saving new weighted model ...")
+ add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
+ logger.info(f"Saved weighted model at {tmp_model_path}.")
+
+ if not dir_model.is_dir():
+ logger.error(f'Error: {args.model} is not a directory')
+ sys.exit(1)
+
+ ftype_map: dict[str, gguf.LlamaFileType] = {
+ "f32": gguf.LlamaFileType.ALL_F32,
+ "f16": gguf.LlamaFileType.MOSTLY_F16,
+ "bf16": gguf.LlamaFileType.MOSTLY_BF16,
+ "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
+ "auto": gguf.LlamaFileType.GUESSED,
+ }
+
+ is_split = args.split_max_tensors > 0 or args.split_max_size != "0"
+ if args.use_temp_file and is_split:
+ logger.error("Error: Cannot use temp file when splitting")
+ sys.exit(1)
+
+ if args.outfile is not None:
+ fname_out = args.outfile
+ else:
+ # output in the same directory as the model by default
+ fname_out = dir_model / 'ggml-model-{ftype}.gguf'
+
+ logger.info(f"Loading model: {dir_model.name}")
+
+ hparams = Model.load_hparams(dir_model)
+
+ with torch.inference_mode():
+ try:
+ model_class = Model.from_model_architecture(hparams["architectures"][0])
+ except NotImplementedError:
+ logger.error(f"Model {hparams['architectures'][0]} is not supported")
+ sys.exit(1)
+
+ model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file,
+ args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors,
+ split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
+ small_first_shard=args.no_tensor_first_split)
+
+ logger.info("Set model parameters")
+ model_instance.set_gguf_parameters()
+
+ logger.info("Set model tokenizer")
+ model_instance.set_vocab()
+
+ model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
+
+ if args.vocab_only:
+ logger.info("Exporting model vocab...")
+ model_instance.write_vocab()
+ logger.info(f"Model vocab successfully exported to {model_instance.fname_out}")
+ else:
+ logger.info("Exporting model...")
+ model_instance.write()
+ out_path = f"{model_instance.fname_out.parent}{os.sep}" if is_split else model_instance.fname_out
+ logger.info(f"Model successfully exported to {out_path}")
+
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# This script downloads the tokenizer models of the specified models from Huggingface and
+# generates the get_vocab_base_pre() function for convert-hf-to-gguf.py
+#
+# This is necessary in order to analyze the type of pre-tokenizer used by the model and
+# provide the necessary information to llama.cpp via the GGUF header in order to implement
+# the same pre-tokenizer.
+#
+# ref: https://github.com/ggerganov/llama.cpp/pull/6920
+#
+# Instructions:
+#
+# - Add a new model to the "models" list
+# - Run the script with your huggingface token:
+#
+# python3 convert-hf-to-gguf-update.py <huggingface_token>
+#
+# - Copy-paste the generated get_vocab_base_pre() function into convert-hf-to-gguf.py
+# - Update llama.cpp with the new pre-tokenizer if necessary
+#
+# TODO: generate tokenizer tests for llama.cpp
+#
+
+import logging
+import os
+import pathlib
+import re
+
+import requests
+import sys
+import json
+
+from hashlib import sha256
+from enum import IntEnum, auto
+from transformers import AutoTokenizer
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger("convert-hf-to-gguf-update")
+sess = requests.Session()
+
+
+class TOKENIZER_TYPE(IntEnum):
+ SPM = auto()
+ BPE = auto()
+ WPM = auto()
+ UGM = auto()
+
+
+# TODO: this string has to exercise as much pre-tokenizer functionality as possible
+# will be updated with time - contributions welcome
+chktxt = "\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````\"\"\"\"......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL"
+
+if len(sys.argv) == 2:
+ token = sys.argv[1]
+ if not token.startswith("hf_"):
+ logger.info("Huggingface token seems invalid")
+ logger.info("Usage: python convert-hf-to-gguf-update.py <huggingface_token>")
+ sys.exit(1)
+else:
+ logger.info("Usage: python convert-hf-to-gguf-update.py <huggingface_token>")
+ sys.exit(1)
+
+# TODO: add models here, base models preferred
+models = [
+ {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
+ {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
+ {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
+ {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
+ {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
+ {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
+ {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+ {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
+ {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
+ {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
+ {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
+ {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
+ {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
+ {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
+ {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
+ {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
+ {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
+ {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
+ {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
+ {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
+ {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
+ {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
+ {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+ {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+ {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
+ {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+ {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
+]
+
+
+def download_file_with_auth(url, token, save_path):
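+ # download a single file using a "Bearer <token>" Authorization header and write it to save_path, creating parent directories as needed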
+ headers = {"Authorization": f"Bearer {token}"}
+ response = sess.get(url, headers=headers)
+ response.raise_for_status()
+ os.makedirs(os.path.dirname(save_path), exist_ok=True)
+ with open(save_path, "wb") as f:
+ f.write(response.content)
+ logger.info(f"File {save_path} downloaded successfully")
+
+
+def download_model(model):
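+ # fetch only the files needed to reproduce the tokenizer: the JSON configs, plus tokenizer.model (SPM) or spiece.model (UGM)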
+ name = model["name"]
+ repo = model["repo"]
+ tokt = model["tokt"]
+
+ os.makedirs(f"models/tokenizers/{name}", exist_ok=True)
+
+ files = ["config.json", "tokenizer.json", "tokenizer_config.json"]
+
+ if tokt == TOKENIZER_TYPE.SPM:
+ files.append("tokenizer.model")
+
+ if tokt == TOKENIZER_TYPE.UGM:
+ files.append("spiece.model")
+
+ for file in files:
+ save_path = f"models/tokenizers/{name}/{file}"
+ if os.path.isfile(save_path):
+ logger.info(f"{name}: File {save_path} already exists - skipping")
+ continue
+ download_file_with_auth(f"{repo}/resolve/main/{file}", token, save_path)
+
+
+for model in models:
+ try:
+ download_model(model)
+ except Exception as e:
+ logger.error(f"Failed to download model {model['name']}. Error: {e}")
+
+
+# generate the source code for the convert-hf-to-gguf.py:get_vocab_base_pre() function:
+
+src_ifs = ""
+for model in models:
+ name = model["name"]
+ tokt = model["tokt"]
+
+ if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
+ continue
+
+ # Skip if the tokenizer folder does not exist (e.g. the download failed earlier)
+ if not os.path.exists(f"models/tokenizers/{name}"):
+ logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
+ continue
+
+ # create the tokenizer
+ try:
+ if name == "t5":
+ tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
+ else:
+ tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+ except OSError as e:
+ logger.error(
+ f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}"
+ )
+ continue # Skip to the next model if the tokenizer can't be loaded
+
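+ # hash the tokenization of the check string - this fingerprint identifies the pre-tokenizer behavior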
+ chktok = tokenizer.encode(chktxt)
+ chkhsh = sha256(str(chktok).encode()).hexdigest()
+
+ logger.info(f"model: {name}")
+ logger.info(f"tokt: {tokt}")
+ logger.info(f"repo: {model['repo']}")
+ logger.info(f"chktok: {chktok}")
+ logger.info(f"chkhsh: {chkhsh}")
+
+ # print the "pre_tokenizer" content from the tokenizer.json
+ with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
+ cfg = json.load(f)
+ normalizer = cfg["normalizer"]
+ logger.info("normalizer: " + json.dumps(normalizer, indent=4))
+ pre_tokenizer = cfg["pre_tokenizer"]
+ logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
+ if "ignore_merges" in cfg["model"]:
+ logger.info(
+ "ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4)
+ )
+
+ logger.info("")
+
+ src_ifs += f' if chkhsh == "{chkhsh}":\n'
+ src_ifs += f" # ref: {model['repo']}\n"
+ src_ifs += f' res = "{name}"\n'
+
+src_func = f"""
+ def get_vocab_base_pre(self, tokenizer) -> str:
+ # encoding this string and hashing the resulting tokens would (hopefully) give us a unique identifier that
+ # is specific for the BPE pre-tokenizer used by the model
+ # we will use this unique identifier to write a "tokenizer.ggml.pre" entry in the GGUF file which we can
+ # use in llama.cpp to implement the same pre-tokenizer
+
+ chktxt = {repr(chktxt)}
+
+ chktok = tokenizer.encode(chktxt)
+ chkhsh = sha256(str(chktok).encode()).hexdigest()
+
+ logger.debug(f"chktok: {{chktok}}")
+ logger.debug(f"chkhsh: {{chkhsh}}")
+
+ res = None
+
+ # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
+ # or pull the latest version of the model from Huggingface
+ # don't edit the hashes manually!
+{src_ifs}
+ if res is None:
+ logger.warning("\\n")
+ logger.warning("**************************************************************************************")
+ logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!")
+ logger.warning("** There are 2 possible reasons for this:")
+ logger.warning("** - the model has not been added to convert-hf-to-gguf-update.py yet")
+ logger.warning("** - the pre-tokenization config has changed upstream")
+ logger.warning("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.")
+ logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
+ logger.warning("**")
+ logger.warning(f"** chkhsh: {{chkhsh}}")
+ logger.warning("**************************************************************************************")
+ logger.warning("\\n")
+ raise NotImplementedError("BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
+
+ logger.debug(f"tokenizer.ggml.pre: {{repr(res)}}")
+ logger.debug(f"chkhsh: {{chkhsh}}")
+
+ return res
+"""
+
+convert_py_pth = pathlib.Path("convert-hf-to-gguf.py")
+convert_py = convert_py_pth.read_text(encoding="utf-8")
+convert_py = re.sub(
+ r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
+ lambda m: m.group(1) + src_func + m.group(3),
+ convert_py,
+ flags=re.DOTALL | re.MULTILINE,
+)
+
+convert_py_pth.write_text(convert_py, encoding="utf-8")
+
+logger.info("+++ convert-hf-to-gguf.py was updated")
+
+# generate tests for each tokenizer model
+
+tests = [
+ "ied 4 ½ months",
+ "Führer",
+ "",
+ " ",
+ " ",
+ " ",
+ "\t",
+ "\n",
+ "\n\n",
+ "\n\n\n",
+ "\t\n",
+ "Hello world",
+ " Hello world",
+ "Hello World",
+ " Hello World",
+ " Hello World!",
+ "Hello, world!",
+ " Hello, world!",
+ " this is 🦙.cpp",
+ "w048 7tuijk dsdfhu",
+ "нещо на Български",
+ "កាន់តែពិសេសអាចខលចេញ",
+ "🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
+ "Hello",
+ " Hello",
+ " Hello",
+ " Hello",
+ " Hello",
+ " Hello\n Hello",
+ " (",
+ "\n =",
+ "' era",
+ "Hello, y'all! How are you 😁 ?我想在apple工作1314151天~",
+ "!!!!!!",
+ "3",
+ "33",
+ "333",
+ "3333",
+ "33333",
+ "333333",
+ "3333333",
+ "33333333",
+ "333333333",
+ "Cửa Việt", # llama-bpe fails on this
+ " discards",
+ chktxt,
+]
+
+# write the tests to ./models/ggml-vocab-{name}.gguf.inp
+# the format is:
+#
+# test0
+# __ggml_vocab_test__
+# test1
+# __ggml_vocab_test__
+# ...
+#
+
+# with each model, encode all tests and write the results in ./models/ggml-vocab-{name}.gguf.out
+# for each test, write the resulting tokens on a separate line
+
+for model in models:
+ name = model["name"]
+ tokt = model["tokt"]
+
+ # Skip if the tokenizer folder does not exist (e.g. the download failed earlier)
+ if not os.path.exists(f"models/tokenizers/{name}"):
+ logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
+ continue
+
+ # create the tokenizer
+ try:
+ if name == "t5":
+ tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
+ else:
+ tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+ except OSError as e:
+ logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
+ continue # Skip this model and continue with the next one in the loop
+
+ with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
+ for text in tests:
+ f.write(f"{text}")
+ f.write("\n__ggml_vocab_test__\n")
+
+ with open(f"models/ggml-vocab-{name}.gguf.out", "w") as f:
+ for text in tests:
+ res = tokenizer.encode(text, add_special_tokens=False)
+ for r in res:
+ f.write(f" {r}")
+ f.write("\n")
+
+ logger.info(f"Tests for {name} written in ./models/ggml-vocab-{name}.gguf.*")
+
+# generate commands for creating vocab files
+
+logger.info("\nRun the following commands to generate the vocab files for testing:\n")
+
+for model in models:
+ name = model["name"]
+
+ print(
+ f"python3 convert-hf-to-gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only"
+ ) # noqa: NP100
+
+logger.info("\n")
--- /dev/null
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import logging
+import argparse
+import os
+import struct
+import sys
+from enum import IntEnum
+from pathlib import Path
+
+import numpy as np
+
+if 'NO_LOCAL_GGUF' not in os.environ:
+ sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
+import gguf
+
+logger = logging.getLogger("ggml-to-gguf")
+
+
+class GGMLFormat(IntEnum):
+ GGML = 0
+ GGMF = 1
+ GGJT = 2
+
+
+class GGMLFType(IntEnum):
+ ALL_F32 = 0
+ MOSTLY_F16 = 1
+ MOSTLY_Q4_0 = 2
+ MOSTLY_Q4_1 = 3
+ MOSTLY_Q4_1_SOME_F16 = 4
+ MOSTLY_Q8_0 = 7
+ MOSTLY_Q5_0 = 8
+ MOSTLY_Q5_1 = 9
+ MOSTLY_Q2_K = 10
+ MOSTLY_Q3_K_S = 11
+ MOSTLY_Q3_K_M = 12
+ MOSTLY_Q3_K_L = 13
+ MOSTLY_Q4_K_S = 14
+ MOSTLY_Q4_K_M = 15
+ MOSTLY_Q5_K_S = 16
+ MOSTLY_Q5_K_M = 17
+ MOSTLY_Q6_K = 18
+
+
+class Hyperparameters:
+ def __init__(self):
+ self.n_vocab = self.n_embd = self.n_mult = self.n_head = 0
+ self.n_layer = self.n_rot = self.n_ff = 0
+ self.ftype = GGMLFType.ALL_F32
+
+ def set_n_ff(self, model):
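+ # n_ff is not part of the loaded hyperparameters, so infer it from the shape of the layer-0 feed-forward tensor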
+ ff_tensor_idx = model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
+ assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
+ ff_tensor = model.tensors[ff_tensor_idx]
+ self.n_ff = ff_tensor.dims[1]
+
+ def load(self, data, offset):
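+ # the legacy header packs seven little-endian uint32 fields: n_vocab, n_embd, n_mult, n_head, n_layer, n_rot and ftype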
+ (
+ self.n_vocab,
+ self.n_embd,
+ self.n_mult,
+ self.n_head,
+ self.n_layer,
+ self.n_rot,
+ ftype,
+ ) = struct.unpack('<7I', data[offset:offset + (4 * 7)])
+ try:
+ self.ftype = GGMLFType(ftype)
+ except ValueError:
+ raise ValueError(f'Invalid ftype {ftype}')
+ return 4 * 7
+
+ def __str__(self):
+ return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype.name}>'
+
+
+class Vocab:
+ def __init__(self, load_scores = True):
+ self.items = []
+ self.load_scores = load_scores
+
+ def load(self, data, offset, n_vocab):
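+ # each entry is a uint32 byte length, the raw token text, and (unless scores are disabled for bare GGML files) a float32 score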
+ orig_offset = offset
+ for _ in range(n_vocab):
+ itemlen = struct.unpack('<I', data[offset:offset + 4])[0]
+ assert itemlen < 4096, 'Absurd vocab item length'
+ offset += 4
+ item_text = bytes(data[offset:offset + itemlen])
+ offset += itemlen
+ if self.load_scores:
+ item_score = struct.unpack('<f', data[offset:offset + 4])[0]
+ offset += 4
+ else:
+ item_score = 0.0
+ self.items.append((item_text, item_score))
+ return offset - orig_offset
+
+
+class Tensor:
+ def __init__(self, use_padding = True):
+ self.name = None
+ self.dims: tuple[int, ...] = ()
+ self.dtype = None
+ self.start_offset = 0
+ self.len_bytes = np.int64(0)
+ self.use_padding = use_padding
+
+ def load(self, data, offset):
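+ # tensor header: n_dims, name length and dtype as three uint32s, followed by the dims, the name, optional padding to a 32-byte boundary, then the raw tensor data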
+ orig_offset = offset
+ (n_dims, name_len, dtype) = struct.unpack('<3I', data[offset:offset + 12])
+ assert n_dims >= 0 and n_dims <= 4, f'Invalid tensor dimensions {n_dims}'
+ assert name_len < 4096, 'Absurd tensor name length'
+ quant = gguf.GGML_QUANT_SIZES.get(dtype)
+ assert quant is not None, 'Unknown tensor type'
+ (blksize, tysize) = quant
+ offset += 12
+ self.dtype = dtype
+ self.dims = struct.unpack(f'<{n_dims}I', data[offset:offset + (4 * n_dims)])
+ offset += 4 * n_dims
+ self.name = bytes(data[offset:offset + name_len])
+ offset += name_len
+ pad = ((offset + 31) & ~31) - offset if self.use_padding else 0
+ offset += pad
+ n_elems = np.prod(self.dims)
+ n_bytes = np.int64(np.int64(n_elems) * np.int64(tysize)) // np.int64(blksize)
+ self.start_offset = offset
+ self.len_bytes = n_bytes
+ offset += n_bytes
+ return offset - orig_offset
+
+
+class GGMLModel:
+ def __init__(self):
+ self.hyperparameters = None
+ self.vocab = None
+ self.tensor_map = {}
+ self.tensors = []
+
+ def validate_header(self, data, offset):
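+ # the magics below are the little-endian on-disk forms of 'ggml', 'ggmf' and 'ggjt'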
+ magic = bytes(data[offset:offset + 4])
+ if magic == b'GGUF':
+ raise ValueError('File is already in GGUF format.')
+ if magic == b'lmgg':
+ self.file_format = GGMLFormat.GGML
+ self.format_version = 1
+ return 4
+ version = struct.unpack('<I', data[offset + 4:offset + 8])[0]
+ if magic == b'fmgg':
+ if version != 1:
+ raise ValueError(f'Cannot handle unexpected GGMF file version {version}')
+ self.file_format = GGMLFormat.GGMF
+ self.format_version = version
+ return 8
+ if magic == b'tjgg':
+ if version < 1 or version > 3:
+ raise ValueError(f'Cannot handle unexpected GGJT file version {version}')
+ self.file_format = GGMLFormat.GGJT
+ self.format_version = version
+ return 8
+ raise ValueError(f"Unexpected file magic {magic!r}! This doesn't look like a GGML format file.")
+
+ def validate_conversion(self, ftype):
+ err = ''
+ if (self.file_format < GGMLFormat.GGJT or self.format_version < 2):
+ if ftype not in (GGMLFType.ALL_F32, GGMLFType.MOSTLY_F16):
+ err = 'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2.'
+ elif (self.file_format == GGMLFormat.GGJT and self.format_version == 2):
+ if ftype in (GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1,
+ GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
+ err = 'Q4 and Q8 quantizations changed in GGJTv3.'
+ if len(err) > 0:
+ raise ValueError(f'{err} Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')
+
+ def load(self, data, offset):
+ offset += self.validate_header(data, offset)
+ hp = Hyperparameters()
+ offset += hp.load(data, offset)
+ logger.info(f'* File format: {self.file_format.name}v{self.format_version} with ftype {hp.ftype.name}')
+ self.validate_conversion(hp.ftype)
+ vocab = Vocab(load_scores = self.file_format > GGMLFormat.GGML)
+ offset += vocab.load(data, offset, hp.n_vocab)
+ tensors: list[Tensor] = []
+ tensor_map = {}
+ while offset < len(data):
+ tensor = Tensor(use_padding = self.file_format > GGMLFormat.GGMF)
+ offset += tensor.load(data, offset)
+ tensor_map[tensor.name] = len(tensors)
+ tensors.append(tensor)
+ self.hyperparameters = hp
+ self.vocab = vocab
+ self.tensors = tensors
+ self.tensor_map = tensor_map
+ hp.set_n_ff(self)
+ return offset
+
+
+class GGMLToGGUF:
+ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override = None, special_vocab = None):
+ hp = ggml_model.hyperparameters
+ self.model = ggml_model
+ self.data = data
+ self.cfg = cfg
+ self.params_override = params_override
+ self.vocab_override = vocab_override
+ self.special_vocab = special_vocab
+ if params_override is not None:
+ n_kv_head = params_override.n_head_kv
+ else:
+ if cfg.gqa == 1:
+ n_kv_head = hp.n_head
+ else:
+ gqa = float(cfg.gqa)
+ n_kv_head = None
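+ # brute-force search for the head-count divisor that yields the requested GQA factor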
+ for x in range(1, 256):
+ if float(hp.n_head) / float(x) == gqa:
+ n_kv_head = x
+ assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
+ logger.info(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
+ self.n_kv_head = n_kv_head
+ self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)
+
+ def save(self):
+ logger.info('* Preparing to save GGUF file')
+ gguf_writer = gguf.GGUFWriter(
+ self.cfg.output,
+ gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
+ use_temp_file = False)
+ self.add_params(gguf_writer)
+ self.add_vocab(gguf_writer)
+ if self.special_vocab is not None:
+ self.special_vocab.add_to_gguf(gguf_writer)
+ self.add_tensors(gguf_writer)
+ logger.info(" gguf: write header")
+ gguf_writer.write_header_to_file()
+ logger.info(" gguf: write metadata")
+ gguf_writer.write_kv_data_to_file()
+ logger.info(" gguf: write tensors")
+ gguf_writer.write_tensors_to_file()
+ gguf_writer.close()
+
+ def add_params(self, gguf_writer):
+ hp = self.model.hyperparameters
+ cfg = self.cfg
+ if cfg.desc is not None:
+ desc = cfg.desc
+ else:
+ desc = f'converted from legacy {self.model.file_format.name}v{self.model.format_version} {hp.ftype.name} format'
+ try:
+ # Filenames aren't necessarily valid UTF8.
+ name = cfg.name if cfg.name is not None else cfg.input.name
+ except UnicodeDecodeError:
+ name = None
+ logger.info('* Adding model parameters and KV items')
+ if name is not None:
+ gguf_writer.add_name(name)
+ gguf_writer.add_description(desc)
+ gguf_writer.add_file_type(int(hp.ftype))
+ if self.params_override is not None:
+ po = self.params_override
+ assert po.n_embd == hp.n_embd, 'Model hyperparams mismatch'
+ assert po.n_layer == hp.n_layer, 'Model hyperparams mismatch'
+ assert po.n_head == hp.n_head, 'Model hyperparams mismatch'
+ gguf_writer.add_context_length (po.n_ctx)
+ gguf_writer.add_embedding_length (po.n_embd)
+ gguf_writer.add_block_count (po.n_layer)
+ gguf_writer.add_feed_forward_length (po.n_ff)
+ gguf_writer.add_rope_dimension_count(po.n_embd // po.n_head)
+ gguf_writer.add_head_count (po.n_head)
+ gguf_writer.add_head_count_kv (po.n_head_kv)
+ gguf_writer.add_layer_norm_rms_eps (po.f_norm_eps)
+ return
+ gguf_writer.add_context_length(cfg.context_length)
+ gguf_writer.add_embedding_length(hp.n_embd)
+ gguf_writer.add_block_count(hp.n_layer)
+ gguf_writer.add_feed_forward_length(hp.n_ff)
+ gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
+ gguf_writer.add_head_count(hp.n_head)
+ gguf_writer.add_head_count_kv(self.n_kv_head)
+ gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
+
+ def add_vocab(self, gguf_writer):
+ hp = self.model.hyperparameters
+ gguf_writer.add_tokenizer_model('llama')
+ gguf_writer.add_tokenizer_pre('default')
+ tokens = []
+ scores = []
+ toktypes = []
+ if self.vocab_override is not None:
+ vo = self.vocab_override
+ logger.info('* Adding vocab item(s)')
+ for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
+ tokens.append(vbytes)
+ scores.append(score)
+ toktypes.append(ttype)
+ assert len(tokens) == hp.n_vocab, \
+ f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
+ gguf_writer.add_token_list(tokens)
+ gguf_writer.add_token_scores(scores)
+ if len(toktypes) > 0:
+ gguf_writer.add_token_types(toktypes)
+ return
+ logger.info(f'* Adding {hp.n_vocab} vocab item(s)')
+ assert len(self.model.vocab.items) >= 3, 'Cannot handle unexpectedly short model vocab'
+ for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
+ tt = 1 # Normal
+ # Special handling for UNK, BOS, EOS tokens.
+ if tokid <= 2:
+ if tokid == 0:
+ vbytes = b'<unk>'
+ tt = 2
+ elif tokid == 1:
+ vbytes = b'<s>'
+ tt = 3
+ else:
+ vbytes = b'</s>'
+ tt = 3
+ elif len(vbytes) == 0:
+ tt = 3 # Control
+ elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
+ vbytes = bytes(f'<0x{vbytes[0]:02X}>', encoding = 'UTF-8')
+ tt = 6 # Byte
+ else:
+ vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
+ toktypes.append(tt)
+ tokens.append(vbytes)
+ scores.append(vscore)
+ gguf_writer.add_token_list(tokens)
+ gguf_writer.add_token_scores(scores)
+ gguf_writer.add_token_types(toktypes)
+ gguf_writer.add_unk_token_id(0)
+ gguf_writer.add_bos_token_id(1)
+ gguf_writer.add_eos_token_id(2)
+
+ def add_tensors(self, gguf_writer):
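+ # map each legacy tensor name to its GGUF name and pass the raw (possibly quantized) bytes through unchanged; only the first two dims are swapped for the raw shape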
+ tensor_map = self.name_map
+ data = self.data
+ logger.info(f'* Adding {len(self.model.tensors)} tensor(s)')
+ for tensor in self.model.tensors:
+ name = str(tensor.name, 'UTF-8')
+ mapped_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
+ assert mapped_name is not None, f'Bad name {name}'
+ tempdims = list(tensor.dims[:])
+ if len(tempdims) > 1:
+ temp = tempdims[1]
+ tempdims[1] = tempdims[0]
+ tempdims[0] = temp
+ gguf_writer.add_tensor(
+ mapped_name,
+ data[tensor.start_offset:tensor.start_offset + tensor.len_bytes],
+ raw_shape = tempdims,
+ raw_dtype = tensor.dtype)
+
+
+def handle_metadata(cfg, hp):
+ import convert
+ assert cfg.model_metadata_dir.is_dir(), 'Metadata dir is not a directory'
+ hf_config_path = cfg.model_metadata_dir / "config.json"
+ orig_config_path = cfg.model_metadata_dir / "params.json"
+ # We pass a fake model here. "original" mode will check the shapes of some
+ # tensors if information is missing in the .json file: other than that, the
+ # model data isn't used so this should be safe (at least for now).
+ fakemodel = {
+ 'tok_embeddings.weight': convert.LazyTensor.__new__(convert.LazyTensor),
+ 'layers.0.feed_forward.w1.weight': convert.LazyTensor.__new__(convert.LazyTensor),
+ }
+ fakemodel['tok_embeddings.weight'].shape = [hp.n_vocab]
+ fakemodel['layers.0.feed_forward.w1.weight'].shape = [hp.n_ff]
+ if hf_config_path.exists():
+ params = convert.Params.loadHFTransformerJson(fakemodel, hf_config_path)
+ elif orig_config_path.exists():
+ params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
+ else:
+ raise ValueError('Unable to load metadata')
+ vocab_path = Path(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir)
+ vocab_factory = convert.VocabFactory(vocab_path)
+ vocab, special_vocab = vocab_factory.load_vocab(cfg.vocabtype.split(","), cfg.model_metadata_dir)
+ convert.check_vocab_size(params, vocab)
+ return params, vocab, special_vocab
+
+
+def handle_args():
+ parser = argparse.ArgumentParser(description = 'Convert GGML models to GGUF')
+ parser.add_argument('--input', '-i', type = Path, required = True,
+ help = 'Input GGMLv3 filename')
+ parser.add_argument('--output', '-o', type = Path, required = True,
+ help = 'Output GGUF filename')
+ parser.add_argument('--name',
+ help = 'Set model name')
+ parser.add_argument('--desc',
+ help = 'Set model description')
+ parser.add_argument('--gqa', type = int, default = 1,
+ help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
+ parser.add_argument('--eps', default = '5.0e-06',
+ help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
+ parser.add_argument('--context-length', '-c', type=int, default = 2048,
+ help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
+ parser.add_argument('--model-metadata-dir', '-m', type = Path,
+ help = 'Load HuggingFace/.pth vocab and metadata from the specified directory')
+ parser.add_argument("--vocab-dir", type=Path,
+ help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
+ parser.add_argument("--vocabtype", default="spm,hfft",
+ help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm,hfft)")
+ parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+ return parser.parse_args()
+
+
+def main():
+ cfg = handle_args()
+ logging.basicConfig(level=logging.DEBUG if cfg.verbose else logging.INFO)
+ logger.info(f'* Using config: {cfg}')
+ logger.warning('=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===')
+ if cfg.model_metadata_dir is None and (cfg.gqa == 1 or cfg.eps == '5.0e-06'):
+ logger.info('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".')
+ data = np.memmap(cfg.input, mode = 'r')
+ model = GGMLModel()
+ logger.info('* Scanning GGML input file')
+ offset = model.load(data, 0) # noqa
+ logger.info(f'* GGML model hyperparameters: {model.hyperparameters}')
+ vocab_override = None
+ params_override = None
+ special_vocab = None
+ if cfg.model_metadata_dir is not None:
+ (params_override, vocab_override, special_vocab) = handle_metadata(cfg, model.hyperparameters)
+ logger.info('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.')
+ logger.info(f'* Overriding params: {params_override}')
+ logger.info(f'* Overriding vocab: {vocab_override}')
+ logger.info(f'* Special vocab: {special_vocab}')
+ else:
+ logger.warning('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
+ if model.file_format == GGMLFormat.GGML:
+ logger.info('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!')
+ converter = GGMLToGGUF(
+ model, data, cfg,
+ params_override = params_override,
+ vocab_override = vocab_override,
+ special_vocab = special_vocab
+ )
+ converter.save()
+ logger.info(f'* Successful completion. Output saved to: {cfg.output}')
+
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import json
+import os
+import struct
+import sys
+from pathlib import Path
+from typing import Any, BinaryIO, Sequence
+
+import numpy as np
+import torch
+
+if 'NO_LOCAL_GGUF' not in os.environ:
+ sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+import gguf
+
+NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}
+
+
+def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
+ fout.write(b"ggla"[::-1]) # magic (ggml lora)
+ fout.write(struct.pack("i", 1)) # file version
+ fout.write(struct.pack("i", params["r"]))
+ # https://opendelta.readthedocs.io/en/latest/modules/deltas.html says that `lora_alpha` is an int
+ # but some models ship a float value instead
+ # let's convert to int, but fail if lossless conversion is not possible
+ assert (
+ int(params["lora_alpha"]) == params["lora_alpha"]
+ ), "cannot convert float to int losslessly"
+ fout.write(struct.pack("i", int(params["lora_alpha"])))
+
+
+def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]) -> None:
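+ # header layout: dim count, name length and ftype as three int32s, then the shape in reverse order, the name bytes, and padding so the tensor data starts on a 32-byte boundary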
+ sname = name.encode("utf-8")
+ fout.write(
+ struct.pack(
+ "iii",
+ len(shape),
+ len(sname),
+ NUMPY_TYPE_TO_FTYPE[data_type.name],
+ )
+ )
+ fout.write(struct.pack("i" * len(shape), *shape[::-1]))
+ fout.write(sname)
+ fout.seek((fout.tell() + 31) & -32)
+
+
+if __name__ == '__main__':
+ if len(sys.argv) < 2:
+ print(f"Usage: python {sys.argv[0]} <path> [arch]")
+ print(
+ "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
+ )
+ print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
+ sys.exit(1)
+
+ input_json = os.path.join(sys.argv[1], "adapter_config.json")
+ input_model = os.path.join(sys.argv[1], "adapter_model.bin")
+ output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
+
+ if os.path.exists(input_model):
+ model = torch.load(input_model, map_location="cpu")
+ else:
+ input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
+ # lazy import load_file only if lora is in safetensors format.
+ from safetensors.torch import load_file
+ model = load_file(input_model, device="cpu")
+
+ arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
+
+ if arch_name not in gguf.MODEL_ARCH_NAMES.values():
+ print(f"Error: unsupported architecture {arch_name}")
+ sys.exit(1)
+
+ arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
+ name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
+
+ with open(input_json, "r") as f:
+ params = json.load(f)
+
+ if params["peft_type"] != "LORA":
+ print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
+ sys.exit(1)
+
+ if params["fan_in_fan_out"] is True:
+ print("Error: param fan_in_fan_out is not supported")
+ sys.exit(1)
+
+ if params["bias"] is not None and params["bias"] != "none":
+ print("Error: param bias is not supported")
+ sys.exit(1)
+
+ # TODO: these seem to be layers that have been trained but without lora.
+ # doesn't seem widely used but eventually should be supported
+ if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
+ print("Error: param modules_to_save is not supported")
+ sys.exit(1)
+
+ with open(output_path, "wb") as fout:
+ fout.truncate()
+
+ write_file_header(fout, params)
+ for k, v in model.items():
+ orig_k = k
+ if k.endswith(".default.weight"):
+ k = k.replace(".default.weight", ".weight")
+ if k in ["llama_proj.weight", "llama_proj.bias"]:
+ continue
+ if k.endswith("lora_A.weight"):
+ if v.dtype != torch.float16 and v.dtype != torch.float32:
+ v = v.float()
+ v = v.T
+ else:
+ v = v.float()
+
+ t = v.detach().numpy()
+
+ prefix = "base_model.model."
+ if k.startswith(prefix):
+ k = k[len(prefix) :]
+
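+ # split off the ".lora_A.weight" / ".lora_B.weight" suffix so the base tensor name can be mapped; the suffix picks loraA/loraB below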
+ lora_suffixes = (".lora_A.weight", ".lora_B.weight")
+ if k.endswith(lora_suffixes):
+ suffix = k[-len(lora_suffixes[0]):]
+ k = k[: -len(lora_suffixes[0])]
+ else:
+ print(f"Error: unrecognized tensor name {orig_k}")
+ sys.exit(1)
+
+ tname = name_map.get_name(k)
+ if tname is None:
+ print(f"Error: could not map tensor name {orig_k}")
+ print(" Note: the arch parameter must be specified if the model is not llama")
+ sys.exit(1)
+
+ if suffix == ".lora_A.weight":
+ tname += ".weight.loraA"
+ elif suffix == ".lora_B.weight":
+ tname += ".weight.loraB"
+ else:
+ assert False
+
+ print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
+ write_tensor_header(fout, tname, t.shape, t.dtype)
+ t.tofile(fout)
+
+ print(f"Converted {input_json} and {input_model} to {output_path}")
+
--- /dev/null
+#!/usr/bin/env python3
+import argparse
+import os
+import sys
+from pathlib import Path
+from pprint import pprint
+
+import torch
+from sentencepiece import SentencePieceProcessor
+
+if 'NO_LOCAL_GGUF' not in os.environ:
+ sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
+import gguf
+
+
+def _flatten_dict(dct, tensors, prefix=None):
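+ # recursively flatten a nested checkpoint dict into {"a.b.c": tensor} entries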
+ assert isinstance(dct, dict)
+ for key in dct.keys():
+ new_prefix = prefix + '.' + key if prefix is not None else key
+ if isinstance(dct[key], torch.Tensor):
+ tensors[new_prefix] = dct[key]
+ elif isinstance(dct[key], dict):
+ _flatten_dict(dct[key], tensors, new_prefix)
+ else:
+ raise ValueError(type(dct[key]))
+ return None
+
+
+def _get_sentencepiece_tokenizer_info(dir_model: Path):
+ tokenizer_path = dir_model / 'adept_vocab.model'
+ print('gguf: getting sentencepiece tokenizer from', tokenizer_path)
+ tokenizer = SentencePieceProcessor(str(tokenizer_path))
+ print('gguf: adding tokens')
+ tokens: list[bytes] = []
+ scores: list[float] = []
+ toktypes: list[int] = []
+
+ for i in range(tokenizer.vocab_size()):
+ text: bytes
+ score: float
+
+ piece = tokenizer.id_to_piece(i)
+ text = piece.encode("utf-8")
+ score = tokenizer.get_score(i)
+
+ toktype = 1
+ if tokenizer.is_unknown(i):
+ toktype = 2
+ if tokenizer.is_control(i):
+ toktype = 3
+ if tokenizer.is_unused(i):
+ toktype = 5
+ if tokenizer.is_byte(i):
+ toktype = 6
+
+ tokens.append(text)
+ scores.append(score)
+ toktypes.append(toktype)
+ pass
+ return tokens, scores, toktypes
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Convert a Persimmon model from Adept (e.g. Persimmon 8b chat) to a GGML compatible file")
+ parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
+ parser.add_argument("--ckpt-path", type=Path, help="path to persimmon checkpoint .pt file")
+ parser.add_argument("--model-dir", type=Path, help="directory containing model e.g. 8b_chat_model_release")
+ parser.add_argument("--adept-inference-dir", type=str, help="path to adept-inference code directory")
+ args = parser.parse_args()
+ sys.path.append(str(args.adept_inference_dir))
+ persimmon_model = torch.load(args.ckpt_path)
+ hparams = persimmon_model['args']
+ pprint(hparams)
+ tensors: dict[str, torch.Tensor] = {}
+ _flatten_dict(persimmon_model['model'], tensors, None)
+
+ arch = gguf.MODEL_ARCH.PERSIMMON
+ gguf_writer = gguf.GGUFWriter(args.outfile, gguf.MODEL_ARCH_NAMES[arch])
+
+ block_count = hparams.num_layers
+ head_count = hparams.num_attention_heads
+ head_count_kv = head_count
+ ctx_length = hparams.seq_length
+ hidden_size = hparams.hidden_size
+
+ gguf_writer.add_name('persimmon-8b-chat')
+ gguf_writer.add_context_length(ctx_length)
+ gguf_writer.add_embedding_length(hidden_size)
+ gguf_writer.add_block_count(block_count)
+ gguf_writer.add_feed_forward_length(hparams.ffn_hidden_size)
+ # ref: https://github.com/ggerganov/llama.cpp/pull/4889/commits/eea19039fc52ea2dbd1aab45b59ab4e3e29a3443
+ gguf_writer.add_rope_dimension_count(hidden_size // head_count // 2)
+ gguf_writer.add_head_count(head_count)
+ gguf_writer.add_head_count_kv(head_count_kv)
+ gguf_writer.add_rope_freq_base(hparams.rotary_emb_base)
+ gguf_writer.add_layer_norm_eps(hparams.layernorm_epsilon)
+
+ tokens, scores, toktypes = _get_sentencepiece_tokenizer_info(args.model_dir)
+ gguf_writer.add_tokenizer_model('llama')
+ gguf_writer.add_token_list(tokens)
+ gguf_writer.add_token_scores(scores)
+ gguf_writer.add_token_types(toktypes)
+ gguf_writer.add_bos_token_id(71013)
+ gguf_writer.add_eos_token_id(71013)
+
+ tensor_map = gguf.get_tensor_name_map(arch, block_count)
+ print(tensor_map)
+ for name in tensors.keys():
+ data = tensors[name]
+ if name.endswith(".self_attention.rotary_emb.inv_freq"):
+ continue
+ old_dtype = data.dtype
+ # TODO: FP16 conversion produces garbage outputs. (Q8_0 does not, so..?)
+ data = data.to(torch.float32).squeeze().numpy()
+ new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
+ if new_name is None:
+ print("Can not map tensor '" + name + "'")
+ sys.exit()
+ n_dims = len(data.shape)
+ print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+ gguf_writer.add_tensor(new_name, data)
+ print("gguf: write header")
+ gguf_writer.write_header_to_file()
+ print("gguf: write metadata")
+ gguf_writer.write_kv_data_to_file()
+ print("gguf: write tensors")
+ gguf_writer.write_tensors_to_file()
+
+ gguf_writer.close()
+
+ print(f"gguf: model successfully exported to '{args.outfile}'")
+ print("")
+
+
+if __name__ == '__main__':
+ main()
+
--- /dev/null
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+
+[[package]]
+name = "atomicwrites"
+version = "1.4.1"
+description = "Atomic file writes."
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+ {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
+]
+
+[[package]]
+name = "attrs"
+version = "23.2.0"
+description = "Classes Without Boilerplate"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
+ {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
+]
+
+[package.extras]
+cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
+dev = ["attrs[tests]", "pre-commit"]
+docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
+tests = ["attrs[tests-no-zope]", "zope-interface"]
+tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"]
+tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"]
+
+[[package]]
+name = "certifi"
+version = "2024.2.2"
+description = "Python package for providing Mozilla's CA Bundle."
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"},
+ {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"},
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.3.2"
+description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+optional = false
+python-versions = ">=3.7.0"
+files = [
+ {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"},
+ {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"},
+ {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"},
+ {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"},
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"},
+ {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"},
+ {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"},
+ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+files = [
+ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+
+[[package]]
+name = "filelock"
+version = "3.13.1"
+description = "A platform independent file lock."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"},
+ {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"},
+]
+
+[package.extras]
+docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
+typing = ["typing-extensions (>=4.8)"]
+
+[[package]]
+name = "fsspec"
+version = "2024.2.0"
+description = "File-system specification"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "fsspec-2024.2.0-py3-none-any.whl", hash = "sha256:817f969556fa5916bc682e02ca2045f96ff7f586d45110fcb76022063ad2c7d8"},
+ {file = "fsspec-2024.2.0.tar.gz", hash = "sha256:b6ad1a679f760dda52b1168c859d01b7b80648ea6f7f7c7f5a8a91dc3f3ecb84"},
+]
+
+[package.extras]
+abfs = ["adlfs"]
+adl = ["adlfs"]
+arrow = ["pyarrow (>=1)"]
+dask = ["dask", "distributed"]
+devel = ["pytest", "pytest-cov"]
+dropbox = ["dropbox", "dropboxdrivefs", "requests"]
+full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
+fuse = ["fusepy"]
+gcs = ["gcsfs"]
+git = ["pygit2"]
+github = ["requests"]
+gs = ["gcsfs"]
+gui = ["panel"]
+hdfs = ["pyarrow (>=1)"]
+http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"]
+libarchive = ["libarchive-c"]
+oci = ["ocifs"]
+s3 = ["s3fs"]
+sftp = ["paramiko"]
+smb = ["smbprotocol"]
+ssh = ["paramiko"]
+tqdm = ["tqdm"]
+
+[[package]]
+name = "gguf"
+version = "0.7.0"
+description = "Read and write ML models in GGUF for GGML"
+optional = false
+python-versions = ">=3.8"
+files = []
+develop = false
+
+[package.dependencies]
+numpy = ">=1.17"
+
+[package.source]
+type = "directory"
+url = "gguf-py"
+
+[[package]]
+name = "huggingface-hub"
+version = "0.20.3"
+description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
+optional = false
+python-versions = ">=3.8.0"
+files = [
+ {file = "huggingface_hub-0.20.3-py3-none-any.whl", hash = "sha256:d988ae4f00d3e307b0c80c6a05ca6dbb7edba8bba3079f74cda7d9c2e562a7b6"},
+ {file = "huggingface_hub-0.20.3.tar.gz", hash = "sha256:94e7f8e074475fbc67d6a71957b678e1b4a74ff1b64a644fd6cbb83da962d05d"},
+]
+
+[package.dependencies]
+filelock = "*"
+fsspec = ">=2023.5.0"
+packaging = ">=20.9"
+pyyaml = ">=5.1"
+requests = "*"
+tqdm = ">=4.42.1"
+typing-extensions = ">=3.7.4.3"
+
+[package.extras]
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.1.3)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
+cli = ["InquirerPy (==0.3.4)"]
+dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.1.3)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
+fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
+inference = ["aiohttp", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)"]
+quality = ["mypy (==1.5.1)", "ruff (>=0.1.3)"]
+tensorflow = ["graphviz", "pydot", "tensorflow"]
+testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
+torch = ["torch"]
+typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
+
+[[package]]
+name = "idna"
+version = "3.6"
+description = "Internationalized Domain Names in Applications (IDNA)"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"},
+ {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"},
+]
+
+[[package]]
+name = "jinja2"
+version = "3.1.3"
+description = "A very fast and expressive template engine."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
+ {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
+]
+
+[package.dependencies]
+MarkupSafe = ">=2.0"
+
+[package.extras]
+i18n = ["Babel (>=2.7)"]
+
+[[package]]
+name = "markupsafe"
+version = "2.1.5"
+description = "Safely add untrusted strings to HTML/XML markup."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"},
+ {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"},
+ {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"},
+ {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"},
+ {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"},
+ {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"},
+ {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"},
+ {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"},
+]
+
+[[package]]
+name = "more-itertools"
+version = "10.2.0"
+description = "More routines for operating on iterables, beyond itertools"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "more-itertools-10.2.0.tar.gz", hash = "sha256:8fccb480c43d3e99a00087634c06dd02b0d50fbf088b380de5a41a015ec239e1"},
+ {file = "more_itertools-10.2.0-py3-none-any.whl", hash = "sha256:686b06abe565edfab151cb8fd385a05651e1fdf8f0a14191e4439283421f8684"},
+]
+
+[[package]]
+name = "mpmath"
+version = "1.3.0"
+description = "Python library for arbitrary-precision floating-point arithmetic"
+optional = false
+python-versions = "*"
+files = [
+ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
+ {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
+]
+
+[package.extras]
+develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"]
+docs = ["sphinx"]
+gmpy = ["gmpy2 (>=2.1.0a4)"]
+tests = ["pytest (>=4.6)"]
+
+[[package]]
+name = "networkx"
+version = "3.2.1"
+description = "Python package for creating and manipulating graphs and networks"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"},
+ {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"},
+]
+
+[package.extras]
+default = ["matplotlib (>=3.5)", "numpy (>=1.22)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"]
+developer = ["changelist (==0.4)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"]
+doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"]
+extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"]
+test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"]
+
+[[package]]
+name = "numpy"
+version = "1.26.4"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
+ {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
+ {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"},
+ {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"},
+ {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"},
+ {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"},
+ {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"},
+ {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"},
+ {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"},
+ {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"},
+ {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"},
+ {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"},
+ {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"},
+ {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"},
+ {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"},
+ {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"},
+ {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"},
+ {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"},
+ {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"},
+ {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"},
+ {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"},
+ {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"},
+ {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"},
+ {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"},
+ {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"},
+ {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"},
+ {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"},
+ {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"},
+ {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"},
+ {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"},
+ {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"},
+ {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"},
+ {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"},
+ {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"},
+ {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"},
+ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
+]
+
+[[package]]
+name = "packaging"
+version = "23.2"
+description = "Core utilities for Python packages"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
+ {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
+]
+
+[[package]]
+name = "pluggy"
+version = "0.13.1"
+description = "plugin and hook calling mechanisms for python"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+ {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"},
+ {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"},
+]
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+
+[[package]]
+name = "protobuf"
+version = "4.25.3"
+description = ""
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = "sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"},
+ {file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"},
+ {file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"},
+ {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"},
+ {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"},
+ {file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"},
+ {file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"},
+ {file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"},
+ {file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"},
+ {file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"},
+ {file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"},
+]
+
+[[package]]
+name = "py"
+version = "1.11.0"
+description = "library with cross-python path, ini-parsing, io, code, log facilities"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+files = [
+ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
+ {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
+]
+
+[[package]]
+name = "pytest"
+version = "5.4.3"
+description = "pytest: simple powerful testing with Python"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"},
+ {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"},
+]
+
+[package.dependencies]
+atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
+attrs = ">=17.4.0"
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+more-itertools = ">=4.0.0"
+packaging = "*"
+pluggy = ">=0.12,<1.0"
+py = ">=1.5.0"
+wcwidth = "*"
+
+[package.extras]
+checkqa-mypy = ["mypy (==v0.761)"]
+testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.1"
+description = "YAML parser and emitter for Python"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
+ {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+ {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
+ {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
+ {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
+ {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
+ {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+ {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
+ {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
+ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+ {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+ {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+ {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
+ {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+ {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+ {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+ {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"},
+ {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+ {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
+ {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
+ {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
+ {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
+ {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+ {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
+ {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
+ {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
+ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
+]
+
+[[package]]
+name = "regex"
+version = "2023.12.25"
+description = "Alternative regular expression module, to replace re."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"},
+ {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"},
+ {file = "regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d865984b3f71f6d0af64d0d88f5733521698f6c16f445bb09ce746c92c97c586"},
+ {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e0eabac536b4cc7f57a5f3d095bfa557860ab912f25965e08fe1545e2ed8b4c"},
+ {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25a8ad70e716f96e13a637802813f65d8a6760ef48672aa3502f4c24ea8b400"},
+ {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9b6d73353f777630626f403b0652055ebfe8ff142a44ec2cf18ae470395766e"},
+ {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9cc99d6946d750eb75827cb53c4371b8b0fe89c733a94b1573c9dd16ea6c9e4"},
+ {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88d1f7bef20c721359d8675f7d9f8e414ec5003d8f642fdfd8087777ff7f94b5"},
+ {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3fe77aec8f1995611f966d0c656fdce398317f850d0e6e7aebdfe61f40e1cd"},
+ {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7aa47c2e9ea33a4a2a05f40fcd3ea36d73853a2aae7b4feab6fc85f8bf2c9704"},
+ {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df26481f0c7a3f8739fecb3e81bc9da3fcfae34d6c094563b9d4670b047312e1"},
+ {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c40281f7d70baf6e0db0c2f7472b31609f5bc2748fe7275ea65a0b4601d9b392"},
+ {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d94a1db462d5690ebf6ae86d11c5e420042b9898af5dcf278bd97d6bda065423"},
+ {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba1b30765a55acf15dce3f364e4928b80858fa8f979ad41f862358939bdd1f2f"},
+ {file = "regex-2023.12.25-cp310-cp310-win32.whl", hash = "sha256:150c39f5b964e4d7dba46a7962a088fbc91f06e606f023ce57bb347a3b2d4630"},
+ {file = "regex-2023.12.25-cp310-cp310-win_amd64.whl", hash = "sha256:09da66917262d9481c719599116c7dc0c321ffcec4b1f510c4f8a066f8768105"},
+ {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1b9d811f72210fa9306aeb88385b8f8bcef0dfbf3873410413c00aa94c56c2b6"},
+ {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d902a43085a308cef32c0d3aea962524b725403fd9373dea18110904003bac97"},
+ {file = "regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d166eafc19f4718df38887b2bbe1467a4f74a9830e8605089ea7a30dd4da8887"},
+ {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7ad32824b7f02bb3c9f80306d405a1d9b7bb89362d68b3c5a9be53836caebdb"},
+ {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:636ba0a77de609d6510235b7f0e77ec494d2657108f777e8765efc060094c98c"},
+ {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fda75704357805eb953a3ee15a2b240694a9a514548cd49b3c5124b4e2ad01b"},
+ {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f72cbae7f6b01591f90814250e636065850c5926751af02bb48da94dfced7baa"},
+ {file = "regex-2023.12.25-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2a0b1857f18b11e3b0e54ddfefc96af46b0896fb678c85f63fb8c37518b3e7"},
+ {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7502534e55c7c36c0978c91ba6f61703faf7ce733715ca48f499d3dbbd7657e0"},
+ {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e8c7e08bb566de4faaf11984af13f6bcf6a08f327b13631d41d62592681d24fe"},
+ {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:283fc8eed679758de38fe493b7d7d84a198b558942b03f017b1f94dda8efae80"},
+ {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f44dd4d68697559d007462b0a3a1d9acd61d97072b71f6d1968daef26bc744bd"},
+ {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:67d3ccfc590e5e7197750fcb3a2915b416a53e2de847a728cfa60141054123d4"},
+ {file = "regex-2023.12.25-cp311-cp311-win32.whl", hash = "sha256:68191f80a9bad283432385961d9efe09d783bcd36ed35a60fb1ff3f1ec2efe87"},
+ {file = "regex-2023.12.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d2af3f6b8419661a0c421584cfe8aaec1c0e435ce7e47ee2a97e344b98f794f"},
+ {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8a0ccf52bb37d1a700375a6b395bff5dd15c50acb745f7db30415bae3c2b0715"},
+ {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c3c4a78615b7762740531c27cf46e2f388d8d727d0c0c739e72048beb26c8a9d"},
+ {file = "regex-2023.12.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ad83e7545b4ab69216cef4cc47e344d19622e28aabec61574b20257c65466d6a"},
+ {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7a635871143661feccce3979e1727c4e094f2bdfd3ec4b90dfd4f16f571a87a"},
+ {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d498eea3f581fbe1b34b59c697512a8baef88212f92e4c7830fcc1499f5b45a5"},
+ {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43f7cd5754d02a56ae4ebb91b33461dc67be8e3e0153f593c509e21d219c5060"},
+ {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51f4b32f793812714fd5307222a7f77e739b9bc566dc94a18126aba3b92b98a3"},
+ {file = "regex-2023.12.25-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba99d8077424501b9616b43a2d208095746fb1284fc5ba490139651f971d39d9"},
+ {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4bfc2b16e3ba8850e0e262467275dd4d62f0d045e0e9eda2bc65078c0110a11f"},
+ {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8c2c19dae8a3eb0ea45a8448356ed561be843b13cbc34b840922ddf565498c1c"},
+ {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:60080bb3d8617d96f0fb7e19796384cc2467447ef1c491694850ebd3670bc457"},
+ {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b77e27b79448e34c2c51c09836033056a0547aa360c45eeeb67803da7b0eedaf"},
+ {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:518440c991f514331f4850a63560321f833979d145d7d81186dbe2f19e27ae3d"},
+ {file = "regex-2023.12.25-cp312-cp312-win32.whl", hash = "sha256:e2610e9406d3b0073636a3a2e80db05a02f0c3169b5632022b4e81c0364bcda5"},
+ {file = "regex-2023.12.25-cp312-cp312-win_amd64.whl", hash = "sha256:cc37b9aeebab425f11f27e5e9e6cf580be7206c6582a64467a14dda211abc232"},
+ {file = "regex-2023.12.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:da695d75ac97cb1cd725adac136d25ca687da4536154cdc2815f576e4da11c69"},
+ {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d126361607b33c4eb7b36debc173bf25d7805847346dd4d99b5499e1fef52bc7"},
+ {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4719bb05094d7d8563a450cf8738d2e1061420f79cfcc1fa7f0a44744c4d8f73"},
+ {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dd58946bce44b53b06d94aa95560d0b243eb2fe64227cba50017a8d8b3cd3e2"},
+ {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22a86d9fff2009302c440b9d799ef2fe322416d2d58fc124b926aa89365ec482"},
+ {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2aae8101919e8aa05ecfe6322b278f41ce2994c4a430303c4cd163fef746e04f"},
+ {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e692296c4cc2873967771345a876bcfc1c547e8dd695c6b89342488b0ea55cd8"},
+ {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:263ef5cc10979837f243950637fffb06e8daed7f1ac1e39d5910fd29929e489a"},
+ {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:d6f7e255e5fa94642a0724e35406e6cb7001c09d476ab5fce002f652b36d0c39"},
+ {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:88ad44e220e22b63b0f8f81f007e8abbb92874d8ced66f32571ef8beb0643b2b"},
+ {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3a17d3ede18f9cedcbe23d2daa8a2cd6f59fe2bf082c567e43083bba3fb00347"},
+ {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d15b274f9e15b1a0b7a45d2ac86d1f634d983ca40d6b886721626c47a400bf39"},
+ {file = "regex-2023.12.25-cp37-cp37m-win32.whl", hash = "sha256:ed19b3a05ae0c97dd8f75a5d8f21f7723a8c33bbc555da6bbe1f96c470139d3c"},
+ {file = "regex-2023.12.25-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d1047952c0b8104a1d371f88f4ab62e6275567d4458c1e26e9627ad489b445"},
+ {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b43523d7bc2abd757119dbfb38af91b5735eea45537ec6ec3a5ec3f9562a1c53"},
+ {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:efb2d82f33b2212898f1659fb1c2e9ac30493ac41e4d53123da374c3b5541e64"},
+ {file = "regex-2023.12.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7fca9205b59c1a3d5031f7e64ed627a1074730a51c2a80e97653e3e9fa0d415"},
+ {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086dd15e9435b393ae06f96ab69ab2d333f5d65cbe65ca5a3ef0ec9564dfe770"},
+ {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e81469f7d01efed9b53740aedd26085f20d49da65f9c1f41e822a33992cb1590"},
+ {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34e4af5b27232f68042aa40a91c3b9bb4da0eeb31b7632e0091afc4310afe6cb"},
+ {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9852b76ab558e45b20bf1893b59af64a28bd3820b0c2efc80e0a70a4a3ea51c1"},
+ {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff100b203092af77d1a5a7abe085b3506b7eaaf9abf65b73b7d6905b6cb76988"},
+ {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cc038b2d8b1470364b1888a98fd22d616fba2b6309c5b5f181ad4483e0017861"},
+ {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:094ba386bb5c01e54e14434d4caabf6583334090865b23ef58e0424a6286d3dc"},
+ {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5cd05d0f57846d8ba4b71d9c00f6f37d6b97d5e5ef8b3c3840426a475c8f70f4"},
+ {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:9aa1a67bbf0f957bbe096375887b2505f5d8ae16bf04488e8b0f334c36e31360"},
+ {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:98a2636994f943b871786c9e82bfe7883ecdaba2ef5df54e1450fa9869d1f756"},
+ {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37f8e93a81fc5e5bd8db7e10e62dc64261bcd88f8d7e6640aaebe9bc180d9ce2"},
+ {file = "regex-2023.12.25-cp38-cp38-win32.whl", hash = "sha256:d78bd484930c1da2b9679290a41cdb25cc127d783768a0369d6b449e72f88beb"},
+ {file = "regex-2023.12.25-cp38-cp38-win_amd64.whl", hash = "sha256:b521dcecebc5b978b447f0f69b5b7f3840eac454862270406a39837ffae4e697"},
+ {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f7bc09bc9c29ebead055bcba136a67378f03d66bf359e87d0f7c759d6d4ffa31"},
+ {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e14b73607d6231f3cc4622809c196b540a6a44e903bcfad940779c80dffa7be7"},
+ {file = "regex-2023.12.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9eda5f7a50141291beda3edd00abc2d4a5b16c29c92daf8d5bd76934150f3edc"},
+ {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6bb9aa69aacf0f6032c307da718f61a40cf970849e471254e0e91c56ffca95"},
+ {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298dc6354d414bc921581be85695d18912bea163a8b23cac9a2562bbcd5088b1"},
+ {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f4e475a80ecbd15896a976aa0b386c5525d0ed34d5c600b6d3ebac0a67c7ddf"},
+ {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531ac6cf22b53e0696f8e1d56ce2396311254eb806111ddd3922c9d937151dae"},
+ {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22f3470f7524b6da61e2020672df2f3063676aff444db1daa283c2ea4ed259d6"},
+ {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:89723d2112697feaa320c9d351e5f5e7b841e83f8b143dba8e2d2b5f04e10923"},
+ {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ecf44ddf9171cd7566ef1768047f6e66975788258b1c6c6ca78098b95cf9a3d"},
+ {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:905466ad1702ed4acfd67a902af50b8db1feeb9781436372261808df7a2a7bca"},
+ {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4558410b7a5607a645e9804a3e9dd509af12fb72b9825b13791a37cd417d73a5"},
+ {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7e316026cc1095f2a3e8cc012822c99f413b702eaa2ca5408a513609488cb62f"},
+ {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3b1de218d5375cd6ac4b5493e0b9f3df2be331e86520f23382f216c137913d20"},
+ {file = "regex-2023.12.25-cp39-cp39-win32.whl", hash = "sha256:11a963f8e25ab5c61348d090bf1b07f1953929c13bd2309a0662e9ff680763c9"},
+ {file = "regex-2023.12.25-cp39-cp39-win_amd64.whl", hash = "sha256:e693e233ac92ba83a87024e1d32b5f9ab15ca55ddd916d878146f4e3406b5c91"},
+ {file = "regex-2023.12.25.tar.gz", hash = "sha256:29171aa128da69afdf4bde412d5bedc335f2ca8fcfe4489038577d05f16181e5"},
+]
+
+[[package]]
+name = "requests"
+version = "2.31.0"
+description = "Python HTTP for Humans."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
+ {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
+]
+
+[package.dependencies]
+certifi = ">=2017.4.17"
+charset-normalizer = ">=2,<4"
+idna = ">=2.5,<4"
+urllib3 = ">=1.21.1,<3"
+
+[package.extras]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
+
+[[package]]
+name = "safetensors"
+version = "0.4.2"
+description = ""
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "safetensors-0.4.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:69d8bb8384dc2cb5b72c36c4d6980771b293d1a1377b378763f5e37b6bb8d133"},
+ {file = "safetensors-0.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3d420e19fcef96d0067f4de4699682b4bbd85fc8fea0bd45fcd961fdf3e8c82c"},
+ {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ca54742122fa3c4821754adb67318e1cd25c3a22bbf0c5520d5176e77a099ac"},
+ {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b47aa643afdfd66cf7ce4c184092ae734e15d10aba2c2948f24270211801c3c"},
+ {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d88a16bbc330f27e7f2d4caaf6fb061ad0b8a756ecc4033260b0378e128ce8a2"},
+ {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9223b8ac21085db614a510eb3445e7083cae915a9202357555fa939695d4f57"},
+ {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce6cb86133dc8930a7ab5e7438545a7f205f7a1cdd5aaf108c1d0da6bdcfbc2b"},
+ {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b8a628e0ae2bbc334b62952c384aa5f41621d01850f8d67b04a96b9c39dd7326"},
+ {file = "safetensors-0.4.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:88d6beb7f811a081e0e5f1d9669fdac816c45340c04b1eaf7ebfda0ce93ea403"},
+ {file = "safetensors-0.4.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b57fc5b1b54cb12d8690a58a4cf4b7144730d4bde9d98aa0e1dab6295a1cd579"},
+ {file = "safetensors-0.4.2-cp310-none-win32.whl", hash = "sha256:9d87a1c98803c16cf113b9ba03f07b2dce5e8eabfd1811a7f7323fcaa2a1bf47"},
+ {file = "safetensors-0.4.2-cp310-none-win_amd64.whl", hash = "sha256:18930ec1d1ecb526d3d9835abc2489b8f1530877518f0c541e77ef0b7abcbd99"},
+ {file = "safetensors-0.4.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:c5dd2ed788730ed56b415d1a11c62026b8cc8c573f55a2092afb3ab383e94fff"},
+ {file = "safetensors-0.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc41791b33efb9c83a59b731619f3d15f543dfe71f3a793cb8fbf9bd5d0d5d71"},
+ {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c888bf71d5ca12a720f1ed87d407c4918afa022fb247a6546d8fac15b1f112b"},
+ {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e6b2feb4b47226a16a792e6fac3f49442714884a3d4c1008569d5068a3941be9"},
+ {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f41cc0ee4b838ae8f4d8364a1b162067693d11a3893f0863be8c228d40e4d0ee"},
+ {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:51b7228e46c0a483c40ba4b9470dea00fb1ff8685026bb4766799000f6328ac2"},
+ {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02697f8f2be8ca3c37a4958702dbdb1864447ef765e18b5328a1617022dcf164"},
+ {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:27fd8f65cf7c80e4280cae1ee6bcd85c483882f6580821abe71ee1a0d3dcfca7"},
+ {file = "safetensors-0.4.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c487b5f113b0924c9534a07dc034830fb4ef05ce9bb6d78cfe016a7dedfe281f"},
+ {file = "safetensors-0.4.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:da7f6483f3fe67ff39b3a55552552c67930ea10a36e9f2539d36fc205273d767"},
+ {file = "safetensors-0.4.2-cp311-none-win32.whl", hash = "sha256:52a7012f6cb9cb4a132760b6308daede18a9f5f8952ce08adc7c67a7d865c2d8"},
+ {file = "safetensors-0.4.2-cp311-none-win_amd64.whl", hash = "sha256:4d1361a097ac430b310ce9eed8ed4746edee33ddafdfbb965debc8966fc34dc2"},
+ {file = "safetensors-0.4.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:77af8aa0edcc2863760fd6febbfdb82e88fd75d0e60c1ce4ba57208ba5e4a89b"},
+ {file = "safetensors-0.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846666c1c5a8c8888d2dfda8d3921cb9cb8e2c5f78365be756c11021e75a0a2a"},
+ {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f4bfc7ea19b446bfad41510d4b4c76101698c00caaa8a332c8edd8090a412ef"},
+ {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:233436fd30f27ffeb3c3780d0b84f496518868445c7a8db003639a649cc98453"},
+ {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a09237a795d11cd11f9dae505d170a29b5616151db1e10c14f892b11caadc7d"},
+ {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de01c9a3a3b7b69627d624ff69d9f11d28ce9908eea2fb6245adafa4b1d43df6"},
+ {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c1f25c5069ee42a5bcffdc66c300a407941edd73f3239e9fdefd26216407391"},
+ {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7a73b3649456d09ca8506140d44484b63154a7378434cc1e8719f8056550b224"},
+ {file = "safetensors-0.4.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e1625a8d07d046e968bd5c4961810aba1225984e4fb9243626f9d04a06ed3fee"},
+ {file = "safetensors-0.4.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f74c86b25615cb24ad4cff765a2eefc09d71bf0fed97588cf585aad9c38fbb4"},
+ {file = "safetensors-0.4.2-cp312-none-win32.whl", hash = "sha256:8523b9c5777d771bcde5c2389c03f1cdf7ebe8797432a1bd5e345efe25c55987"},
+ {file = "safetensors-0.4.2-cp312-none-win_amd64.whl", hash = "sha256:dcff0243e1737a21f83d664c63fed89d1f532c23fc6830d0427279fabd789ccb"},
+ {file = "safetensors-0.4.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:96ad3d7d472612e26cbe413922b4fb13933310f0511d346ea5cc9a1e856e52eb"},
+ {file = "safetensors-0.4.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:88250922401b5ae4e37de929178caf46be47ed16c817b2237b81679bec07c120"},
+ {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d40443554142fc0ab30652d5cc8554c4b7a613513bde00373e18afd5de8cbe4b"},
+ {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:27f53f70106224d32d874aacecbeb4a6e4c5b16a1d2006d0e876d97229086d71"},
+ {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cc068afe23734dfb26ce19db0a7877499ddf73b1d55ceb762417e8da4a1b05fb"},
+ {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9be1918eb8d43a11a6f8806759fccfa0eeb0542b12924caba66af8a7800ad01a"},
+ {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41911087d20a7bbd78cb4ad4f98aab0c431533107584df6635d8b54b99945573"},
+ {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:50771c662aab909f31e94d048e76861fd027d66076ea773eef2e66c717766e24"},
+ {file = "safetensors-0.4.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:13f2e57be007b7ea9329133d2399e6bdfcf1910f655440a4da17df3a45afcd30"},
+ {file = "safetensors-0.4.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c772147e6395bc829842e0a98e1b30c67fe25d816299c28196488511d5a5e951"},
+ {file = "safetensors-0.4.2-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:36239a0060b537a3e8c473df78cffee14c3ec4f51d5f1a853af99371a2fb2a35"},
+ {file = "safetensors-0.4.2-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:d0cbb7664fad2c307f95195f951b7059e95dc23e0e1822e5978c8b500098543c"},
+ {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b3e55adb6bd9dc1c2a341e72f48f075953fa35d173dd8e29a95b3b02d0d1462"},
+ {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42f743b3cca863fba53ca57a193f510e5ec359b97f38c282437716b6768e4a25"},
+ {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e6af4a6dbeb06c4e6e7d46cf9c716cbc4cc5ef62584fd8a7c0fe558562df45"},
+ {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a492ba21b5c8f14ee5ec9b20f42ba969e53ca1f909a4d04aad736b66a341dcc2"},
+ {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b25b8233a1a85dc67e39838951cfb01595d792f3b7b644add63edb652992e030"},
+ {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fd27e063fbdafe776f7b1714da59110e88f270e86db00788a8fd65f4eacfeba7"},
+ {file = "safetensors-0.4.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1b6fa399f251bbeb52029bf5a0ac2878d7705dd3612a2f8895b48e9c11f0367d"},
+ {file = "safetensors-0.4.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de642d46b459e4afd5c2020b26c0d6d869a171ea00411897d5776c127cac74f0"},
+ {file = "safetensors-0.4.2-cp37-none-win32.whl", hash = "sha256:77b72d17754c93bb68f3598182f14d78776e0b9b31682ca5bb2c7c5bd9a75267"},
+ {file = "safetensors-0.4.2-cp37-none-win_amd64.whl", hash = "sha256:d36ee3244d461cd655aeef493792c3bccf4875282f8407fd9af99e9a41cf2530"},
+ {file = "safetensors-0.4.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:16b6b3884f7876c6b3b23a742428223a7170a5a9dac819d8c12a1569422c4b5a"},
+ {file = "safetensors-0.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ee25d311493fbbe0be9d395faee46e9d79e8948f461e388ff39e59875ed9a350"},
+ {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eed8097968585cd752a1171f86fce9aa1d89a29033e5cd8bec5a502e29f6b7af"},
+ {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:880e6865cf72cb67f9ab8d04a3c4b49dd95ae92fb1583929ce65aed94e1f685f"},
+ {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91290f83daf80ce6d1a7f629b244443c200060a80f908b29d879021409e5ea94"},
+ {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3517d568486ab3508a7acc360b82d7a4a3e26b86efdf210a9ecd9d233c40708a"},
+ {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1f43a77eb38540f782999e5dc5645164fe9027d3f0194f6c9a5126168017efa"},
+ {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b684d9818aa5d63fddc65f7d0151968037d255d91adf74eba82125b41c680aaa"},
+ {file = "safetensors-0.4.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ab1f5d84185f9fefaf21413efb764e4908057b8a9a0b987ede890c353490fd70"},
+ {file = "safetensors-0.4.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2bd979642e6c3a517ef4b84ff36c2fee4015664fea05a61154fc565978347553"},
+ {file = "safetensors-0.4.2-cp38-none-win32.whl", hash = "sha256:11be6e7afed29e5a5628f0aa6214e34bc194da73f558dc69fc7d56e07037422a"},
+ {file = "safetensors-0.4.2-cp38-none-win_amd64.whl", hash = "sha256:2f7a6e5d29bd2cc340cffaa391fa437b1be9d21a2bd8b8724d2875d13a6ef2a9"},
+ {file = "safetensors-0.4.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a5a921b4fe6925f9942adff3ebae8c16e0487908c54586a5a42f35b59fd69794"},
+ {file = "safetensors-0.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b691727228c28f2d82d8a92b2bc26e7a1f129ee40b2f2a3185b5974e038ed47c"},
+ {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91ca1056decc4e981248786e87b2a202d4841ee5f99d433f1adf3d44d4bcfa0e"},
+ {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:55969fd2e6fdb38dc221b0ab380668c21b0efa12a7562db9924759faa3c51757"},
+ {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ae429bfaecc10ab5fe78c93009b3d1656c1581da560041e700eadb497dbe7a4"},
+ {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff88f194fe4ac50b463a4a6f0c03af9ad72eb5d24ec6d6730af59522e37fedb"},
+ {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a80cb48d0a447f8dd18e61813efa7d3f8f8d52edf0f05806abc0c59b83431f57"},
+ {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b286fb7adfee70a4189898ac2342b8a67d5f493e6b21b0af89ca8eac1b967cbf"},
+ {file = "safetensors-0.4.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ceeff9ddbab4f78738489eb6682867ae946178776f33699737b2129b5394dc1"},
+ {file = "safetensors-0.4.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a26fae748a7488cb3aac381eddfa818c42052c87b5e689fb4c6e82ed58cec209"},
+ {file = "safetensors-0.4.2-cp39-none-win32.whl", hash = "sha256:039a42ab33c9d68b39706fd38f1922ace26866eff246bf20271edb619f5f848b"},
+ {file = "safetensors-0.4.2-cp39-none-win_amd64.whl", hash = "sha256:b3a3e1f5b85859e398773f064943b62a4059f225008a2a8ee6add1edcf77cacf"},
+ {file = "safetensors-0.4.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:4e70d442ad17e8b153ef9095bf48ea64f15a66bf26dc2b6ca94660c154edbc24"},
+ {file = "safetensors-0.4.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b90f1d9809caf4ff395951b4703295a68d12907f6945bbc3129e934ff8ae46f6"},
+ {file = "safetensors-0.4.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c7ac9ad3728838006598e296b3ae9f27d80b489effd4685b92d97b3fc4c98f6"},
+ {file = "safetensors-0.4.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5730d77e6ff7f4c7039e20913661ad0ea2f86c09e71c039e73dfdd1f394f08"},
+ {file = "safetensors-0.4.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:44feb8cb156d6803dcd19fc6b81b27235f29b877660605a6ac35e1da7d64f0e4"},
+ {file = "safetensors-0.4.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:523a241c33e7c827ab9a3a23760d75c7d062f43dfe55b6b019409f89b0fb52d1"},
+ {file = "safetensors-0.4.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fb18300e8eb74291225214f26c9a8ae2110fd61a6c9b5a2ff4c4e0eb1bb9a998"},
+ {file = "safetensors-0.4.2-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fe5437ff9fb116e44f2ab558981249ae63f978392b4576e62fcfe167d353edbc"},
+ {file = "safetensors-0.4.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9304a0934ced5a5d272f39de36291dc141dfc152d277f03fb4d65f2fb2ffa7c"},
+ {file = "safetensors-0.4.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:160ba1b1e11cf874602c233ab80a14f588571d09556cbc3586900121d622b5ed"},
+ {file = "safetensors-0.4.2-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04fcd6fcf7d9c13c7e5dc7e08de5e492ee4daa8f4ad74b4d8299d3eb0224292f"},
+ {file = "safetensors-0.4.2-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:906d14c4a677d35834fb0f3a5455ef8305e1bba10a5e0f2e0f357b3d1ad989f2"},
+ {file = "safetensors-0.4.2-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:df3fcdec0cd543084610d1f09c65cdb10fb3079f79bceddc092b0d187c6a265b"},
+ {file = "safetensors-0.4.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5ca76f13fb1cef242ea3ad2cb37388e7d005994f42af8b44bee56ba48b2d45ce"},
+ {file = "safetensors-0.4.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:278a1a3414c020785decdcd741c578725721274d2f9f787fcc930882e83b89cc"},
+ {file = "safetensors-0.4.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b5a461cc68ecd42d9d546e5e1268a39d8ede7934a68d1ce17c3c659cb829d6"},
+ {file = "safetensors-0.4.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2341411412a41671d25e26bed59ec121e46bf4fadb8132895e610411c4b9681"},
+ {file = "safetensors-0.4.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3497ac3895acf17c5f98197f1fa4769f09c5e7ede07fcb102f1c201e663e052c"},
+ {file = "safetensors-0.4.2-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:01b5e71d3754d2201294f1eb7a6d59cce3a5702ff96d83d226571b2ca2183837"},
+ {file = "safetensors-0.4.2-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3627dbd1ea488dd8046a0491de5087f3c0d641e7acc80c0189a33c69398f1cd1"},
+ {file = "safetensors-0.4.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9d56f0ef53afad26ec54ceede78a43e9a23a076dadbbda7b44d304c591abf4c1"},
+ {file = "safetensors-0.4.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b259ca73d42daf658a1bda463f1f83885ae4d93a60869be80d7f7dfcc9d8bbb5"},
+ {file = "safetensors-0.4.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ebc3cd401e4eb54e7c0a70346be565e81942d9a41fafd5f4bf7ab3a55d10378"},
+ {file = "safetensors-0.4.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5bc384a0309b706aa0425c93abb0390508a61bf029ce99c7d9df4220f25871a5"},
+ {file = "safetensors-0.4.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af2d8f7235d8a08fbccfb8394387890e7fa38942b349a94e6eff13c52ac98087"},
+ {file = "safetensors-0.4.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0911315bbcc5289087d063c2c2c7ccd711ea97a7e557a7bce005ac2cf80146aa"},
+ {file = "safetensors-0.4.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1efe31673be91832d73439a2af426743e1395fc9ef7b081914e9e1d567bd7b5f"},
+ {file = "safetensors-0.4.2.tar.gz", hash = "sha256:acc85dcb09ec5e8aa787f588d7ad4d55c103f31e4ff060e17d92cc0e8b8cac73"},
+]
+
+[package.extras]
+all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"]
+dev = ["safetensors[all]"]
+jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"]
+mlx = ["mlx (>=0.0.9)"]
+numpy = ["numpy (>=1.21.6)"]
+paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"]
+pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"]
+quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"]
+tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"]
+testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"]
+torch = ["safetensors[numpy]", "torch (>=1.10)"]
+
+[[package]]
+name = "sentencepiece"
+version = "0.1.99"
+description = "SentencePiece python wrapper"
+optional = false
+python-versions = "*"
+files = [
+ {file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0eb528e70571b7c02723e5804322469b82fe7ea418c96051d0286c0fa028db73"},
+ {file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77d7fafb2c4e4659cbdf303929503f37a26eabc4ff31d3a79bf1c5a1b338caa7"},
+ {file = "sentencepiece-0.1.99-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be9cf5b9e404c245aeb3d3723c737ba7a8f5d4ba262ef233a431fa6c45f732a0"},
+ {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baed1a26464998f9710d20e52607c29ffd4293e7c71c6a1f83f51ad0911ec12c"},
+ {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9832f08bb372d4c8b567612f8eab9e36e268dff645f1c28f9f8e851be705f6d1"},
+ {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:019e7535108e309dae2b253a75834fc3128240aa87c00eb80732078cdc182588"},
+ {file = "sentencepiece-0.1.99-cp310-cp310-win32.whl", hash = "sha256:fa16a830416bb823fa2a52cbdd474d1f7f3bba527fd2304fb4b140dad31bb9bc"},
+ {file = "sentencepiece-0.1.99-cp310-cp310-win_amd64.whl", hash = "sha256:14b0eccb7b641d4591c3e12ae44cab537d68352e4d3b6424944f0c447d2348d5"},
+ {file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6d3c56f24183a1e8bd61043ff2c58dfecdc68a5dd8955dc13bab83afd5f76b81"},
+ {file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ed6ea1819fd612c989999e44a51bf556d0ef6abfb553080b9be3d347e18bcfb7"},
+ {file = "sentencepiece-0.1.99-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2a0260cd1fb7bd8b4d4f39dc2444a8d5fd4e0a0c4d5c899810ef1abf99b2d45"},
+ {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a1abff4d1ff81c77cac3cc6fefa34fa4b8b371e5ee51cb7e8d1ebc996d05983"},
+ {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:004e6a621d4bc88978eecb6ea7959264239a17b70f2cbc348033d8195c9808ec"},
+ {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db361e03342c41680afae5807590bc88aa0e17cfd1a42696a160e4005fcda03b"},
+ {file = "sentencepiece-0.1.99-cp311-cp311-win32.whl", hash = "sha256:2d95e19168875b70df62916eb55428a0cbcb834ac51d5a7e664eda74def9e1e0"},
+ {file = "sentencepiece-0.1.99-cp311-cp311-win_amd64.whl", hash = "sha256:f90d73a6f81248a909f55d8e6ef56fec32d559e1e9af045f0b0322637cb8e5c7"},
+ {file = "sentencepiece-0.1.99-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:62e24c81e74bd87a6e0d63c51beb6527e4c0add67e1a17bac18bcd2076afcfeb"},
+ {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57efcc2d51caff20d9573567d9fd3f854d9efe613ed58a439c78c9f93101384a"},
+ {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a904c46197993bd1e95b93a6e373dca2f170379d64441041e2e628ad4afb16f"},
+ {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d89adf59854741c0d465f0e1525b388c0d174f611cc04af54153c5c4f36088c4"},
+ {file = "sentencepiece-0.1.99-cp36-cp36m-win32.whl", hash = "sha256:47c378146928690d1bc106fdf0da768cebd03b65dd8405aa3dd88f9c81e35dba"},
+ {file = "sentencepiece-0.1.99-cp36-cp36m-win_amd64.whl", hash = "sha256:9ba142e7a90dd6d823c44f9870abdad45e6c63958eb60fe44cca6828d3b69da2"},
+ {file = "sentencepiece-0.1.99-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b7b1a9ae4d7c6f1f867e63370cca25cc17b6f4886729595b885ee07a58d3cec3"},
+ {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0f644c9d4d35c096a538507b2163e6191512460035bf51358794a78515b74f7"},
+ {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8843d23a0f686d85e569bd6dcd0dd0e0cbc03731e63497ca6d5bacd18df8b85"},
+ {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e6f690a1caebb4867a2e367afa1918ad35be257ecdb3455d2bbd787936f155"},
+ {file = "sentencepiece-0.1.99-cp37-cp37m-win32.whl", hash = "sha256:8a321866c2f85da7beac74a824b4ad6ddc2a4c9bccd9382529506d48f744a12c"},
+ {file = "sentencepiece-0.1.99-cp37-cp37m-win_amd64.whl", hash = "sha256:c42f753bcfb7661c122a15b20be7f684b61fc8592c89c870adf52382ea72262d"},
+ {file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:85b476406da69c70586f0bb682fcca4c9b40e5059814f2db92303ea4585c650c"},
+ {file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cfbcfe13c69d3f87b7fcd5da168df7290a6d006329be71f90ba4f56bc77f8561"},
+ {file = "sentencepiece-0.1.99-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:445b0ec381af1cd4eef95243e7180c63d9c384443c16c4c47a28196bd1cda937"},
+ {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6890ea0f2b4703f62d0bf27932e35808b1f679bdb05c7eeb3812b935ba02001"},
+ {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb71af492b0eefbf9f2501bec97bcd043b6812ab000d119eaf4bd33f9e283d03"},
+ {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b866b5bd3ddd54166bbcbf5c8d7dd2e0b397fac8537991c7f544220b1f67bc"},
+ {file = "sentencepiece-0.1.99-cp38-cp38-win32.whl", hash = "sha256:b133e8a499eac49c581c3c76e9bdd08c338cc1939e441fee6f92c0ccb5f1f8be"},
+ {file = "sentencepiece-0.1.99-cp38-cp38-win_amd64.whl", hash = "sha256:0eaf3591dd0690a87f44f4df129cf8d05d8a4029b5b6709b489b8e27f9a9bcff"},
+ {file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38efeda9bbfb55052d482a009c6a37e52f42ebffcea9d3a98a61de7aee356a28"},
+ {file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c030b081dc1e1bcc9fadc314b19b740715d3d566ad73a482da20d7d46fd444c"},
+ {file = "sentencepiece-0.1.99-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84dbe53e02e4f8a2e45d2ac3e430d5c83182142658e25edd76539b7648928727"},
+ {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b0f55d0a0ee1719b4b04221fe0c9f0c3461dc3dabd77a035fa2f4788eb3ef9a"},
+ {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e800f206cd235dc27dc749299e05853a4e4332e8d3dfd81bf13d0e5b9007d9"},
+ {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ae1c40cda8f9d5b0423cfa98542735c0235e7597d79caf318855cdf971b2280"},
+ {file = "sentencepiece-0.1.99-cp39-cp39-win32.whl", hash = "sha256:c84ce33af12ca222d14a1cdd37bd76a69401e32bc68fe61c67ef6b59402f4ab8"},
+ {file = "sentencepiece-0.1.99-cp39-cp39-win_amd64.whl", hash = "sha256:350e5c74d739973f1c9643edb80f7cc904dc948578bcb1d43c6f2b173e5d18dd"},
+ {file = "sentencepiece-0.1.99.tar.gz", hash = "sha256:189c48f5cb2949288f97ccdb97f0473098d9c3dcf5a3d99d4eabe719ec27297f"},
+]
+
+[[package]]
+name = "sympy"
+version = "1.12"
+description = "Computer algebra system (CAS) in Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
+ {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
+]
+
+[package.dependencies]
+mpmath = ">=0.19"
+
+[[package]]
+name = "tokenizers"
+version = "0.15.2"
+description = ""
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "tokenizers-0.15.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:52f6130c9cbf70544287575a985bf44ae1bda2da7e8c24e97716080593638012"},
+ {file = "tokenizers-0.15.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:054c1cc9c6d68f7ffa4e810b3d5131e0ba511b6e4be34157aa08ee54c2f8d9ee"},
+ {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a9b9b070fdad06e347563b88c278995735292ded1132f8657084989a4c84a6d5"},
+ {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea621a7eef4b70e1f7a4e84dd989ae3f0eeb50fc8690254eacc08acb623e82f1"},
+ {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cf7fd9a5141634fa3aa8d6b7be362e6ae1b4cda60da81388fa533e0b552c98fd"},
+ {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44f2a832cd0825295f7179eaf173381dc45230f9227ec4b44378322d900447c9"},
+ {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b9ec69247a23747669ec4b0ca10f8e3dfb3545d550258129bd62291aabe8605"},
+ {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40b6a4c78da863ff26dbd5ad9a8ecc33d8a8d97b535172601cf00aee9d7ce9ce"},
+ {file = "tokenizers-0.15.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5ab2a4d21dcf76af60e05af8063138849eb1d6553a0d059f6534357bce8ba364"},
+ {file = "tokenizers-0.15.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a47acfac7e511f6bbfcf2d3fb8c26979c780a91e06fb5b9a43831b2c0153d024"},
+ {file = "tokenizers-0.15.2-cp310-none-win32.whl", hash = "sha256:064ff87bb6acdbd693666de9a4b692add41308a2c0ec0770d6385737117215f2"},
+ {file = "tokenizers-0.15.2-cp310-none-win_amd64.whl", hash = "sha256:3b919afe4df7eb6ac7cafd2bd14fb507d3f408db7a68c43117f579c984a73843"},
+ {file = "tokenizers-0.15.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:89cd1cb93e4b12ff39bb2d626ad77e35209de9309a71e4d3d4672667b4b256e7"},
+ {file = "tokenizers-0.15.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cfed5c64e5be23d7ee0f0e98081a25c2a46b0b77ce99a4f0605b1ec43dd481fa"},
+ {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a907d76dcfda37023ba203ab4ceeb21bc5683436ebefbd895a0841fd52f6f6f2"},
+ {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20ea60479de6fc7b8ae756b4b097572372d7e4032e2521c1bbf3d90c90a99ff0"},
+ {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:48e2b9335be2bc0171df9281385c2ed06a15f5cf121c44094338306ab7b33f2c"},
+ {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:112a1dd436d2cc06e6ffdc0b06d55ac019a35a63afd26475205cb4b1bf0bfbff"},
+ {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4620cca5c2817177ee8706f860364cc3a8845bc1e291aaf661fb899e5d1c45b0"},
+ {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccd73a82751c523b3fc31ff8194702e4af4db21dc20e55b30ecc2079c5d43cb7"},
+ {file = "tokenizers-0.15.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:107089f135b4ae7817affe6264f8c7a5c5b4fd9a90f9439ed495f54fcea56fb4"},
+ {file = "tokenizers-0.15.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0ff110ecc57b7aa4a594396525a3451ad70988e517237fe91c540997c4e50e29"},
+ {file = "tokenizers-0.15.2-cp311-none-win32.whl", hash = "sha256:6d76f00f5c32da36c61f41c58346a4fa7f0a61be02f4301fd30ad59834977cc3"},
+ {file = "tokenizers-0.15.2-cp311-none-win_amd64.whl", hash = "sha256:cc90102ed17271cf0a1262babe5939e0134b3890345d11a19c3145184b706055"},
+ {file = "tokenizers-0.15.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f86593c18d2e6248e72fb91c77d413a815153b8ea4e31f7cd443bdf28e467670"},
+ {file = "tokenizers-0.15.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0774bccc6608eca23eb9d620196687c8b2360624619623cf4ba9dc9bd53e8b51"},
+ {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d0222c5b7c9b26c0b4822a82f6a7011de0a9d3060e1da176f66274b70f846b98"},
+ {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3835738be1de66624fff2f4f6f6684775da4e9c00bde053be7564cbf3545cc66"},
+ {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0143e7d9dcd811855c1ce1ab9bf5d96d29bf5e528fd6c7824d0465741e8c10fd"},
+ {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db35825f6d54215f6b6009a7ff3eedee0848c99a6271c870d2826fbbedf31a38"},
+ {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f5e64b0389a2be47091d8cc53c87859783b837ea1a06edd9d8e04004df55a5c"},
+ {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e0480c452217edd35eca56fafe2029fb4d368b7c0475f8dfa3c5c9c400a7456"},
+ {file = "tokenizers-0.15.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a33ab881c8fe70474980577e033d0bc9a27b7ab8272896e500708b212995d834"},
+ {file = "tokenizers-0.15.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a308a607ca9de2c64c1b9ba79ec9a403969715a1b8ba5f998a676826f1a7039d"},
+ {file = "tokenizers-0.15.2-cp312-none-win32.whl", hash = "sha256:b8fcfa81bcb9447df582c5bc96a031e6df4da2a774b8080d4f02c0c16b42be0b"},
+ {file = "tokenizers-0.15.2-cp312-none-win_amd64.whl", hash = "sha256:38d7ab43c6825abfc0b661d95f39c7f8af2449364f01d331f3b51c94dcff7221"},
+ {file = "tokenizers-0.15.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:38bfb0204ff3246ca4d5e726e8cc8403bfc931090151e6eede54d0e0cf162ef0"},
+ {file = "tokenizers-0.15.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c861d35e8286a53e06e9e28d030b5a05bcbf5ac9d7229e561e53c352a85b1fc"},
+ {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:936bf3842db5b2048eaa53dade907b1160f318e7c90c74bfab86f1e47720bdd6"},
+ {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:620beacc3373277700d0e27718aa8b25f7b383eb8001fba94ee00aeea1459d89"},
+ {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2735ecbbf37e52db4ea970e539fd2d450d213517b77745114f92867f3fc246eb"},
+ {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:473c83c5e2359bb81b0b6fde870b41b2764fcdd36d997485e07e72cc3a62264a"},
+ {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:968fa1fb3c27398b28a4eca1cbd1e19355c4d3a6007f7398d48826bbe3a0f728"},
+ {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:865c60ae6eaebdde7da66191ee9b7db52e542ed8ee9d2c653b6d190a9351b980"},
+ {file = "tokenizers-0.15.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7c0d8b52664ab2d4a8d6686eb5effc68b78608a9008f086a122a7b2996befbab"},
+ {file = "tokenizers-0.15.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f33dfbdec3784093a9aebb3680d1f91336c56d86cc70ddf88708251da1fe9064"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:d44ba80988ff9424e33e0a49445072ac7029d8c0e1601ad25a0ca5f41ed0c1d6"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:dce74266919b892f82b1b86025a613956ea0ea62a4843d4c4237be2c5498ed3a"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0ef06b9707baeb98b316577acb04f4852239d856b93e9ec3a299622f6084e4be"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c73e2e74bbb07910da0d37c326869f34113137b23eadad3fc00856e6b3d9930c"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4eeb12daf02a59e29f578a865f55d87cd103ce62bd8a3a5874f8fdeaa82e336b"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ba9f6895af58487ca4f54e8a664a322f16c26bbb442effd01087eba391a719e"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccec77aa7150e38eec6878a493bf8c263ff1fa8a62404e16c6203c64c1f16a26"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3f40604f5042ff210ba82743dda2b6aa3e55aa12df4e9f2378ee01a17e2855e"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5645938a42d78c4885086767c70923abad047163d809c16da75d6b290cb30bbe"},
+ {file = "tokenizers-0.15.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:05a77cbfebe28a61ab5c3891f9939cc24798b63fa236d84e5f29f3a85a200c00"},
+ {file = "tokenizers-0.15.2-cp37-none-win32.whl", hash = "sha256:361abdc068e8afe9c5b818769a48624687fb6aaed49636ee39bec4e95e1a215b"},
+ {file = "tokenizers-0.15.2-cp37-none-win_amd64.whl", hash = "sha256:7ef789f83eb0f9baeb4d09a86cd639c0a5518528f9992f38b28e819df397eb06"},
+ {file = "tokenizers-0.15.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4fe1f74a902bee74a3b25aff180fbfbf4f8b444ab37c4d496af7afd13a784ed2"},
+ {file = "tokenizers-0.15.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c4b89038a684f40a6b15d6b09f49650ac64d951ad0f2a3ea9169687bbf2a8ba"},
+ {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d05a1b06f986d41aed5f2de464c003004b2df8aaf66f2b7628254bcbfb72a438"},
+ {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508711a108684111ec8af89d3a9e9e08755247eda27d0ba5e3c50e9da1600f6d"},
+ {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:daa348f02d15160cb35439098ac96e3a53bacf35885072611cd9e5be7d333daa"},
+ {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:494fdbe5932d3416de2a85fc2470b797e6f3226c12845cadf054dd906afd0442"},
+ {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2d60f5246f4da9373f75ff18d64c69cbf60c3bca597290cea01059c336d2470"},
+ {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93268e788825f52de4c7bdcb6ebc1fcd4a5442c02e730faa9b6b08f23ead0e24"},
+ {file = "tokenizers-0.15.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6fc7083ab404019fc9acafe78662c192673c1e696bd598d16dc005bd663a5cf9"},
+ {file = "tokenizers-0.15.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:41e39b41e5531d6b2122a77532dbea60e171ef87a3820b5a3888daa847df4153"},
+ {file = "tokenizers-0.15.2-cp38-none-win32.whl", hash = "sha256:06cd0487b1cbfabefb2cc52fbd6b1f8d4c37799bd6c6e1641281adaa6b2504a7"},
+ {file = "tokenizers-0.15.2-cp38-none-win_amd64.whl", hash = "sha256:5179c271aa5de9c71712e31cb5a79e436ecd0d7532a408fa42a8dbfa4bc23fd9"},
+ {file = "tokenizers-0.15.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:82f8652a74cc107052328b87ea8b34291c0f55b96d8fb261b3880216a9f9e48e"},
+ {file = "tokenizers-0.15.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:02458bee6f5f3139f1ebbb6d042b283af712c0981f5bc50edf771d6b762d5e4f"},
+ {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c9a09cd26cca2e1c349f91aa665309ddb48d71636370749414fbf67bc83c5343"},
+ {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:158be8ea8554e5ed69acc1ce3fbb23a06060bd4bbb09029431ad6b9a466a7121"},
+ {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ddba9a2b0c8c81633eca0bb2e1aa5b3a15362b1277f1ae64176d0f6eba78ab1"},
+ {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ef5dd1d39797044642dbe53eb2bc56435308432e9c7907728da74c69ee2adca"},
+ {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:454c203164e07a860dbeb3b1f4a733be52b0edbb4dd2e5bd75023ffa8b49403a"},
+ {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cf6b7f1d4dc59af960e6ffdc4faffe6460bbfa8dce27a58bf75755ffdb2526d"},
+ {file = "tokenizers-0.15.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2ef09bbc16519f6c25d0c7fc0c6a33a6f62923e263c9d7cca4e58b8c61572afb"},
+ {file = "tokenizers-0.15.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c9a2ebdd2ad4ec7a68e7615086e633857c85e2f18025bd05d2a4399e6c5f7169"},
+ {file = "tokenizers-0.15.2-cp39-none-win32.whl", hash = "sha256:918fbb0eab96fe08e72a8c2b5461e9cce95585d82a58688e7f01c2bd546c79d0"},
+ {file = "tokenizers-0.15.2-cp39-none-win_amd64.whl", hash = "sha256:524e60da0135e106b254bd71f0659be9f89d83f006ea9093ce4d1fab498c6d0d"},
+ {file = "tokenizers-0.15.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6a9b648a58281c4672212fab04e60648fde574877d0139cd4b4f93fe28ca8944"},
+ {file = "tokenizers-0.15.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7c7d18b733be6bbca8a55084027f7be428c947ddf871c500ee603e375013ffba"},
+ {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:13ca3611de8d9ddfbc4dc39ef54ab1d2d4aaa114ac8727dfdc6a6ec4be017378"},
+ {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:237d1bf3361cf2e6463e6c140628e6406766e8b27274f5fcc62c747ae3c6f094"},
+ {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67a0fe1e49e60c664915e9fb6b0cb19bac082ab1f309188230e4b2920230edb3"},
+ {file = "tokenizers-0.15.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4e022fe65e99230b8fd89ebdfea138c24421f91c1a4f4781a8f5016fd5cdfb4d"},
+ {file = "tokenizers-0.15.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d857be2df69763362ac699f8b251a8cd3fac9d21893de129bc788f8baaef2693"},
+ {file = "tokenizers-0.15.2-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:708bb3e4283177236309e698da5fcd0879ce8fd37457d7c266d16b550bcbbd18"},
+ {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:64c35e09e9899b72a76e762f9854e8750213f67567787d45f37ce06daf57ca78"},
+ {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1257f4394be0d3b00de8c9e840ca5601d0a4a8438361ce9c2b05c7d25f6057b"},
+ {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02272fe48280e0293a04245ca5d919b2c94a48b408b55e858feae9618138aeda"},
+ {file = "tokenizers-0.15.2-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dc3ad9ebc76eabe8b1d7c04d38be884b8f9d60c0cdc09b0aa4e3bcf746de0388"},
+ {file = "tokenizers-0.15.2-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:32e16bdeffa7c4f46bf2152172ca511808b952701d13e7c18833c0b73cb5c23f"},
+ {file = "tokenizers-0.15.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fb16ba563d59003028b678d2361a27f7e4ae0ab29c7a80690efa20d829c81fdb"},
+ {file = "tokenizers-0.15.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:2277c36d2d6cdb7876c274547921a42425b6810d38354327dd65a8009acf870c"},
+ {file = "tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1cf75d32e8d250781940d07f7eece253f2fe9ecdb1dc7ba6e3833fa17b82fcbc"},
+ {file = "tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1b3b31884dc8e9b21508bb76da80ebf7308fdb947a17affce815665d5c4d028"},
+ {file = "tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b10122d8d8e30afb43bb1fe21a3619f62c3e2574bff2699cf8af8b0b6c5dc4a3"},
+ {file = "tokenizers-0.15.2-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d88b96ff0fe8e91f6ef01ba50b0d71db5017fa4e3b1d99681cec89a85faf7bf7"},
+ {file = "tokenizers-0.15.2-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:37aaec5a52e959892870a7c47cef80c53797c0db9149d458460f4f31e2fb250e"},
+ {file = "tokenizers-0.15.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e2ea752f2b0fe96eb6e2f3adbbf4d72aaa1272079b0dfa1145507bd6a5d537e6"},
+ {file = "tokenizers-0.15.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:4b19a808d8799fda23504a5cd31d2f58e6f52f140380082b352f877017d6342b"},
+ {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:64c86e5e068ac8b19204419ed8ca90f9d25db20578f5881e337d203b314f4104"},
+ {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de19c4dc503c612847edf833c82e9f73cd79926a384af9d801dcf93f110cea4e"},
+ {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea09acd2fe3324174063d61ad620dec3bcf042b495515f27f638270a7d466e8b"},
+ {file = "tokenizers-0.15.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cf27fd43472e07b57cf420eee1e814549203d56de00b5af8659cb99885472f1f"},
+ {file = "tokenizers-0.15.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7ca22bd897537a0080521445d91a58886c8c04084a6a19e6c78c586e0cfa92a5"},
+ {file = "tokenizers-0.15.2.tar.gz", hash = "sha256:e6e9c6e019dd5484be5beafc775ae6c925f4c69a3487040ed09b45e13df2cb91"},
+]
+
+[package.dependencies]
+huggingface_hub = ">=0.16.4,<1.0"
+
+[package.extras]
+dev = ["tokenizers[testing]"]
+docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"]
+testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"]
+
+[[package]]
+name = "torch"
+version = "2.2.1+cpu"
+description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
+optional = false
+python-versions = ">=3.8.0"
+files = [
+ {file = "torch-2.2.1+cpu-cp310-cp310-linux_x86_64.whl", hash = "sha256:5d82422cf04797f1b2a8574b64a916070ec83eef58ad4900615ee0218d7b8b8e"},
+ {file = "torch-2.2.1+cpu-cp310-cp310-win_amd64.whl", hash = "sha256:f8914dd0f5f0e5c66fdecd9559403eea9feac82d1ea639b672fde0073c6addbd"},
+ {file = "torch-2.2.1+cpu-cp311-cp311-linux_x86_64.whl", hash = "sha256:6bc973d5632374b92b4b293817b4d2ff8c8ce1c784c748b471dba1fffcd9c333"},
+ {file = "torch-2.2.1+cpu-cp311-cp311-win_amd64.whl", hash = "sha256:abdec34b0ade8fca0520055e72c3094425ae0ef210718e9c0278121cd3608c32"},
+ {file = "torch-2.2.1+cpu-cp312-cp312-linux_x86_64.whl", hash = "sha256:d7339580135da4105c1244a8621faa076990409afeab5a7b642c3c1ee70a5622"},
+ {file = "torch-2.2.1+cpu-cp312-cp312-win_amd64.whl", hash = "sha256:039128fcb5548122465b15f679b8831c47d14f0d6c28c1f1b631f8019c104720"},
+ {file = "torch-2.2.1+cpu-cp38-cp38-linux_x86_64.whl", hash = "sha256:2b447f7bb50b393b4544b4036d587e39ab524d4353e77c197f6a2727f22b0d47"},
+ {file = "torch-2.2.1+cpu-cp38-cp38-win_amd64.whl", hash = "sha256:2ccdf3e5f71e6426ea9e34d21c3cc333b29d4f48299b981d28aeb5112b5495e1"},
+ {file = "torch-2.2.1+cpu-cp39-cp39-linux_x86_64.whl", hash = "sha256:2fb340b289760040a16a77a6d70b8a48961abba1822e6f58705c97c80befa03e"},
+ {file = "torch-2.2.1+cpu-cp39-cp39-win_amd64.whl", hash = "sha256:e03dc4654ecceeb5b03f0a6f60b342c0e0d267b3ebc61e4f672cace1df8cd930"},
+]
+
+[package.dependencies]
+filelock = "*"
+fsspec = "*"
+jinja2 = "*"
+networkx = "*"
+sympy = "*"
+typing-extensions = ">=4.8.0"
+
+[package.extras]
+opt-einsum = ["opt-einsum (>=3.3)"]
+optree = ["optree (>=0.9.1)"]
+
+[package.source]
+type = "legacy"
+url = "https://download.pytorch.org/whl/cpu"
+reference = "pytorch"
+
+[[package]]
+name = "tqdm"
+version = "4.66.2"
+description = "Fast, Extensible Progress Meter"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"},
+ {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
+notebook = ["ipywidgets (>=6)"]
+slack = ["slack-sdk"]
+telegram = ["requests"]
+
+[[package]]
+name = "transformers"
+version = "4.38.1"
+description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
+optional = false
+python-versions = ">=3.8.0"
+files = [
+ {file = "transformers-4.38.1-py3-none-any.whl", hash = "sha256:a7a9265fb060183e9d975cbbadc4d531b10281589c43f6d07563f86322728973"},
+ {file = "transformers-4.38.1.tar.gz", hash = "sha256:86dc84ccbe36123647e84cbd50fc31618c109a41e6be92514b064ab55bf1304c"},
+]
+
+[package.dependencies]
+filelock = "*"
+huggingface-hub = ">=0.19.3,<1.0"
+numpy = ">=1.17"
+packaging = ">=20.0"
+pyyaml = ">=5.1"
+regex = "!=2019.12.17"
+requests = "*"
+safetensors = ">=0.4.1"
+tokenizers = ">=0.14,<0.19"
+tqdm = ">=4.27"
+
+[package.extras]
+accelerate = ["accelerate (>=0.21.0)"]
+agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"]
+all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch", "torchaudio", "torchvision"]
+audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
+codecarbon = ["codecarbon (==1.2.0)"]
+deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"]
+deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
+dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.14,<0.19)", "urllib3 (<2.0.0)"]
+dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+docs = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch", "torchaudio", "torchvision"]
+docs-specific = ["hf-doc-builder"]
+flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"]
+flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
+ftfy = ["ftfy"]
+integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"]
+ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"]
+modelcreation = ["cookiecutter (==1.7.3)"]
+natten = ["natten (>=0.14.6,<0.15.0)"]
+onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"]
+onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
+optuna = ["optuna"]
+quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<2.0.0)"]
+ray = ["ray[tune] (>=2.7.0)"]
+retrieval = ["datasets (!=2.5.0)", "faiss-cpu"]
+sagemaker = ["sagemaker (>=2.31.0)"]
+sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"]
+serving = ["fastapi", "pydantic", "starlette", "uvicorn"]
+sigopt = ["sigopt"]
+sklearn = ["scikit-learn"]
+speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
+testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "tensorboard", "timeout-decorator"]
+tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
+tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
+tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
+timm = ["timm"]
+tokenizers = ["tokenizers (>=0.14,<0.19)"]
+torch = ["accelerate (>=0.21.0)", "torch"]
+torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
+torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
+torchhub = ["filelock", "huggingface-hub (>=0.19.3,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.14,<0.19)", "torch", "tqdm (>=4.27)"]
+video = ["av (==9.2.0)", "decord (==0.6.0)"]
+vision = ["Pillow (>=10.0.1,<=15.0)"]
+
+[[package]]
+name = "typing-extensions"
+version = "4.9.0"
+description = "Backported and Experimental Type Hints for Python 3.8+"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"},
+ {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"},
+]
+
+[[package]]
+name = "urllib3"
+version = "2.2.1"
+description = "HTTP library with thread-safe connection pooling, file post, and more."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
+ {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
+]
+
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+h2 = ["h2 (>=4,<5)"]
+socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
+zstd = ["zstandard (>=0.18.0)"]
+
+[[package]]
+name = "wcwidth"
+version = "0.2.13"
+description = "Measures the displayed width of unicode strings in a terminal"
+optional = false
+python-versions = "*"
+files = [
+ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"},
+ {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"},
+]
+
+[metadata]
+lock-version = "2.0"
+python-versions = ">=3.9"
+content-hash = "c8c4cc87637266a7b85debcbafa8887c5ad81cc8ef40e98a3f52c7c50af05c03"
--- /dev/null
+[tool.poetry]
+name = "llama-cpp-scripts"
+version = "0.0.0"
+description = "Scripts that ship with llama.cpp"
+authors = ["GGML <ggml@ggml.ai>"]
+readme = "README.md"
+homepage = "https://ggml.ai"
+repository = "https://github.com/ggerganov/llama.cpp"
+keywords = ["ggml", "gguf", "llama.cpp"]
+packages = [{ include = "__init__.py", from = "." }]
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent",
+]
+
+[tool.poetry.dependencies]
+python = ">=3.9"
+numpy = "^1.25.0"
+sentencepiece = ">=0.1.98,<0.2.0"
+transformers = ">=4.35.2,<5.0.0"
+protobuf = ">=4.21.0,<5.0.0"
+gguf = { path = "./gguf-py" }
+torch = {version = "^2.2.0", source = "pytorch"}
+
+[tool.poetry.dev-dependencies]
+pytest = "^5.2"
+
+[[tool.poetry.source]]
+name = "pytorch"
+url = "https://download.pytorch.org/whl/cpu"
+priority = "explicit"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry.scripts]
+llama-convert-hf-to-gguf = "convert_hf_to_gguf:main"
+llama-convert-llama-ggml-to-gguf = "convert_llama_ggml_to_gguf:main"
+llama-convert-lora-to-ggml = "convert_lora_to_ggml:main"
+llama-convert-persimmon-to-gguf = "convert_persimmon_to_gguf:main"
+llama-convert = "convert:main"
+llama-ggml_vk_generate_shaders = "ggml_vk_generate_shaders:main"