RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git
-COPY requirements.txt requirements.txt
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
RUN pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt
gfx1101 \
gfx1102
-COPY requirements.txt requirements.txt
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
RUN pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt
RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git
-COPY requirements.txt requirements.txt
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
RUN pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt
gfx1101 \
gfx1102
-COPY requirements.txt requirements.txt
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
RUN pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt
--- /dev/null
+name: Python check requirements.txt
+
+on:
+ push:
+ paths:
+ - 'scripts/check-requirements.sh'
+ - 'convert*.py'
+ - 'requirements.txt'
+ - 'requirements/*.txt'
+ pull_request:
+ paths:
+ - 'scripts/check-requirements.sh'
+ - 'convert*.py'
+ - 'requirements.txt'
+ - 'requirements/*.txt'
+
+jobs:
+ python-check-requirements:
+ runs-on: ubuntu-latest
+ name: check-requirements
+ steps:
+ - name: Check out source repository
+ uses: actions/checkout@v3
+ - name: Set up Python environment
+ uses: actions/setup-python@v4
+ with:
+ python-version: "3.11"
+ - name: Run check-requirements.sh script
+ run: bash scripts/check-requirements.sh nocleanup
tokens: list[bytearray] = []
toktypes: list[int] = []
- from transformers import AutoTokenizer # type: ignore[attr-defined]
+ from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(dir_model)
vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
assert max(tokenizer.vocab.values()) < vocab_size
hparams = self.hparams
block_count = hparams["num_hidden_layers"]
- self.gguf_writer.add_name(dir_model.name)
+ self.gguf_writer.add_name(self.dir_model.name)
self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
self.gguf_writer.add_block_count(block_count)
tokens: list[bytearray] = []
toktypes: list[int] = []
- from transformers import AutoTokenizer # type: ignore[attr-defined]
+ from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
vocab_size = hparams["vocab_size"]
assert max(tokenizer.get_vocab().values()) < vocab_size
return parser.parse_args()
-args = parse_args()
+def main() -> None:
+ args = parse_args()
-dir_model = args.model
+ dir_model = args.model
-if args.awq_path:
- sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
- from awq.apply_awq import add_scale_weights
- tmp_model_path = args.model / "weighted_model"
- dir_model = tmp_model_path
- if tmp_model_path.is_dir():
- print(f"{tmp_model_path} exists as a weighted model.")
+ if args.awq_path:
+ sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
+ from awq.apply_awq import add_scale_weights
+ tmp_model_path = args.model / "weighted_model"
+ dir_model = tmp_model_path
+ if tmp_model_path.is_dir():
+ print(f"{tmp_model_path} exists as a weighted model.")
+ else:
+ tmp_model_path.mkdir(parents=True, exist_ok=True)
+ print("Saving new weighted model ...")
+ add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
+ print(f"Saved weighted model at {tmp_model_path}.")
+
+ if not dir_model.is_dir():
+ print(f'Error: {args.model} is not a directory', file=sys.stderr)
+ sys.exit(1)
+
+ ftype_map = {
+ "f32": gguf.GGMLQuantizationType.F32,
+ "f16": gguf.GGMLQuantizationType.F16,
+ }
+
+ if args.outfile is not None:
+ fname_out = args.outfile
else:
- tmp_model_path.mkdir(parents=True, exist_ok=True)
- print("Saving new weighted model ...")
- add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
- print(f"Saved weighted model at {tmp_model_path}.")
-
-if not dir_model.is_dir():
- print(f'Error: {args.model} is not a directory', file=sys.stderr)
- sys.exit(1)
+ # output in the same directory as the model by default
+ fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
-ftype_map = {
- "f32": gguf.GGMLQuantizationType.F32,
- "f16": gguf.GGMLQuantizationType.F16,
-}
+ print(f"Loading model: {dir_model.name}")
-if args.outfile is not None:
- fname_out = args.outfile
-else:
- # output in the same directory as the model by default
- fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
+ hparams = Model.load_hparams(dir_model)
-print(f"Loading model: {dir_model.name}")
+ with torch.inference_mode():
+ model_class = Model.from_model_architecture(hparams["architectures"][0])
+ model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
-hparams = Model.load_hparams(dir_model)
+ print("Set model parameters")
+ model_instance.set_gguf_parameters()
-with torch.inference_mode():
- model_class = Model.from_model_architecture(hparams["architectures"][0])
- model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
+ print("Set model tokenizer")
+ model_instance.set_vocab()
- print("Set model parameters")
- model_instance.set_gguf_parameters()
+ if args.vocab_only:
+ print(f"Exporting model vocab to '{fname_out}'")
+ model_instance.write_vocab()
+ else:
+ print(f"Exporting model to '{fname_out}'")
+ model_instance.write()
- print("Set model tokenizer")
- model_instance.set_vocab()
+ print(f"Model successfully exported to '{fname_out}'")
- if args.vocab_only:
- print(f"Exporting model vocab to '{fname_out}'")
- model_instance.write_vocab()
- else:
- print(f"Exporting model to '{fname_out}'")
- model_instance.write()
- print(f"Model successfully exported to '{fname_out}'")
+if __name__ == '__main__':
+ main()
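The point of the `main()` wrapper and `__main__` guard above is that the convert scripts can now be imported without side effects, which is exactly what the new check script relies on. A minimal sketch of that import-only check (the file and module names here are illustrative; the real check lives in scripts/check-requirements.sh below):

```python
# Sketch of the import-only check performed by check-requirements.sh:
# loading the module must not run main(), so a missing dependency shows up
# as an ImportError without any model conversion being attempted.
import sys
from importlib.machinery import SourceFileLoader

py, pyname = "convert-hf-to-gguf.py", "convert-hf-to-gguf"  # illustrative paths
SourceFileLoader(pyname, py).load_module()
print(f"{py}: imports OK", file=sys.stderr)
```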
fout.seek((fout.tell() + 31) & -32)
-if len(sys.argv) < 2:
- print(f"Usage: python {sys.argv[0]} <path> [arch]")
- print(
- "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
- )
- print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
- sys.exit(1)
-
-input_json = os.path.join(sys.argv[1], "adapter_config.json")
-input_model = os.path.join(sys.argv[1], "adapter_model.bin")
-output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
-
-model = torch.load(input_model, map_location="cpu")
-arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
-
-if arch_name not in gguf.MODEL_ARCH_NAMES.values():
- print(f"Error: unsupported architecture {arch_name}")
- sys.exit(1)
-
-arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
-name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
-
-with open(input_json, "r") as f:
- params = json.load(f)
-
-if params["peft_type"] != "LORA":
- print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
- sys.exit(1)
-
-if params["fan_in_fan_out"] is True:
- print("Error: param fan_in_fan_out is not supported")
- sys.exit(1)
-
-if params["bias"] is not None and params["bias"] != "none":
- print("Error: param bias is not supported")
- sys.exit(1)
-
-# TODO: these seem to be layers that have been trained but without lora.
-# doesn't seem widely used but eventually should be supported
-if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
- print("Error: param modules_to_save is not supported")
- sys.exit(1)
-
-with open(output_path, "wb") as fout:
- fout.truncate()
-
- write_file_header(fout, params)
- for k, v in model.items():
- orig_k = k
- if k.endswith(".default.weight"):
- k = k.replace(".default.weight", ".weight")
- if k in ["llama_proj.weight", "llama_proj.bias"]:
- continue
- if k.endswith("lora_A.weight"):
- if v.dtype != torch.float16 and v.dtype != torch.float32:
+if __name__ == '__main__':
+ if len(sys.argv) < 2:
+ print(f"Usage: python {sys.argv[0]} <path> [arch]")
+ print(
+ "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
+ )
+ print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
+ sys.exit(1)
+
+ input_json = os.path.join(sys.argv[1], "adapter_config.json")
+ input_model = os.path.join(sys.argv[1], "adapter_model.bin")
+ output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
+
+ model = torch.load(input_model, map_location="cpu")
+ arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
+
+ if arch_name not in gguf.MODEL_ARCH_NAMES.values():
+ print(f"Error: unsupported architecture {arch_name}")
+ sys.exit(1)
+
+ arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
+ name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
+
+ with open(input_json, "r") as f:
+ params = json.load(f)
+
+ if params["peft_type"] != "LORA":
+ print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
+ sys.exit(1)
+
+ if params["fan_in_fan_out"] is True:
+ print("Error: param fan_in_fan_out is not supported")
+ sys.exit(1)
+
+ if params["bias"] is not None and params["bias"] != "none":
+ print("Error: param bias is not supported")
+ sys.exit(1)
+
+ # TODO: these seem to be layers that have been trained but without lora.
+ # doesn't seem widely used but eventually should be supported
+ if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
+ print("Error: param modules_to_save is not supported")
+ sys.exit(1)
+
+ with open(output_path, "wb") as fout:
+ fout.truncate()
+
+ write_file_header(fout, params)
+ for k, v in model.items():
+ orig_k = k
+ if k.endswith(".default.weight"):
+ k = k.replace(".default.weight", ".weight")
+ if k in ["llama_proj.weight", "llama_proj.bias"]:
+ continue
+ if k.endswith("lora_A.weight"):
+ if v.dtype != torch.float16 and v.dtype != torch.float32:
+ v = v.float()
+ v = v.T
+ else:
v = v.float()
- v = v.T
- else:
- v = v.float()
-
- t = v.detach().numpy()
-
- prefix = "base_model.model."
- if k.startswith(prefix):
- k = k[len(prefix) :]
-
- lora_suffixes = (".lora_A.weight", ".lora_B.weight")
- if k.endswith(lora_suffixes):
- suffix = k[-len(lora_suffixes[0]):]
- k = k[: -len(lora_suffixes[0])]
- else:
- print(f"Error: unrecognized tensor name {orig_k}")
- sys.exit(1)
-
- tname = name_map.get_name(k)
- if tname is None:
- print(f"Error: could not map tensor name {orig_k}")
- print(" Note: the arch parameter must be specified if the model is not llama")
- sys.exit(1)
-
- if suffix == ".lora_A.weight":
- tname += ".weight.loraA"
- elif suffix == ".lora_B.weight":
- tname += ".weight.loraB"
- else:
- assert False
-
- print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
- write_tensor_header(fout, tname, t.shape, t.dtype)
- t.tofile(fout)
-
-print(f"Converted {input_json} and {input_model} to {output_path}")
+
+ t = v.detach().numpy()
+
+ prefix = "base_model.model."
+ if k.startswith(prefix):
+ k = k[len(prefix) :]
+
+ lora_suffixes = (".lora_A.weight", ".lora_B.weight")
+ if k.endswith(lora_suffixes):
+ suffix = k[-len(lora_suffixes[0]):]
+ k = k[: -len(lora_suffixes[0])]
+ else:
+ print(f"Error: unrecognized tensor name {orig_k}")
+ sys.exit(1)
+
+ tname = name_map.get_name(k)
+ if tname is None:
+ print(f"Error: could not map tensor name {orig_k}")
+ print(" Note: the arch parameter must be specified if the model is not llama")
+ sys.exit(1)
+
+ if suffix == ".lora_A.weight":
+ tname += ".weight.loraA"
+ elif suffix == ".lora_B.weight":
+ tname += ".weight.loraB"
+ else:
+ assert False
+
+ print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
+ write_tensor_header(fout, tname, t.shape, t.dtype)
+ t.tofile(fout)
+
+ print(f"Converted {input_json} and {input_model} to {output_path}")
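The tensor-name handling above reduces to a handful of string transformations before the gguf name map is consulted. A standalone sketch using a hypothetical PEFT tensor name (string steps only, no gguf dependency):

```python
# Hypothetical PEFT tensor name; real names come from adapter_model.bin.
k = "base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight"

if k.endswith(".default.weight"):
    k = k.replace(".default.weight", ".weight")   # -> ...q_proj.lora_A.weight

prefix = "base_model.model."
if k.startswith(prefix):
    k = k[len(prefix):]                           # strip the PEFT wrapper prefix

lora_suffixes = (".lora_A.weight", ".lora_B.weight")
suffix = k[-len(lora_suffixes[0]):]               # ".lora_A.weight"
k = k[: -len(lora_suffixes[0])]                   # base name passed to the name map
print(k, suffix)  # model.layers.0.self_attn.q_proj .lora_A.weight
```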
+#!/usr/bin/env python3
import torch
import os
from pprint import pprint
+++ /dev/null
--r requirements.txt
-torch==2.1.1
-transformers==4.35.2
-numpy==1.24.4
-sentencepiece==0.1.98
-transformers>=4.34.0
-gguf>=0.1.0
-protobuf>=4.21.0
+# These requirements include all dependencies for all top-level python scripts
+# for llama.cpp. Avoid adding packages here directly.
+#
+# Package versions must stay compatible across all top-level python scripts.
+#
+
+-r ./requirements/requirements-convert.txt
+
+-r ./requirements/requirements-convert-hf-to-gguf.txt
+-r ./requirements/requirements-convert-llama-ggml-to-gguf.txt
+-r ./requirements/requirements-convert-lora-to-ggml.txt
+-r ./requirements/requirements-convert-persimmon-to-gguf.txt
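With this split, every per-script file under requirements/ must be reachable from the top-level requirements.txt; the new check script enforces this with a grep loop. A rough Python equivalent of that consistency check, assuming it is run from the repository root:

```python
# Sketch: verify each requirements/*.txt is referenced by the top-level
# requirements.txt, mirroring the grep -qF loop in scripts/check-requirements.sh.
import sys
from pathlib import Path

top = Path("requirements.txt").read_text()
missing = [str(p) for p in sorted(Path("requirements").glob("*.txt")) if str(p) not in top]
if missing:
    print("not referenced from requirements.txt:", *missing, sep="\n  ", file=sys.stderr)
    sys.exit(1)
```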
--- /dev/null
+-r ./requirements-convert.txt
+torch~=2.1.1
--- /dev/null
+-r ./requirements-convert.txt
--- /dev/null
+-r ./requirements-convert.txt
+torch~=2.1.1
--- /dev/null
+-r ./requirements-convert.txt
+torch~=2.1.1
--- /dev/null
+numpy~=1.24.4
+sentencepiece~=0.1.98
+transformers>=4.35.2,<5.0.0
+gguf>=0.1.0
+protobuf>=4.21.0,<5.0.0
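The per-script files deliberately use compatible-release (`~=`) and bounded (`>=`, `<`) specifiers instead of exact `==` pins, which the check script rejects unless explicitly suppressed. A small illustration of what `~=` permits, using the `packaging` library (an assumption for this sketch, not a llama.cpp requirement):

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=2.1.1")   # equivalent to >=2.1.1, ==2.1.*
print("2.1.2" in spec)           # True: patch releases still satisfy the pin
print("2.2.0" in spec)           # False: minor bumps need a deliberate update
```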
--- /dev/null
+#!/bin/bash
+set -euo pipefail
+
+#
+# check-requirements.sh checks all requirements files for each top-level
+# convert*.py script.
+#
+# WARNING: This is quite IO intensive, because a fresh venv is set up for every
+# python script. As of 2023-12-22, this writes ~2.7GB of data. An adequately
+# sized tmpfs /tmp or ramdisk is recommended if running this frequently.
+#
+# usage: check-requirements.sh [<working_dir>]
+# check-requirements.sh nocleanup [<working_dir>]
+#
+# where:
+# - <working_dir> is a directory that can be used as the base for
+# setting up the venvs. Defaults to `/tmp`.
+# - 'nocleanup' as the first argument will disable automatic cleanup
+# of the files created by this script.
+#
+# requires:
+# - bash >= 3.2.57
+# - shellcheck
+#
+# For each script, it creates a fresh venv, `pip install`s the requirements, and
+# finally imports the python script to check for `ImportError`.
+#
+
+log() {
+ local level=$1 msg=$2
+ printf >&2 '%s: %s\n' "$level" "$msg"
+}
+
+debug() {
+ log DEBUG "$@"
+}
+
+info() {
+ log INFO "$@"
+}
+
+fatal() {
+ log FATAL "$@"
+ exit 1
+}
+
+cleanup() {
+ if [[ -n ${workdir+x} && -d $workdir && -w $workdir ]]; then
+ info "Removing $workdir"
+ local count=0
+ rm -rfv -- "$workdir" | while read -r; do
+ if (( count++ > 750 )); then
+ printf .
+ count=0
+ fi
+ done
+ printf '\n'
+ info "Removed $workdir"
+ fi
+}
+
+do_cleanup=1
+if [[ ${1-} == nocleanup ]]; then
+ do_cleanup=0; shift
+fi
+
+if (( do_cleanup )); then
+ trap exit INT TERM
+ trap cleanup EXIT
+fi
+
+this=$(realpath -- "$0"); readonly this
+cd "$(dirname "$this")/.." # PWD should stay in llama.cpp project directory
+
+shellcheck "$this"
+
+readonly reqs_dir=requirements
+
+if [[ ${1+x} ]]; then
+ tmp_dir=$(realpath -- "$1")
+ if [[ ! ( -d $tmp_dir && -w $tmp_dir ) ]]; then
+ fatal "$tmp_dir is not a writable directory"
+ fi
+else
+ tmp_dir=/tmp
+fi
+
+workdir=$(mktemp -d "$tmp_dir/check-requirements.XXXX"); readonly workdir
+info "Working directory: $workdir"
+
+check_requirements() {
+ local reqs=$1
+
+ info "$reqs: beginning check"
+ pip --disable-pip-version-check install -qr "$reqs"
+ info "$reqs: OK"
+}
+
+check_convert_script() {
+ local py=$1 # e.g. ./convert-hf-to-gguf.py
+ local pyname=${py##*/} # e.g. convert-hf-to-gguf.py
+ pyname=${pyname%.py} # e.g. convert-hf-to-gguf
+
+ info "$py: beginning check"
+
+ local reqs="$reqs_dir/requirements-$pyname.txt"
+ if [[ ! -r $reqs ]]; then
+ fatal "$py missing requirements. Expected: $reqs"
+ fi
+
+ local venv="$workdir/$pyname-venv"
+ python3 -m venv "$venv"
+
+ (
+ # shellcheck source=/dev/null
+ source "$venv/bin/activate"
+
+ check_requirements "$reqs"
+
+ python - "$py" "$pyname" <<'EOF'
+import sys
+from importlib.machinery import SourceFileLoader
+py, pyname = sys.argv[1:]
+SourceFileLoader(pyname, py).load_module()
+EOF
+ )
+
+ if (( do_cleanup )); then
+ rm -rf -- "$venv"
+ fi
+
+ info "$py: imports OK"
+}
+
+readonly ignore_eq_eq='check_requirements: ignore "=="'
+
+for req in "$reqs_dir"/*; do
+ # Check that all sub-requirements are added to top-level requirements.txt
+ if ! grep -qF "$req" requirements.txt; then
+ fatal "$req needs to be added to requirements.txt"
+ fi
+
+ # Make sure exact release versions aren't being pinned in the requirements
+ # Filters out the ignore string
+ if grep -vF "$ignore_eq_eq" "$req" | grep -q '=='; then
+ tab=$'\t'
+ cat >&2 <<EOF
+FATAL: Avoid pinning exact package versions. Use '~=' instead.
+You can suppress this error by appending the following to the line:
+$tab# $ignore_eq_eq
+EOF
+ exit 1
+ fi
+done
+
+all_venv="$workdir/all-venv"
+python3 -m venv "$all_venv"
+
+(
+ # shellcheck source=/dev/null
+ source "$all_venv/bin/activate"
+ check_requirements requirements.txt
+)
+
+if (( do_cleanup )); then
+ rm -rf -- "$all_venv"
+fi
+
+check_convert_script convert.py
+for py in convert-*.py; do
+ check_convert_script "$py"
+done
+
+info 'Done! No issues found.'
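For readers more comfortable in Python, the per-script venv check above can be approximated as follows; this is only a sketch of the same flow (venv, pip install, import), not part of the PR:

```python
# Sketch: what check_convert_script does for one script, expressed with the stdlib.
import subprocess
import sys
import tempfile
from pathlib import Path

def check_one(py_script: str, reqs_file: str) -> None:
    with tempfile.TemporaryDirectory() as tmp:
        venv_dir = Path(tmp) / "venv"
        subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True)
        pip = venv_dir / "bin" / "pip"      # assumes a POSIX venv layout, as the shell script does
        py = venv_dir / "bin" / "python"
        subprocess.run([str(pip), "--disable-pip-version-check", "install", "-qr", reqs_file], check=True)
        # Import the script instead of running it; a bad requirements file surfaces as ImportError.
        loader = (
            "import sys\n"
            "from importlib.machinery import SourceFileLoader\n"
            "SourceFileLoader('check_target', sys.argv[1]).load_module()\n"
        )
        subprocess.run([str(py), "-c", loader, py_script], check=True)

if __name__ == "__main__":
    check_one("convert-hf-to-gguf.py", "requirements/requirements-convert-hf-to-gguf.txt")
```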