import sys
from typing import Any, Dict, Sequence, TextIO
+import numpy as np
import torch
-from convert import DATA_TYPE_TO_FTYPE, NUMPY_TYPE_TO_DATA_TYPE, DataType
+NUMPY_TYPE_TO_FTYPE: Dict[str, int] = {"float32": 0, "float16": 1}
+
HF_SUBLAYER_TO_GGML = {
- "self_attn.q_proj": "attention.wq",
- "self_attn.k_proj": "attention.wk",
- "self_attn.v_proj": "attention.wv",
- "self_attn.o_proj": "attention.wo",
- "mlp.gate_proj": "feed_forward.w1",
- "mlp.down_proj": "feed_forward.w2",
- "mlp.up_proj": "feed_forward.w3",
- "input_layernorm": "attention_norm",
+ "self_attn.q_proj": "attn_q",
+ "self_attn.k_proj": "attn_k",
+ "self_attn.v_proj": "attn_v",
+ "self_attn.o_proj": "attn_output",
+ "mlp.gate_proj": "ffn_gate",
+ "mlp.down_proj": "ffn_down",
+ "mlp.up_proj": "ffn_up",
+ "input_layernorm": "attn_norm",
"post_attention_layernorm": "ffn_norm",
- # "norm": "norm",
- # "embed_tokens": "tok_embeddings",
- # "lm_head": "output",
}
sys.exit(1)
output_string = (
- f"layers.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
+ f"blk.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
)
return output_string
else:
# https://opendelta.readthedocs.io/en/latest/modules/deltas.html says that `lora_alpha` is an int
# but some models ship a float value instead
# let's convert to int, but fail if lossless conversion is not possible
- assert int(params["lora_alpha"]) == params["lora_alpha"], "cannot convert float to int losslessly"
+ assert (
+ int(params["lora_alpha"]) == params["lora_alpha"]
+ ), "cannot convert float to int losslessly"
fout.write(struct.pack("i", int(params["lora_alpha"])))
def write_tensor_header(
- self, name: str, shape: Sequence[int], data_type: DataType
+ self, name: str, shape: Sequence[int], data_type: np.dtype
) -> None:
sname = name.encode("utf-8")
fout.write(
"iii",
len(shape),
len(sname),
- DATA_TYPE_TO_FTYPE[NUMPY_TYPE_TO_DATA_TYPE[data_type]],
+ NUMPY_TYPE_TO_FTYPE[data_type.name],
)
)
fout.write(struct.pack("i" * len(shape), *shape[::-1]))