typo correction (#8891)

author Nexes the Old <redacted>

Tue, 6 Aug 2024 23:41:54 +0000 (01:41 +0200)

committer GitHub <redacted>

Tue, 6 Aug 2024 23:41:54 +0000 (01:41 +0200)
author Nexes the Old <redacted>
Tue, 6 Aug 2024 23:41:54 +0000 (01:41 +0200)
committer GitHub <redacted>
Tue, 6 Aug 2024 23:41:54 +0000 (01:41 +0200)
diff --git a/include/llama.h b/include/llama.h

index f23355a6bc9593e53d5978ff81b381182dcccd61..66c266298e86f0204f6d466368abd8d86465a99c 100644 (file)
--- a/include/llama.h
+++ b/include/llama.h
@@ -345,7 +345,7 @@ extern "C" {
          int32_t nthread;                     // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
          enum llama_ftype ftype;              // quantize to this llama_ftype
          enum ggml_type output_tensor_type;   // output tensor type
-        enum ggml_type token_embedding_type; // itoken embeddings tensor type
+        enum ggml_type token_embedding_type; // token embeddings tensor type
          bool allow_requantize;               // allow quantizing non-f32/f16 tensors
          bool quantize_output_tensor;         // quantize output.weight
          bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
author	Nexes the Old <redacted>
	Tue, 6 Aug 2024 23:41:54 +0000 (01:41 +0200)
committer	GitHub <redacted>
	Tue, 6 Aug 2024 23:41:54 +0000 (01:41 +0200)