git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
model : fix wavtokenizer embedding notions (#19479)
authorGeorgi Gerganov <redacted>
Wed, 11 Feb 2026 05:52:20 +0000 (07:52 +0200)
committerGitHub <redacted>
Wed, 11 Feb 2026 05:52:20 +0000 (07:52 +0200)
src/llama-hparams.h
src/llama-model.cpp
tools/tts/tts.cpp

index 6c695bdbf6625d3db6076995a790ec845406baa7..706eda8441bc74b54ef78eb01f88da53a21d83c3 100644 (file)
@@ -42,7 +42,6 @@ struct llama_hparams {
 
     uint32_t n_ctx_train; // context size the model was trained on
     uint32_t n_embd;
-    uint32_t n_embd_features = 0;
     uint32_t n_layer;
     int32_t n_layer_kv_from_start = -1; // if non-negative, the first n_layer_kv_from_start layers have KV cache
     uint32_t n_rot;
index 7a06e96c8796b379425a7f4329e7dab2c5ec09c1..5816e9a954cc679dffa3af2f2ec65c63811220be 100644 (file)
@@ -523,7 +523,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     ml.get_key(LLM_KV_EXPERT_GROUP_USED_COUNT, hparams.n_group_used,    false);
 
     if (arch == LLM_ARCH_WAVTOKENIZER_DEC) {
-        ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd_features);
+        ml.get_key(LLM_KV_FEATURES_LENGTH,  hparams.n_embd);
+        ml.get_key(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd_out_impl);
 
         ml.get_key(LLM_KV_POSNET_EMBEDDING_LENGTH, hparams.posnet.n_embd);
         ml.get_key(LLM_KV_POSNET_BLOCK_COUNT,      hparams.posnet.n_layer);
@@ -6046,9 +6047,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_WAVTOKENIZER_DEC:
                 {
-                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {hparams.n_embd_features, n_vocab}, 0);
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {hparams.n_embd, n_vocab}, 0);
 
-                    conv1d   = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, hparams.n_embd_features, hparams.posnet.n_embd}, 0);
+                    conv1d   = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, hparams.n_embd, hparams.posnet.n_embd}, 0);
                     conv1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"),   {1, hparams.posnet.n_embd}, 0);
 
                     // posnet
@@ -6144,8 +6145,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, 0);
                     }
 
-                    output   = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {hparams.convnext.n_embd, n_embd}, 0);
-                    output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"),   {n_embd}, 0);
+                    output   = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {hparams.convnext.n_embd, hparams.n_embd_out()}, 0);
+                    output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"),   {hparams.n_embd_out()}, 0);
                 } break;
             case LLM_ARCH_BAILINGMOE:
                 {
index 8c39fce8baad76af7889e8b051631f8d0c1cea78..ac55a8b1ca41ad766d6f619371761e9891a0a26a 100644 (file)
@@ -1036,7 +1036,7 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
 
 #if 1
     // spectral operations
-    const int n_embd = llama_model_n_embd(model_cts);
+    const int n_embd = llama_model_n_embd_out(model_cts);
     const float * embd = llama_get_embeddings(ctx_cts);
 
     auto audio = embd_to_audio(embd, n_codes, n_embd, params.cpuparams.n_threads);