Remove identical wte/etw logic for jais (#10203)

author Faisal Zaghloul <redacted>

Thu, 7 Nov 2024 16:46:12 +0000 (11:46 -0500)

committer GitHub <redacted>

Thu, 7 Nov 2024 16:46:12 +0000 (08:46 -0800)
author Faisal Zaghloul <redacted>
Thu, 7 Nov 2024 16:46:12 +0000 (11:46 -0500)
committer GitHub <redacted>
Thu, 7 Nov 2024 16:46:12 +0000 (08:46 -0800)
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py

index 76ee6cef52ac055dfcafa3eab374d8871ef242c1..39afa5ef4f27227f93fb60b5f6134661facef087 100755 (executable)
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3748,10 +3748,7 @@ class JaisModel(Model):
  
          # Embeddings scale
          self.embeddings_scale = 1.0
-        # note: For some JAIS flavors, output is tied to (same as) wte in original model
-        self.output_is_wte = False
          if 'mup_embeddings_scale' in self.hparams:
-            self.output_is_wte = True   # Hack (?)
              self.embeddings_scale = self.hparams['mup_embeddings_scale']
          elif 'embeddings_scale' in self.hparams:
              self.embeddings_scale = self.hparams['embeddings_scale']
@@ -3808,10 +3805,7 @@ class JaisModel(Model):
  
          if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
              tensors.append((new_name, data_torch * self.embeddings_scale))
-            if self.output_is_wte:
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
          elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
-            assert not self.output_is_wte
              tensors.append((new_name, data_torch * self.width_scale))
          else:
              tensors.append((new_name, data_torch))
author	Faisal Zaghloul <redacted>
	Thu, 7 Nov 2024 16:46:12 +0000 (11:46 -0500)
committer	GitHub <redacted>
	Thu, 7 Nov 2024 16:46:12 +0000 (08:46 -0800)