From: Faisal Zaghloul
Date: Thu, 7 Nov 2024 16:46:12 +0000 (-0500)
Subject: Remove identical wte/etw logic for jais (#10203)
X-Git-Tag: upstream/0.0.4488~444
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=60e17ce23c2740369af6304113a2dfa0454eaf26;p=pkg%2Fggml%2Fsources%2Fllama.cpp

Remove identical wte/etw logic for jais (#10203)
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 76ee6cef..39afa5ef 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3748,10 +3748,7 @@ class JaisModel(Model):
 
         # Embeddings scale
         self.embeddings_scale = 1.0
-        # note: For some JAIS flavors, output is tied to (same as) wte in original model
-        self.output_is_wte = False
         if 'mup_embeddings_scale' in self.hparams:
-            self.output_is_wte = True # Hack (?)
             self.embeddings_scale = self.hparams['mup_embeddings_scale']
         elif 'embeddings_scale' in self.hparams:
             self.embeddings_scale = self.hparams['embeddings_scale']
@@ -3808,10 +3805,7 @@ class JaisModel(Model):
 
         if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
             tensors.append((new_name, data_torch * self.embeddings_scale))
-            if self.output_is_wte:
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
         elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
-            assert not self.output_is_wte
             tensors.append((new_name, data_torch * self.width_scale))
         else:
             tensors.append((new_name, data_torch))