# Embeddings scale
self.embeddings_scale = 1.0
- # note: For some JAIS flavors, output is tied to (same as) wte in original model
- self.output_is_wte = False
if 'mup_embeddings_scale' in self.hparams:
- self.output_is_wte = True # Hack (?)
self.embeddings_scale = self.hparams['mup_embeddings_scale']
elif 'embeddings_scale' in self.hparams:
self.embeddings_scale = self.hparams['embeddings_scale']
if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
tensors.append((new_name, data_torch * self.embeddings_scale))
- if self.output_is_wte:
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
- assert not self.output_is_wte
tensors.append((new_name, data_torch * self.width_scale))
else:
tensors.append((new_name, data_torch))