From: Georgi Gerganov Date: Wed, 22 May 2024 08:01:35 +0000 (+0300) Subject: cuda : fix rope + add tests (llama/7452) X-Git-Tag: upstream/1.7.4~695 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=c4d6958b3e8a2e3b1d36fa197df8a02ac8a00dac;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp cuda : fix rope + add tests (llama/7452) * cuda : fix rope pos data ggml-ci * ggml : drop mode & 1 == 1 support for ggml_rope ggml-ci * ggml : support freq_factors for f16 rope (CPU) ggml-ci * tests : add rope tests using frequency factors ggml-ci --- diff --git a/ggml-cuda/rope.cu b/ggml-cuda/rope.cu index 4a558f4b..50f2cf41 100644 --- a/ggml-cuda/rope.cu +++ b/ggml-cuda/rope.cu @@ -283,9 +283,9 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const bool is_neox = mode & 2; const bool is_glm = mode & 4; - if (is_neox) { - pos = (const int32_t *) src1_d; + pos = (const int32_t *) src1_d; + if (is_neox) { if (src2 != nullptr) { freq_factors = (const float *) src2->data; } diff --git a/ggml.c b/ggml.c index 37b16b7a..d316e3d3 100644 --- a/ggml.c +++ b/ggml.c @@ -6245,6 +6245,8 @@ static struct ggml_tensor * ggml_rope_impl( float xpos_base, bool xpos_down, bool inplace) { + GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported"); + GGML_ASSERT(ggml_is_vector(b)); GGML_ASSERT(b->type == GGML_TYPE_I32); GGML_ASSERT(a->ne[2] == b->ne[0]); @@ -14413,7 +14415,7 @@ static void ggml_compute_forward_rope_f32( freq_factors = (const float *) src2->data; } } else { - GGML_ASSERT(src2 == NULL && "TODO: freq_factors not implemented for mode 1"); + GGML_ASSERT(src2 == NULL && "TODO: freq_factors not implemented for !is_neox"); } // backward process uses inverse rotation by cos and sin. @@ -14529,6 +14531,7 @@ static void ggml_compute_forward_rope_f32( } } +// TODO: deduplicate f16/f32 code static void ggml_compute_forward_rope_f16( const struct ggml_compute_params * params, struct ggml_tensor * dst, @@ -14536,6 +14539,7 @@ static void ggml_compute_forward_rope_f16( const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src1 = dst->src[1]; + const struct ggml_tensor * src2 = dst->src[2]; if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -14588,6 +14592,17 @@ static void ggml_compute_forward_rope_f16( const bool is_neox = mode & 2; const bool is_glm = mode & 4; + const float * freq_factors = NULL; + if (is_neox) { + if (src2 != NULL) { + GGML_ASSERT(src2->type == GGML_TYPE_F32); + GGML_ASSERT(src2->ne[0] >= n_dims / 2); + freq_factors = (const float *) src2->data; + } + } else { + GGML_ASSERT(src2 == NULL && "TODO: freq_factors not implemented for !is_neox"); + } + // backward process uses inverse rotation by cos and sin. // cos and sin build a rotation matrix, where the inverse is the transpose. // this essentially just switches the sign of sin. @@ -14660,10 +14675,11 @@ static void ggml_compute_forward_rope_f16( // simplified from `(ib * n_dims + ic) * inv_ndims` float cur_rot = inv_ndims * ic - ib; + float freq_factor = freq_factors ? freq_factors[ic/2] : 1.0f; float cos_theta, sin_theta; rope_yarn( - theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor, + theta_base/freq_factor, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor, &cos_theta, &sin_theta ); sin_theta *= sin_sign; diff --git a/ggml.h b/ggml.h index 35ac9110..08835042 100644 --- a/ggml.h +++ b/ggml.h @@ -1460,7 +1460,7 @@ extern "C" { struct ggml_tensor * b); // rotary position embedding - // if mode & 1 == 1, skip n_past elements (DEPRECATED) + // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED) // if mode & 2 == 1, GPT-NeoX style // if mode & 4 == 1, ChatGLM style //