From: Georgi Gerganov Date: Fri, 14 Apr 2023 10:32:27 +0000 (+0300) Subject: ggml : avoid powf() calls in ggml_rope() X-Git-Tag: upstream/0.0.1642~1545 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=820477bacb536ce0b81c6f9d20996c5bdd4ca286;p=pkg%2Fggml%2Fsources%2Fggml ggml : avoid powf() calls in ggml_rope() --- diff --git a/src/ggml.c b/src/ggml.c index 15a37108..d99aca21 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -7509,6 +7509,8 @@ static void ggml_compute_forward_rope_f32( // row index used to determine which thread to use int ir = 0; + const float theta_scale = powf(10000.0, -2.0f/n_dims); + for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { const int p = (mode == 0 ? n_past + i2 : i2); @@ -7516,11 +7518,13 @@ static void ggml_compute_forward_rope_f32( if (ir++ < ir0) continue; if (ir > ir1) break; + float theta = (float)p; + for (int i0 = 0; i0 < n_dims; i0 += 2) { - const float theta = powf(10000.0, ((float)-i0)/n_dims); + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); - const float cos_theta = cosf(p*theta); - const float sin_theta = sinf(p*theta); + theta *= theta_scale; const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); @@ -7582,6 +7586,8 @@ static void ggml_compute_forward_rope_f16( // row index used to determine which thread to use int ir = 0; + const float theta_scale = powf(10000.0, -2.0f/n_dims); + for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { const int p = (mode == 0 ? n_past + i2 : i2); @@ -7589,11 +7595,13 @@ static void ggml_compute_forward_rope_f16( if (ir++ < ir0) continue; if (ir > ir1) break; + float theta = (float)p; + for (int i0 = 0; i0 < n_dims; i0 += 2) { - const float theta = powf(10000.0, ((float)-i0)/n_dims); + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); - const float cos_theta = cosf(p*theta); - const float sin_theta = sinf(p*theta); + theta *= theta_scale; const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);