ggml : move rope type enum to ggml.h (#8949)

author Daniel Bevenius <redacted>

Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)

committer GitHub <redacted>

Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)
author Daniel Bevenius <redacted>
Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)
committer GitHub <redacted>
Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h

index 15602a96df7ad3ef4df675d2a786a51d888c6cdb..1d2a354024675e7b65665517889c89aa6a91a229 100644 (file)
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -244,6 +244,8 @@
  #define GGML_EXIT_SUCCESS 0
  #define GGML_EXIT_ABORTED 1
  
+#define GGML_ROPE_TYPE_NEOX 2
+
  #define GGUF_MAGIC "GGUF"
  
  #define GGUF_VERSION 3
@@ -1453,8 +1455,8 @@ extern "C" {
              struct ggml_tensor  * b);
  
      // rotary position embedding
-    // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
-    // if mode & 2 == 1, GPT-NeoX style
+    // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
+    // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
      //
      // b is an int32 vector with size a->ne[2], it contains the positions
      GGML_API struct ggml_tensor * ggml_rope(
diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp

index 8c4132f5bb7ad74b0f1544777b978541b23fc879..a4ec8418e2ab35e3d253601ac269c11fccfa5308 100644 (file)
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -2881,7 +2881,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
      ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast,
                               beta_slow, corr_dims);
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      // init cos/sin cache
      ggml_cann_pool_alloc sin_allocator(
diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml/src/ggml-cuda/rope.cu

index 99ec1dd98ca9c774580f7deac8b7a06a03071543..88f586d689cfd39a9b2ebf8f482c37d1501dd771 100644 (file)
--- a/ggml/src/ggml-cuda/rope.cu
+++ b/ggml/src/ggml-cuda/rope.cu
@@ -226,7 +226,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
      memcpy(&beta_fast,   (int32_t *) dst->op_params +  9, sizeof(float));
      memcpy(&beta_slow,   (int32_t *) dst->op_params + 10, sizeof(float));
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      const int32_t * pos = (const int32_t *) src1_d;
  
diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m

index aad189430ab0b3f425de57b11b5cd528656906e7..995f1934bc73bcb733eeb7b587d70803a2b3e320 100644 (file)
--- a/ggml/src/ggml-metal.m
+++ b/ggml/src/ggml-metal.m
@@ -2313,7 +2313,7 @@ static enum ggml_status ggml_metal_graph_compute(
                          memcpy(&beta_fast,   (int32_t *) dst->op_params +  9, sizeof(float));
                          memcpy(&beta_slow,   (int32_t *) dst->op_params + 10, sizeof(float));
  
-                        const bool is_neox = mode & 2;
+                        const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
                          id<MTLComputePipelineState> pipeline = nil;
  
diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp

index c7545bcc1a8a9c6949743047dc2cf89ee48c0fdd..1f06f78fa3d9193d3dc16e45a3aea6473d053b1e 100644 (file)
--- a/ggml/src/ggml-sycl/rope.cpp
+++ b/ggml/src/ggml-sycl/rope.cpp
@@ -226,7 +226,7 @@ void ggml_sycl_op_rope(
      memcpy(&beta_fast,   (int32_t *) dst->op_params +  9, sizeof(float));
      memcpy(&beta_slow,   (int32_t *) dst->op_params + 10, sizeof(float));
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      const int32_t * pos = (const int32_t *) src1_dd;
  
diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp

index 86732837254f0c59908781641f4babb7abb6e9ef..c0504e43429be0e31e0faff0ee48301a919e5d4b 100644 (file)
--- a/ggml/src/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan.cpp
@@ -4053,7 +4053,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
      case GGML_OP_ROPE:
          {
              const int mode = ((const int32_t *) dst->op_params)[2];
-            const bool is_neox = mode & 2;
+            const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
              if (is_neox) {
                  if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c

index c9b0e81684903b2d1326d3422e6f94774262fa7a..88e4fb7325dd9d8e6cde6e9cb86324775bcb42d4 100644 (file)
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -14094,7 +14094,7 @@ static void ggml_compute_forward_rope_f32(
      float corr_dims[2];
      ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      const float * freq_factors = NULL;
      if (src2 != NULL) {
@@ -14219,7 +14219,7 @@ static void ggml_compute_forward_rope_f16(
      float corr_dims[2];
      ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      const float * freq_factors = NULL;
      if (src2 != NULL) {
diff --git a/ggml/src/kompute-shaders/op_rope_f16.comp b/ggml/src/kompute-shaders/op_rope_f16.comp

index 1a4058b3f1f1076d3cbd56b2322213c80eabb5d2..0ecfb2eab527c7faee6b4a1a8b02d9ff177daef6 100644 (file)
--- a/ggml/src/kompute-shaders/op_rope_f16.comp
+++ b/ggml/src/kompute-shaders/op_rope_f16.comp
@@ -11,7 +11,7 @@ void main() {
      const uint i2 = gl_WorkGroupID.y;
      const uint i1 = gl_WorkGroupID.x;
  
-    const bool is_neox = (pcs.mode & 2) != 0;
+    const bool is_neox = (pcs.mode & GGML_ROPE_TYPE_NEOX) != 0;
  
      float corr_dims[2];
      rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims);
diff --git a/ggml/src/kompute-shaders/op_rope_f32.comp b/ggml/src/kompute-shaders/op_rope_f32.comp

index 65e03827a2660efc187d6ac7fa126741fa90dc44..cec0fd9a5d10c1c40910ee58c6a6985a65049812 100644 (file)
--- a/ggml/src/kompute-shaders/op_rope_f32.comp
+++ b/ggml/src/kompute-shaders/op_rope_f32.comp
@@ -11,7 +11,7 @@ void main() {
      const uint i2 = gl_WorkGroupID.y;
      const uint i1 = gl_WorkGroupID.x;
  
-    const bool is_neox = (pcs.mode & 2) != 0;
+    const bool is_neox = (pcs.mode & GGML_ROPE_TYPE_NEOX) != 0;
  
      float corr_dims[2];
      rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims);
diff --git a/ggml/src/kompute-shaders/rope_common.comp b/ggml/src/kompute-shaders/rope_common.comp

index 7b9394cb2fffc5e464fc8e7e82f8dbe7f7eac979..df4702896d46f2f5e42a3328d18e6212a0f3d9d2 100644 (file)
--- a/ggml/src/kompute-shaders/rope_common.comp
+++ b/ggml/src/kompute-shaders/rope_common.comp
@@ -1,5 +1,7 @@
  #include "common.comp"
  
+#define GGML_ROPE_TYPE_NEOX 2
+
  // TODO: use a local size of 32 or more (Metal uses 1024)
  layout(local_size_x = 1) in;
  
diff --git a/include/llama.h b/include/llama.h

index ce07f4fac8f100ef1a365b59aed27ea267877998..3c28cf0b509fb39e582ad69a612447fa6b9811da 100644 (file)
--- a/include/llama.h
+++ b/include/llama.h
@@ -95,13 +95,10 @@ extern "C" {
          LLAMA_VOCAB_PRE_TYPE_CODESHELL      = 22,
      };
  
-    // note: these values should be synchronized with ggml_rope
-    // TODO: maybe move this enum to ggml.h (ggml_rope_type)
      enum llama_rope_type {
          LLAMA_ROPE_TYPE_NONE = -1,
-        LLAMA_ROPE_TYPE_NORM =  0,
-        LLAMA_ROPE_TYPE_NEOX =  2,
-        LLAMA_ROPE_TYPE_GLM  =  4,
+        LLAMA_ROPE_TYPE_NORM = 0,
+        LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
      };
  
      enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
author	Daniel Bevenius <redacted>
	Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)
committer	GitHub <redacted>
	Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)
ggml/include/ggml.h		patch | blob | history
ggml/src/ggml-cann/aclnn_ops.cpp		patch | blob | history
ggml/src/ggml-cuda/rope.cu		patch | blob | history
ggml/src/ggml-metal.m		patch | blob | history
ggml/src/ggml-sycl/rope.cpp		patch | blob | history
ggml/src/ggml-vulkan.cpp		patch | blob | history
ggml/src/ggml.c		patch | blob | history
ggml/src/kompute-shaders/op_rope_f16.comp		patch | blob | history
ggml/src/kompute-shaders/op_rope_f32.comp		patch | blob | history
ggml/src/kompute-shaders/rope_common.comp		patch | blob | history
include/llama.h		patch | blob | history