ggml : move rope type enum to ggml.h (llama/8949)

author Daniel Bevenius <redacted>

Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)

committer Georgi Gerganov <redacted>

Tue, 27 Aug 2024 19:01:14 +0000 (22:01 +0300)
author Daniel Bevenius <redacted>
Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)
committer Georgi Gerganov <redacted>
Tue, 27 Aug 2024 19:01:14 +0000 (22:01 +0300)
diff --git a/include/ggml.h b/include/ggml.h

index 126ce068a020a13b9821a48dc9033984ea2e9cd0..a11a27a9b93bef9759b2a47015418dc5ef1039a4 100644 (file)
--- a/include/ggml.h
+++ b/include/ggml.h
@@ -244,6 +244,8 @@
  #define GGML_EXIT_SUCCESS 0
  #define GGML_EXIT_ABORTED 1
  
+#define GGML_ROPE_TYPE_NEOX 2
+
  #define GGUF_MAGIC "GGUF"
  
  #define GGUF_VERSION 3
@@ -1473,8 +1475,8 @@ extern "C" {
              struct ggml_tensor  * b);
  
      // rotary position embedding
-    // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
-    // if mode & 2 == 1, GPT-NeoX style
+    // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
+    // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
      //
      // b is an int32 vector with size a->ne[2], it contains the positions
      GGML_API struct ggml_tensor * ggml_rope(
diff --git a/src/ggml-cann/aclnn_ops.cpp b/src/ggml-cann/aclnn_ops.cpp

index 8c4132f5bb7ad74b0f1544777b978541b23fc879..a4ec8418e2ab35e3d253601ac269c11fccfa5308 100644 (file)
--- a/src/ggml-cann/aclnn_ops.cpp
+++ b/src/ggml-cann/aclnn_ops.cpp
@@ -2881,7 +2881,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
      ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast,
                               beta_slow, corr_dims);
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      // init cos/sin cache
      ggml_cann_pool_alloc sin_allocator(
diff --git a/src/ggml-cuda/rope.cu b/src/ggml-cuda/rope.cu

index 99ec1dd98ca9c774580f7deac8b7a06a03071543..88f586d689cfd39a9b2ebf8f482c37d1501dd771 100644 (file)
--- a/src/ggml-cuda/rope.cu
+++ b/src/ggml-cuda/rope.cu
@@ -226,7 +226,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
      memcpy(&beta_fast,   (int32_t *) dst->op_params +  9, sizeof(float));
      memcpy(&beta_slow,   (int32_t *) dst->op_params + 10, sizeof(float));
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      const int32_t * pos = (const int32_t *) src1_d;
  
diff --git a/src/ggml-metal.m b/src/ggml-metal.m

index 7950c0dccb7f301c8f923bbaa16b1826102f7ba9..7dcb69ee770d3374244eb43854af5ae279dafd0d 100644 (file)
--- a/src/ggml-metal.m
+++ b/src/ggml-metal.m
@@ -2373,7 +2373,7 @@ static enum ggml_status ggml_metal_graph_compute(
                          memcpy(&beta_fast,   (int32_t *) dst->op_params +  9, sizeof(float));
                          memcpy(&beta_slow,   (int32_t *) dst->op_params + 10, sizeof(float));
  
-                        const bool is_neox = mode & 2;
+                        const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
                          id<MTLComputePipelineState> pipeline = nil;
  
diff --git a/src/ggml-sycl/rope.cpp b/src/ggml-sycl/rope.cpp

index c7545bcc1a8a9c6949743047dc2cf89ee48c0fdd..1f06f78fa3d9193d3dc16e45a3aea6473d053b1e 100644 (file)
--- a/src/ggml-sycl/rope.cpp
+++ b/src/ggml-sycl/rope.cpp
@@ -226,7 +226,7 @@ void ggml_sycl_op_rope(
      memcpy(&beta_fast,   (int32_t *) dst->op_params +  9, sizeof(float));
      memcpy(&beta_slow,   (int32_t *) dst->op_params + 10, sizeof(float));
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      const int32_t * pos = (const int32_t *) src1_dd;
  
diff --git a/src/ggml-vulkan.cpp b/src/ggml-vulkan.cpp

index 4db30cac3a153588cb1d0b3c02b89089abd4dd93..9e3074d58655bc54cc9a4dce022906dd5a509d02 100644 (file)
--- a/src/ggml-vulkan.cpp
+++ b/src/ggml-vulkan.cpp
@@ -4067,7 +4067,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
      case GGML_OP_ROPE:
          {
              const int mode = ((const int32_t *) dst->op_params)[2];
-            const bool is_neox = mode & 2;
+            const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
              if (is_neox) {
                  if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
diff --git a/src/ggml.c b/src/ggml.c

index 790798cee0235d055f8a126a97dd623eaa6da9a9..dc77694bc31dd88d911ff2860741bc920f22823d 100644 (file)
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -14353,7 +14353,7 @@ static void ggml_compute_forward_rope_f32(
      float corr_dims[2];
      ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      const float * freq_factors = NULL;
      if (src2 != NULL) {
@@ -14478,7 +14478,7 @@ static void ggml_compute_forward_rope_f16(
      float corr_dims[2];
      ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
  
-    const bool is_neox = mode & 2;
+    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
  
      const float * freq_factors = NULL;
      if (src2 != NULL) {
author	Daniel Bevenius <redacted>
	Tue, 13 Aug 2024 19:13:15 +0000 (21:13 +0200)
committer	Georgi Gerganov <redacted>
	Tue, 27 Aug 2024 19:01:14 +0000 (22:01 +0300)
include/ggml.h		patch | blob | history
src/ggml-cann/aclnn_ops.cpp		patch | blob | history
src/ggml-cuda/rope.cu		patch | blob | history
src/ggml-metal.m		patch | blob | history
src/ggml-sycl/rope.cpp		patch | blob | history
src/ggml-vulkan.cpp		patch | blob | history
src/ggml.c		patch | blob | history