ggml : fix padding in timestep embedding kernels (#15932)

author Daniel Bevenius <redacted>
Tue, 16 Sep 2025 13:25:57 +0000 (15:25 +0200)
committer GitHub <redacted>
Tue, 16 Sep 2025 13:25:57 +0000 (15:25 +0200)
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp

index 212e52ef6a1c843fec010b49a8ac9ce9a89f0e4e..c4824d145a54d5fb9466b774421f200f9b6bf36d 100644 (file)
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -8599,7 +8599,6 @@ static void ggml_compute_forward_timestep_embedding_f32(
          }
          if (dim % 2 != 0 && ith == 0) {
              embed_data[2 * half] = 0.f;
-            embed_data[dim] = 0.f;
          }
      }
  }
diff --git a/ggml/src/ggml-cuda/tsembd.cu b/ggml/src/ggml-cuda/tsembd.cu

index 153ddbcda92dcc84c501d48bac1d7be2a6dd8234..b91a26fc80e6177faef94596eaffd200c553ba48 100644 (file)
--- a/ggml/src/ggml-cuda/tsembd.cu
+++ b/ggml/src/ggml-cuda/tsembd.cu
@@ -7,11 +7,11 @@ static __global__ void timestep_embedding_f32(const float * timesteps, float * d
      int j = threadIdx.x + blockIdx.x * blockDim.x;
      float * embed_data = (float *)((char *)dst +  i*nb1);
  
-    if (dim % 2 != 0 && j == ((dim + 1) / 2)) {
-        embed_data[dim] = 0.f;
+    int half = dim / 2;
+    if (dim % 2 != 0 && j == half) {
+        embed_data[2 * half] = 0.f;
      }
  
-    int half = dim / 2;
      if (j >= half) {
          return;
      }
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal

index 4314c9cc9388a7c00b8bf5d13ae9730fec0f6ec0..5057e264f6090b9a409bd10163c99099b89ee251 100644 (file)
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -4167,7 +4167,7 @@ kernel void kernel_timestep_embedding_f32(
      }
  
      if (args.dim % 2 != 0 && tpitg.x == 0) {
-        embed_data[args.dim] = 0.f;
+        embed_data[2 * half_] = 0.f;
      }
  }
  
diff --git a/ggml/src/ggml-opencl/kernels/tsembd.cl b/ggml/src/ggml-opencl/kernels/tsembd.cl

index 4b1107f70ba7ad50f1a2a19a371b9626bbba5150..21444bd958298ee9e1d3ebe613892e9655be57bd 100644 (file)
--- a/ggml/src/ggml-opencl/kernels/tsembd.cl
+++ b/ggml/src/ggml-opencl/kernels/tsembd.cl
@@ -26,8 +26,8 @@ kernel void kernel_timestep_embedding(
      local_half_dim = logical_dim / 2;
      local_embed_data_ptr = (global float *)((global char *)local_dst_output_base_ptr + local_i * dst_nb1_bytes);
  
-    if (logical_dim % 2 != 0 && local_j == ((logical_dim + 1) / 2)) {
-        local_embed_data_ptr[logical_dim] = 0.0f;
+    if (logical_dim % 2 != 0 && local_j == local_half_dim) {
+        local_embed_data_ptr[2 * local_half_dim] = 0.0f;
      }
  
      if (local_j >= local_half_dim) {
diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp

index f6ca626ea7a53f963626fba465e42d4808f6de54..f2003794d3f55a4dbc1bbdca1b9294ac5a1f86bf 100644 (file)
--- a/ggml/src/ggml-sycl/tsembd.cpp
+++ b/ggml/src/ggml-sycl/tsembd.cpp
@@ -21,11 +21,12 @@ static void timestep_embedding_f32(
      int j = item_ct1.get_local_id(2) + item_ct1.get_group(2) * item_ct1.get_local_range(2);
      float * embed_data = (float *)((char *)dst +  i*nb1);
  
-    if (dim % 2 != 0 && j == ((dim + 1) / 2)) {
-        embed_data[dim] = 0.f;
+    int half = dim / 2;
+
+    if (dim % 2 != 0 && j == half) {
+        embed_data[2 * half] = 0.f;
      }
  
-    int half = dim / 2;
      if (j >= half) {
          return;
      }
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp b/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp

index 79e065a9313aa1552ca55cec3d588fcefbcf5266..ce8e09442d9b69e83a91c50f3238c30828133b29 100644 (file)
--- a/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp
@@ -24,11 +24,12 @@ void main() {
      const uint j = gl_GlobalInvocationID.x;
      const uint d_offset = i * p.nb1;
  
-    if (p.dim % 2 != 0 && j == ((p.dim + 1) / 2)) {
-        data_d[d_offset + p.dim] = 0.f;
+    const uint half_dim = p.dim / 2;
+
+    if (p.dim % 2 != 0 && j == half_dim) {
+        data_d[d_offset + 2 * half_dim] = 0.f;
      }
  
-    const uint half_dim = p.dim / 2;
      if (j >= half_dim) {
          return;
      }
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c

index 50dc1aa24fff58938dcda094fac398e1a2126ce2..3584827dca7fcb14f002014147f9c9a75a8a81b8 100644 (file)
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -4923,12 +4923,8 @@ struct ggml_tensor * ggml_timestep_embedding(
          struct ggml_tensor  * timesteps,
          int                   dim,
          int                   max_period) {
-    int actual_dim = dim;
-    if (dim % 2 != 0) {
-        actual_dim = dim + 1;
-    }
  
-    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
+    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
  
      ggml_set_op_params_i32(result, 0, dim);
      ggml_set_op_params_i32(result, 1, max_period);
author	Daniel Bevenius <redacted>
	Tue, 16 Sep 2025 13:25:57 +0000 (15:25 +0200)
committer	GitHub <redacted>
	Tue, 16 Sep 2025 13:25:57 +0000 (15:25 +0200)
ggml/src/ggml-cpu/ops.cpp		patch | blob | history
ggml/src/ggml-cuda/tsembd.cu		patch | blob | history
ggml/src/ggml-metal/ggml-metal.metal		patch | blob | history
ggml/src/ggml-opencl/kernels/tsembd.cl		patch | blob | history
ggml/src/ggml-sycl/tsembd.cpp		patch | blob | history
ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp		patch | blob | history
ggml/src/ggml.c		patch | blob | history