}
if (dim % 2 != 0 && ith == 0) {
embed_data[2 * half] = 0.f;
- embed_data[dim] = 0.f;
}
}
}
int j = threadIdx.x + blockIdx.x * blockDim.x;
float * embed_data = (float *)((char *)dst + i*nb1);
- if (dim % 2 != 0 && j == ((dim + 1) / 2)) {
- embed_data[dim] = 0.f;
+ int half = dim / 2;
+ if (dim % 2 != 0 && j == half) {
+ embed_data[2 * half] = 0.f;
}
- int half = dim / 2;
if (j >= half) {
return;
}
}
if (args.dim % 2 != 0 && tpitg.x == 0) {
- embed_data[args.dim] = 0.f;
+ embed_data[2 * half_] = 0.f;
}
}
local_half_dim = logical_dim / 2;
local_embed_data_ptr = (global float *)((global char *)local_dst_output_base_ptr + local_i * dst_nb1_bytes);
- if (logical_dim % 2 != 0 && local_j == ((logical_dim + 1) / 2)) {
- local_embed_data_ptr[logical_dim] = 0.0f;
+ if (logical_dim % 2 != 0 && local_j == local_half_dim) {
+ local_embed_data_ptr[2 * local_half_dim] = 0.0f;
}
if (local_j >= local_half_dim) {
int j = item_ct1.get_local_id(2) + item_ct1.get_group(2) * item_ct1.get_local_range(2);
float * embed_data = (float *)((char *)dst + i*nb1);
- if (dim % 2 != 0 && j == ((dim + 1) / 2)) {
- embed_data[dim] = 0.f;
+ int half = dim / 2;
+
+ if (dim % 2 != 0 && j == half) {
+ embed_data[2 * half] = 0.f;
}
- int half = dim / 2;
if (j >= half) {
return;
}
const uint j = gl_GlobalInvocationID.x;
const uint d_offset = i * p.nb1;
- if (p.dim % 2 != 0 && j == ((p.dim + 1) / 2)) {
- data_d[d_offset + p.dim] = 0.f;
+ const uint half_dim = p.dim / 2;
+
+ if (p.dim % 2 != 0 && j == half_dim) {
+ data_d[d_offset + 2 * half_dim] = 0.f;
}
- const uint half_dim = p.dim / 2;
if (j >= half_dim) {
return;
}
struct ggml_tensor * timesteps,
int dim,
int max_period) {
- int actual_dim = dim;
- if (dim % 2 != 0) {
- actual_dim = dim + 1;
- }
- struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
+ struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
ggml_set_op_params_i32(result, 0, dim);
ggml_set_op_params_i32(result, 1, max_period);