float freq_base,
float freq_scale,
float xpos_base,
- bool xpos_downscale,
+ bool xpos_down,
bool inplace) {
GGML_ASSERT(n_past >= 0);
bool is_node = false;
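// pack the rope arguments into the op parameter block: slots 0..3 hold the
// int32_t values directly, slots 4..7 receive the float/bool arguments
// bit-copied in with memcpy (the bool fills only the first byte of its slot)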
int32_t params[8] = { n_past, n_dims, mode, n_ctx };
memcpy(params + 4, &freq_base, sizeof(float));
memcpy(params + 5, &freq_scale, sizeof(float));
- memcpy(params + 6, &xpos_base, sizeof(float));
- memcpy(params + 7, &xpos_downscale, sizeof(bool));
+ memcpy(params + 6, &xpos_base, sizeof(float));
+ memcpy(params + 7, &xpos_down, sizeof(bool));
ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_ROPE;
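// in-place xPos rope wrapper: forwards to ggml_rope_impl with mode = 0, n_ctx = 0,
// freq_base = 10000.0f, freq_scale = 1.0f and inplace = true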
struct ggml_tensor * a,
int n_past,
int n_dims,
- float scale_base,
- bool downscale) {
- return ggml_rope_impl(ctx, a, n_past, n_dims, 0, 0, 10000.0f, 1.0f, scale_base, downscale, true);
+ float base,
+ bool down) {
+ return ggml_rope_impl(ctx, a, n_past, n_dims, 0, 0, 10000.0f, 1.0f, base, down, true);
}
// ggml_rope_back
float freq_base,
float freq_scale,
float xpos_base,
- bool xpos_downscale) {
+ bool xpos_down) {
GGML_ASSERT(n_past >= 0);
GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
int32_t params[8] = { n_past, n_dims, mode, n_ctx };
- memcpy(params + 4, &freq_base, sizeof(float));
+ memcpy(params + 4, &freq_base, sizeof(float));
memcpy(params + 5, &freq_scale, sizeof(float));
- memcpy(params + 6, &xpos_base, sizeof(float));
- memcpy(params + 7, &xpos_downscale, sizeof(bool));
+ memcpy(params + 6, &xpos_base, sizeof(float));
+ memcpy(params + 7, &xpos_down, sizeof(bool));
ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_ROPE_BACK;
// these two only relevant for xPos RoPE:
float xpos_base;
- bool xpos_downscale;
+ bool xpos_down;
const int n_past = ((int32_t *) dst->op_params)[0];
const int n_dims = ((int32_t *) dst->op_params)[1];
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
- memcpy(&xpos_downscale, (int32_t *) dst->op_params + 7, sizeof(bool));
+ memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
assert(n_past >= 0);
const float sin_theta = sinf(theta);
// zeta scaling for xPos only:
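// zeta = ((i0 + 0.4*ne0) / (1.4*ne0))^((n_past + i2) / xpos_base); xpos_down selects
// the reciprocal factor, which xPos applies to the opposite projection (queries vs.
// keys) so that their dot product decays with relative distance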
float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), (n_past + i2) / xpos_base) : 1.0f;
- if (xpos_downscale) zeta = 1.0f / zeta;
+ if (xpos_down) zeta = 1.0f / zeta;
theta *= theta_scale;
// these two only relevant for xPos RoPE:
float xpos_base;
- bool xpos_downscale;
+ bool xpos_down;
const int n_past = ((int32_t *) dst->op_params)[0];
const int n_dims = ((int32_t *) dst->op_params)[1];
const int mode = ((int32_t *) dst->op_params)[2];
- const int n_ctx = ((int32_t *) dst->op_params)[3];
+ const int n_ctx = ((int32_t *) dst->op_params)[3]; UNUSED(n_ctx);
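// n_ctx is decoded to keep the parameter layout explicit but is not needed in this
// kernel; UNUSED() marks it as intentionally unused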
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
- memcpy(&xpos_downscale, (int32_t *) dst->op_params + 7, sizeof(bool));
+ memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
assert(n_past >= 0);
const float sin_theta = sinf(theta);
// zeta scaling for xPos only:
float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), (n_past + i2) / xpos_base) : 1.0f;
- if (xpos_downscale) zeta = 1.0f / zeta;
+ if (xpos_down) zeta = 1.0f / zeta;
theta *= theta_scale;
const int n_dims = ((int32_t *) tensor->op_params)[1];
const int mode = ((int32_t *) tensor->op_params)[2];
const int n_ctx = ((int32_t *) tensor->op_params)[3];
- float freq_base, freq_scale, xpos_base;
- bool xpos_downscale;
+ float freq_base;
+ float freq_scale;
+ float xpos_base;
+ bool xpos_down;
memcpy(&freq_base, (int32_t *) tensor->op_params + 4, sizeof(float));
memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
memcpy(&xpos_base, (int32_t *) tensor->op_params + 6, sizeof(float));
- memcpy(&xpos_downscale, (int32_t *) tensor->op_params + 7, sizeof(bool));
+ memcpy(&xpos_down, (int32_t *) tensor->op_params + 7, sizeof(bool));
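// the decoded rope parameters are forwarded unchanged into the gradient term below,
// which ggml_add_impl accumulates onto src0->grad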
src0->grad = ggml_add_impl(ctx,
src0->grad,
freq_base,
freq_scale,
xpos_base,
- xpos_downscale),
+ xpos_down),
inplace);
}
} break;
const int n_dims = ((int32_t *) tensor->op_params)[1];
const int mode = ((int32_t *) tensor->op_params)[2];
const int n_ctx = ((int32_t *) tensor->op_params)[3];
- float freq_base, freq_scale, xpos_base;
- bool xpos_downscale;
+ float freq_base;
+ float freq_scale;
+ float xpos_base;
+ bool xpos_down;
memcpy(&freq_base, (int32_t *) tensor->op_params + 4, sizeof(float));
memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
memcpy(&xpos_base, (int32_t *) tensor->op_params + 6, sizeof(float));
- memcpy(&xpos_downscale, (int32_t *) tensor->op_params + 7, sizeof(bool));
+ memcpy(&xpos_down, (int32_t *) tensor->op_params + 7, sizeof(bool));
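// same pattern as the previous backward case, except the inner call here takes an
// extra trailing false, presumably the inplace flag of ggml_rope_impl (its signature
// appears at the top of this diff), before the result is accumulated onto src0->grad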
src0->grad = ggml_add_impl(ctx,
src0->grad,
freq_base,
freq_scale,
xpos_base,
- xpos_downscale,
+ xpos_down,
false),
inplace);
}