vk_pipeline pipeline_soft_max_f32_wg512, pipeline_soft_max_f32_f16_wg512;
vk_pipeline pipeline_rope_norm_f32, pipeline_rope_norm_f16;
vk_pipeline pipeline_rope_neox_f32, pipeline_rope_neox_f16;
+ vk_pipeline pipeline_rope_multi_f32, pipeline_rope_multi_f16;
+ vk_pipeline pipeline_rope_vision_f32, pipeline_rope_vision_f16;
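+ // dedicated pipelines for multi-section rope (mrope) and its vision variant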
vk_pipeline pipeline_argsort_f32;
vk_pipeline pipeline_sum_rows_f32;
vk_pipeline pipeline_im2col_f32, pipeline_im2col_f32_f16;
float corr_dims[2];
float theta_scale;
uint32_t has_ff;
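+ // src0 extent in dim 2 and element strides for dims 1 and 2, for non-contiguous inputs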
+ uint32_t ne02;
+ uint32_t s1;
+ uint32_t s2;
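+ // mrope section sizes copied from op_params; left zeroed for non-mrope modes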
+ int32_t sections[4];
};
struct vk_op_soft_max_push_constants {
ggml_vk_create_pipeline(device, device->pipeline_rope_norm_f32, "rope_norm_f32", rope_norm_f32_len, rope_norm_f32_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f32, "rope_neox_f32", rope_neox_f32_len, rope_neox_f32_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_rope_multi_f32, "rope_multi_f32", rope_multi_f32_len, rope_multi_f32_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_rope_vision_f32, "rope_vision_f32", rope_vision_f32_len, rope_vision_f32_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
if (device->float_controls_rte_fp16) {
ggml_vk_create_pipeline(device, device->pipeline_rope_norm_f16, "rope_norm_f16", rope_norm_f16_rte_len, rope_norm_f16_rte_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f16, "rope_neox_f16", rope_neox_f16_rte_len, rope_neox_f16_rte_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_rope_multi_f16, "rope_multi_f16", rope_multi_f16_rte_len, rope_multi_f16_rte_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_rope_vision_f16, "rope_vision_f16", rope_vision_f16_rte_len, rope_vision_f16_rte_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
} else {
ggml_vk_create_pipeline(device, device->pipeline_rope_norm_f16, "rope_norm_f16", rope_norm_f16_len, rope_norm_f16_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f16, "rope_neox_f16", rope_neox_f16_len, rope_neox_f16_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_rope_multi_f16, "rope_multi_f16", rope_multi_f16_len, rope_multi_f16_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_rope_vision_f16, "rope_vision_f16", rope_vision_f16_len, rope_vision_f16_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
}
ggml_vk_create_pipeline(device, device->pipeline_argsort_f32, "argsort_f32", argsort_f32_len, argsort_f32_data, "main", 2, sizeof(vk_op_argsort_push_constants), {1024, 1, 1}, {}, 1);
{
const int mode = ((const int32_t *) dst->op_params)[2];
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
+ const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
+ const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
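+ // GGML_ROPE_TYPE_VISION sets the MROPE bit too, so the mrope branch below must exclude vision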
if (is_neox) {
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
return ctx->device->pipeline_rope_neox_f32;
}
if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
return ctx->device->pipeline_rope_neox_f16;
}
+ } else if (is_mrope && !is_vision) {
+ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+ return ctx->device->pipeline_rope_multi_f32;
+ }
+ if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
+ return ctx->device->pipeline_rope_multi_f16;
+ }
+ } else if (is_vision) {
+ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+ return ctx->device->pipeline_rope_vision_f32;
+ }
+ if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
+ return ctx->device->pipeline_rope_vision_f16;
+ }
} else {
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
return ctx->device->pipeline_rope_norm_f32;
}
if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
return ctx->device->pipeline_rope_norm_f16;
}
}
case GGML_OP_CLAMP:
case GGML_OP_PAD:
case GGML_OP_REPEAT:
+ case GGML_OP_ROPE:
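+ // rope now supports non-contiguous inputs via the explicit strides in its push constants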
return true;
default:
return false;
static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * src2, ggml_tensor * dst, bool dryrun = false) {
const int n_dims = ((int32_t *) dst->op_params)[1];
- // const int mode = ((int32_t *) dst->op_params)[2];
+ const int mode = ((int32_t *) dst->op_params)[2];
// const int n_ctx = ((int32_t *) dst->op_params)[3];
const int n_ctx_orig = ((int32_t *) dst->op_params)[4];
const float freq_base = ((float *) dst->op_params)[5];
const float freq_scale = ((float *) dst->op_params)[6];
const float ext_factor = ((float *) dst->op_params)[7];
const float attn_factor = ((float *) dst->op_params)[8];
const float beta_fast = ((float *) dst->op_params)[9];
const float beta_slow = ((float *) dst->op_params)[10];
+ int sections[4] {};
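+ // mrope stores four section sizes after the standard rope parameters in op_params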
+ if (mode & GGML_ROPE_TYPE_MROPE) {
+ memcpy(sections, (int32_t *) dst->op_params + 11, sizeof(int)*4);
+ }
float corr_dims[2];
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
const float theta_scale = powf(freq_base, -2.0f/n_dims);
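+ // convert ggml byte strides to element strides for the shader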
+ uint32_t s1 = src0->nb[1] / ggml_type_size(src0->type);
+ uint32_t s2 = src0->nb[2] / ggml_type_size(src0->type);
+
ggml_vk_op_f32<vk_op_rope_push_constants>(ctx, subctx, src0, src1, src2, dst, GGML_OP_ROPE, {
(uint32_t)src0->ne[0], (uint32_t)n_dims, freq_scale, (uint32_t)src0->ne[1],
freq_base, ext_factor, attn_factor, {corr_dims[0], corr_dims[1]}, theta_scale,
- src2 != nullptr,
+ src2 != nullptr, (uint32_t)src0->ne[2], s1, s2,
+ sections[0], sections[1], sections[2], sections[3],
}, dryrun);
}
case GGML_OP_REPEAT:
return ggml_type_size(op->type) == sizeof(float) && ggml_type_size(op->src[0]->type) == sizeof(float);
case GGML_OP_ROPE:
- {
- const int mode = ((const int32_t *) op->op_params)[2];
- if (mode & GGML_ROPE_TYPE_MROPE) {
- return false;
- }
- if (mode & GGML_ROPE_TYPE_VISION) {
- return false;
- }
- return ggml_is_contiguous(op->src[0]);
- }
case GGML_OP_NONE:
case GGML_OP_RESHAPE:
case GGML_OP_VIEW:
const float attn_factor = ((float *) tensor->op_params)[8];
const float beta_fast = ((float *) tensor->op_params)[9];
const float beta_slow = ((float *) tensor->op_params)[10];
- tensor_clone = ggml_rope_ext(ggml_ctx, src0_clone, src1_clone, src2_clone, n_dims, mode, n_ctx_orig_ggml, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
+ if (mode & GGML_ROPE_TYPE_MROPE) {
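+ // the CPU reference clone for mrope needs the section split, so use ggml_rope_multi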
+ int32_t *sections = ((int32_t *) tensor->op_params) + 11;
+ tensor_clone = ggml_rope_multi(ggml_ctx, src0_clone, src1_clone, src2_clone, n_dims, sections, mode, n_ctx_orig_ggml, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
+ } else {
+ tensor_clone = ggml_rope_ext(ggml_ctx, src0_clone, src1_clone, src2_clone, n_dims, mode, n_ctx_orig_ggml, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
+ }
} else if (tensor->op == GGML_OP_UNARY) {
switch (ggml_get_unary_op(tensor)) {
case GGML_UNARY_OP_SILU: