vk_pipeline pipeline_relu[2];
vk_pipeline pipeline_tanh[2];
vk_pipeline pipeline_sigmoid[2];
+ vk_pipeline pipeline_hardsigmoid[2];
+ vk_pipeline pipeline_hardswish[2];
vk_pipeline pipeline_geglu[2];
vk_pipeline pipeline_reglu[2];
CREATE_UNARY(relu)
CREATE_UNARY(tanh)
CREATE_UNARY(sigmoid)
+ CREATE_UNARY(hardsigmoid)
+ CREATE_UNARY(hardswish)
#undef CREATE_UNARY
#define CREATE_GLU(name) \
return ctx->device->pipeline_tanh[dst->type == GGML_TYPE_F16];
case GGML_UNARY_OP_SIGMOID:
return ctx->device->pipeline_sigmoid[dst->type == GGML_TYPE_F16];
+ case GGML_UNARY_OP_HARDSIGMOID:
+ return ctx->device->pipeline_hardsigmoid[dst->type == GGML_TYPE_F16];
+ case GGML_UNARY_OP_HARDSWISH:
+ return ctx->device->pipeline_hardswish[dst->type == GGML_TYPE_F16];
default:
break;
}
case GGML_UNARY_OP_RELU:
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_SIGMOID:
+ case GGML_UNARY_OP_HARDSIGMOID:
+ case GGML_UNARY_OP_HARDSWISH:
break;
default:
return false;
case GGML_UNARY_OP_RELU:
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_SIGMOID:
+ case GGML_UNARY_OP_HARDSIGMOID:
+ case GGML_UNARY_OP_HARDSWISH:
ggml_vk_unary(ctx, compute_ctx, src0, node, dryrun);
break;
default:
case GGML_UNARY_OP_RELU:
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_SIGMOID:
+ case GGML_UNARY_OP_HARDSIGMOID:
+ case GGML_UNARY_OP_HARDSWISH:
buf = tensor->buffer;
break;
default:
case GGML_UNARY_OP_RELU:
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_SIGMOID:
+ case GGML_UNARY_OP_HARDSIGMOID:
+ case GGML_UNARY_OP_HARDSWISH:
return ggml_is_contiguous(op->src[0]) &&
(op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16) &&
(op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) &&
case GGML_UNARY_OP_SIGMOID:
tensor_clone = ggml_sigmoid(ggml_ctx, src_clone[0]);
break;
+ case GGML_UNARY_OP_HARDSIGMOID:
+ tensor_clone = ggml_hardsigmoid(ggml_ctx, src_clone[0]);
+ break;
+ case GGML_UNARY_OP_HARDSWISH:
+ tensor_clone = ggml_hardswish(ggml_ctx, src_clone[0]);
+ break;
default:
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
GGML_ABORT("fatal error");
--- /dev/null
+#version 450
+
+#include "generic_head.comp"
+#include "types.comp"
+
+#extension GL_EXT_control_flow_attributes : enable
+
+layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
+
+layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
+layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
+
+void main() {
+ const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
+
+ if (i >= p.KX) {
+ return;
+ }
+
+ const float x = float(data_a[i]);
+ data_d[i] = D_TYPE(min(1.0f, max(0.0f, (x + 3.0f) / 6.0f)));
+}
--- /dev/null
+#version 450
+
+#include "generic_head.comp"
+#include "types.comp"
+
+#extension GL_EXT_control_flow_attributes : enable
+
+layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
+
+layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
+layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
+
+void main() {
+ const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
+
+ if (i >= p.KX) {
+ return;
+ }
+
+ const float x = float(data_a[i]);
+ data_d[i] = D_TYPE(x * min(1.0f, max(0.0f, (x + 3.0f) / 6.0f)));
+}
string_to_spv("tanh_f32", "tanh.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
string_to_spv("sigmoid_f16", "sigmoid.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
string_to_spv("sigmoid_f32", "sigmoid.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
+ string_to_spv("hardsigmoid_f16","hardsigmoid.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
+ string_to_spv("hardsigmoid_f32","hardsigmoid.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
+ string_to_spv("hardswish_f16", "hardswish.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
+ string_to_spv("hardswish_f32", "hardswish.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
for (auto rte : {false, true}) {
std::string suffix = rte ? "_rte" : "";