#define CEIL_DIV(M, N) (((M) + (N)-1) / (N))
#define VK_VENDOR_ID_AMD 0x1002
+#define VK_VENDOR_ID_APPLE 0x106b
#define VK_VENDOR_ID_INTEL 0x8086
#define VK_VENDOR_ID_NVIDIA 0x10de
return ctx->pipeline_matmul_f32_aligned_l.align;
}
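+
+// Vendor-specific matmul pipeline selection, split out of the generic
+// ggml_vk_guess_matmul_pipeline() path below.
+// AMD: never picks the L shader variant (matching the previous
+// subgroup_size == 64 heuristic); small matrices get the S variant,
+// everything else the M variant.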
+static vk_pipeline* ggml_vk_guess_matmul_pipeline_amd(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) {
+ if (bit16_x && bit16_y) {
+ if (m <= 32 || n <= 32) {
+#ifdef GGML_VULKAN_DEBUG
+ std::cerr << " S" << std::endl;
+#endif
+ return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s;
+ }
+#ifdef GGML_VULKAN_DEBUG
+ std::cerr << " M" << std::endl;
+#endif
+ return aligned ? &ctx->pipeline_matmul_f16_aligned_m : &ctx->pipeline_matmul_f16_m;
+ }
+ if (bit16_x && !bit16_y) {
+ if (m <= 32 || n <= 32) {
+#ifdef GGML_VULKAN_DEBUG
+ std::cerr << " S" << std::endl;
+#endif
+ return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s;
+ }
+#ifdef GGML_VULKAN_DEBUG
+ std::cerr << " M" << std::endl;
+#endif
+ return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_m : &ctx->pipeline_matmul_f16_f32_m;
+ }
+ if (!bit16_x && bit16_y) {
+ GGML_ASSERT(false);
+ }
+
+ if (m <= 32 || n <= 32) {
+#ifdef GGML_VULKAN_DEBUG
+ std::cerr << " S" << std::endl;
+#endif
+ return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s;
+ }
+#ifdef GGML_VULKAN_DEBUG
+ std::cerr << " M" << std::endl;
+#endif
+ return aligned ? &ctx->pipeline_matmul_f32_aligned_m : &ctx->pipeline_matmul_f32_m;
+}
+
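+// Apple: always use the M shader variant, regardless of matrix size.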
+static vk_pipeline* ggml_vk_guess_matmul_pipeline_apple(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, bool aligned) {
+#ifdef GGML_VULKAN_DEBUG
+ std::cerr << " M" << std::endl;
+#endif
+ if (bit16_x && bit16_y) {
+ return aligned ? &ctx->pipeline_matmul_f16_aligned_m : &ctx->pipeline_matmul_f16_m;
+ }
+ if (bit16_x && !bit16_y) {
+ return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_m : &ctx->pipeline_matmul_f16_f32_m;
+ }
+ if (!bit16_x && bit16_y) {
+ GGML_ASSERT(false);
+ }
+ return aligned ? &ctx->pipeline_matmul_f32_aligned_m : &ctx->pipeline_matmul_f32_m;
+}
+
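+// Intel: always use the S shader variant, regardless of matrix size
+// (replaces the VK_VENDOR_ID_INTEL special cases removed from the generic path below).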
+static vk_pipeline* ggml_vk_guess_matmul_pipeline_intel(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, bool aligned) {
+#ifdef GGML_VULKAN_DEBUG
+ std::cerr << " S" << std::endl;
+#endif
+ if (bit16_x && bit16_y) {
+ return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s;
+ }
+ if (bit16_x && !bit16_y) {
+ return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s;
+ }
+ if (!bit16_x && bit16_y) {
+ GGML_ASSERT(false);
+ }
+ return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s;
+}
+
static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) {
#ifdef GGML_VULKAN_DEBUG
std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")";
#endif
+ switch (ctx->device.lock()->vendor_id) {
+ case VK_VENDOR_ID_AMD:
+ return ggml_vk_guess_matmul_pipeline_amd(ctx, bit16_x, bit16_y, m, n, aligned);
+ case VK_VENDOR_ID_APPLE:
+ return ggml_vk_guess_matmul_pipeline_apple(ctx, bit16_x, bit16_y, aligned);
+ case VK_VENDOR_ID_INTEL:
+ return ggml_vk_guess_matmul_pipeline_intel(ctx, bit16_x, bit16_y, aligned);
+ }
+
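+    // Any other vendor (e.g. NVIDIA) falls through to the generic size-based selection.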
if (bit16_x && bit16_y) {
- if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
+ if (m <= 32 || n <= 32) {
#ifdef GGML_VULKAN_DEBUG
std::cerr << " S" << std::endl;
#endif
return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s;
}
- if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) {
+ if (m <= 64 || n <= 64) {
#ifdef GGML_VULKAN_DEBUG
std::cerr << " M" << std::endl;
#endif
            return aligned ? &ctx->pipeline_matmul_f16_aligned_m : &ctx->pipeline_matmul_f16_m;
        }
#ifdef GGML_VULKAN_DEBUG
        std::cerr << " L" << std::endl;
#endif
        return aligned ? &ctx->pipeline_matmul_f16_aligned_l : &ctx->pipeline_matmul_f16_l;
    }
if (bit16_x && !bit16_y) {
- if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
+ if (m <= 32 || n <= 32) {
#ifdef GGML_VULKAN_DEBUG
std::cerr << " S" << std::endl;
#endif
return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s;
}
- if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) {
+ if (m <= 64 || n <= 64) {
#ifdef GGML_VULKAN_DEBUG
std::cerr << " M" << std::endl;
#endif
            return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_m : &ctx->pipeline_matmul_f16_f32_m;
        }
#ifdef GGML_VULKAN_DEBUG
        std::cerr << " L" << std::endl;
#endif
        return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_l : &ctx->pipeline_matmul_f16_f32_l;
    }
    if (!bit16_x && bit16_y) {
        GGML_ASSERT(false);
    }

- if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
+ if (m <= 32 || n <= 32) {
#ifdef GGML_VULKAN_DEBUG
std::cerr << " S" << std::endl;
#endif
return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s;
}
- if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) {
+ if (m <= 64 || n <= 64) {
#ifdef GGML_VULKAN_DEBUG
std::cerr << " M" << std::endl;
#endif