#include "ggml-cpu.h"
#endif
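+// Make Vulkan-Hpp's default dispatcher the dynamic loader so entry points are resolved at runtime.
+// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers-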
+#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
+
#include <vulkan/vulkan.hpp>
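+// Provide the storage for Vulkan-Hpp's default dynamic dispatcher (this macro must be used exactly once).
+// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers-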
+VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
+
#include <algorithm>
#include <cmath>
#include <iomanip>
bool needed {};
// set to true when the shader has been compiled
bool compiled {};
+ // number of registers used, extracted from pipeline executable properties
+ uint32_t register_count {};
};
// Shared-ownership handle (std::shared_ptr) to a vk_pipeline_struct.
typedef std::shared_ptr<vk_pipeline_struct> vk_pipeline;
bool coopmat2;
+ bool pipeline_executable_properties_support {};
+
size_t idx;
bool mul_mat_l[GGML_TYPE_COUNT];
vk_instance.pfn_vkSetDebugUtilsObjectNameEXT(device->device, &static_cast<VkDebugUtilsObjectNameInfoEXT &>(duoni));
}
+ if (device->pipeline_executable_properties_support) {
+     // Ask the driver for the executable statistics of pipeline->pipeline. Only the
+     // pipeline handle is set; the remaining fields keep their default-constructed values.
+     vk::PipelineExecutableInfoKHR executableInfo;
+     executableInfo.pipeline = pipeline->pipeline;
+
+     auto statistics = device->device.getPipelineExecutableStatisticsKHR(executableInfo);
+     for (auto & s : statistics) {
+         // "Register Count" is reported by NVIDIA drivers.
+         // The statistic value is a union; only read u64 when the driver declares
+         // the uint64 format, otherwise the bits would be misinterpreted.
+         if (s.format == vk::PipelineExecutableStatisticFormatKHR::eUint64 &&
+             strcmp(s.name, "Register Count") == 0) {
+             VK_LOG_DEBUG(pipeline->name << " " << s.name << ": " << s.value.u64 << " registers");
+             pipeline->register_count = static_cast<uint32_t>(s.value.u64);
+         }
+     }
+ }
+
{
std::lock_guard<std::recursive_mutex> guard(device->mutex);
device->all_pipelines.push_back(pipeline);
bool amd_shader_core_properties2 = false;
bool pipeline_robustness = false;
bool coopmat2_support = false;
+ bool pipeline_executable_properties_support = false;
device->coopmat_support = false;
device->integer_dot_product = false;
bool bfloat16_support = false;
!getenv("GGML_VK_DISABLE_BFLOAT16")) {
bfloat16_support = true;
#endif
+ } else if (strcmp("VK_KHR_pipeline_executable_properties", properties.extensionName) == 0) {
+ pipeline_executable_properties_support = true;
}
}
device_extensions.push_back("VK_KHR_shader_integer_dot_product");
}
+ // Feature struct for VK_KHR_pipeline_executable_properties. It is only chained into the
+ // features query (and the extension only requested) when the extension was found.
+ VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pep_features {};
+ pep_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR;
+ if (pipeline_executable_properties_support) {
+     last_struct->pNext = (VkBaseOutStructure *)&pep_features;
+     last_struct = (VkBaseOutStructure *)&pep_features;
+     device_extensions.push_back("VK_KHR_pipeline_executable_properties");
+ }
+
vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2);
+ // Extension presence alone is not sufficient: the pipelineExecutableInfo feature bit
+ // must also be reported. pep_features is zero-initialized, so the bit reads as false
+ // when the struct was not chained into the query above.
+ device->pipeline_executable_properties_support = pipeline_executable_properties_support && pep_features.pipelineExecutableInfo;
+
device->fp16 = device->fp16 && vk12_features.shaderFloat16;
#if defined(VK_KHR_shader_bfloat16)
}
VK_LOG_DEBUG("ggml_vk_instance_init()");
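+ // Load the global-level entry points into the default dispatcher before the first Vulkan-Hpp call.
+ // See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers-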
+ VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
+
uint32_t api_version = vk::enumerateInstanceVersion();
if (api_version < VK_API_VERSION_1_2) {
vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr;
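+ // Load the instance-level entry points (including extension functions) into the default dispatcher.
+ // See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers-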
+ VULKAN_HPP_DEFAULT_DISPATCHER.init(vk_instance.instance);
+
std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
// Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan