bool vmm; // virtual memory support
size_t vmm_granularity; // granularity of virtual memory
size_t total_vram;
+ int warp_size; // Number of threads in a dispatch
};
// Per-device info records, indexed by device id; sized for GGML_CUDA_MAX_DEVICES
// entries and value-initialized via `= {}`.
cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
info.devices[id].nsm = prop.multiProcessorCount;
info.devices[id].smpb = prop.sharedMemPerBlock;
+ info.devices[id].warp_size = prop.warpSize;
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
info.devices[id].smpbo = prop.sharedMemPerBlock;
info.devices[id].cc += prop.minor * 0x10;
}
}
- GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s\n",
- id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no");
+ GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
+ id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
+ device_vmm ? "yes" : "no", prop.warpSize);
#else
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
info.devices[id].cc = 100*prop.major + 10*prop.minor;