@@ -196,6 +196,7 @@ enum vk_device_architecture {
196
196
AMD_RDNA1,
197
197
AMD_RDNA2,
198
198
AMD_RDNA3,
199
+ INTEL_XE2,
199
200
};
200
201
201
202
static vk_device_architecture get_device_architecture(const vk::PhysicalDevice& device) {
@@ -246,6 +247,34 @@ static vk_device_architecture get_device_architecture(const vk::PhysicalDevice&
246
247
}
247
248
return vk_device_architecture::AMD_RDNA2;
248
249
}
250
+ } else if (props.vendorID == VK_VENDOR_ID_INTEL) {
251
+ const std::vector<vk::ExtensionProperties> ext_props = device.enumerateDeviceExtensionProperties();
252
+
253
+ bool subgroup_size_control = false;
254
+
255
+ for (const auto& properties : ext_props) {
256
+ if (strcmp("VK_EXT_subgroup_size_control", properties.extensionName) == 0) {
257
+ subgroup_size_control = true;
258
+ }
259
+ }
260
+
261
+ if (!subgroup_size_control) {
262
+ return vk_device_architecture::OTHER;
263
+ }
264
+
265
+ vk::PhysicalDeviceProperties2 props2;
266
+ vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_size_control_props;
267
+
268
+ props2.pNext = &subgroup_size_control_props;
269
+ device.getProperties2(&props2);
270
+
271
+ if (subgroup_size_control_props.minSubgroupSize == 16) {
272
+ // The Xe2 architecture uses SIMD16, while the earlier Xe and Gen architectures use SIMD8.
272
+ // The minimum subgroup size matches the SIMD width, so we distinguish the architectures by checking this value.
274
+ // https://www.intel.com/content/www/us/en/content-details/824434/2024-intel-tech-tour-xe2-and-lunar-lake-s-gpu.html
275
+ // https://www.intel.com/content/www/us/en/docs/oneapi/optimization-guide-gpu/2025-0/intel-xe-gpu-architecture.html
276
+ return vk_device_architecture::INTEL_XE2;
277
+ }
249
278
}
250
279
return vk_device_architecture::OTHER;
251
280
}
@@ -10263,8 +10292,9 @@ static bool ggml_vk_instance_portability_enumeration_ext_available(const std::ve
10263
10292
static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch) {
10264
10293
switch (props.vendorID) {
10265
10294
case VK_VENDOR_ID_INTEL:
10266
- // Intel drivers don't support coopmat properly yet
10267
- return false;
10295
+ // Only allow Xe2 GPUs at the moment, since they gain a significant performance boost,
10296
+ // while some older hardware (e.g. Arc A770) shows performance regressions
10297
+ return arch == vk_device_architecture::INTEL_XE2;
10268
10298
case VK_VENDOR_ID_AMD:
10269
10299
if (driver_props.driverID == vk::DriverId::eAmdProprietary || driver_props.driverID == vk::DriverId::eAmdOpenSource) {
10270
10300
// Workaround for AMD proprietary driver reporting support on all GPUs
0 commit comments