[CI] Automatically detect AMD architecture (#16071)

sarnex · KornevNikita · commit cf324f30c79e · 2024-12-05T04:09:46.000-08:00
We can figure out the AMD GPU architecture from the `sycl-ls` output, so it no longer has to be specified explicitly. Confirmed working [here](https://github.com/intel/llvm/actions/runs/11841045635/job/32998817316?pr=16071). Closes: #16057. Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
diff --git a/sycl/test-e2e/Matrix/joint_matrix_hip_gfx90a.cpp b/sycl/test-e2e/Matrix/joint_matrix_hip_gfx90a.cpp
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %{build} -fsycl -fsycl-targets=amd_gpu_gfx90a %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx90a %s -o %t.out
 // RUN: %{run} %t.out
 
-// REQUIRES: gpu-amd-gfx90a
+// REQUIRES: arch-amd_gpu_gfx90a
 
 #include "joint_matrix_hip_apply.hpp"
 #include "joint_matrix_hip_copy.hpp"
diff --git a/sycl/test-e2e/Matrix/joint_matrix_hip_half_gfx90a.cpp b/sycl/test-e2e/Matrix/joint_matrix_hip_half_gfx90a.cpp
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %{build} -fsycl -fsycl-targets=amd_gpu_gfx90a %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx90a %s -o %t.out
 // RUN: %{run} %t.out
 
-// REQUIRES: gpu-amd-gfx90a
+// REQUIRES: arch-amd_gpu_gfx90a
 // REQUIRES: aspect-fp16
 
 #include "joint_matrix_hip_apply.hpp"
diff --git a/sycl/test-e2e/Matrix/runtime_query_hip_gfx90a.cpp b/sycl/test-e2e/Matrix/runtime_query_hip_gfx90a.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-// REQUIRES: gpu-amd-gfx90a
-// RUN: %{build} -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx90a -o %t.out
+// REQUIRES: arch-amd_gpu_gfx90a
+// RUN: %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx90a %s -o %t.out
 // RUN: %{run} %t.out
 
 #include <sycl/detail/core.hpp>
diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py
@@ -442,8 +442,6 @@ def open_check_file(file_name):
     for line in sp.splitlines():
         if "Intel(R) Data Center GPU Max 1100" in line:
             config.available_features.add("gpu-intel-pvc-1T")
-        if "gfx90a" in line:
-            config.available_features.add("gpu-amd-gfx90a")
         if not line.startswith("["):
             continue
         (backend, device) = line[1:].split("]")[0].split(":")
@@ -540,39 +538,6 @@ def open_check_file(file_name):
         config.cuda_libs_dir = os.path.join(os.environ["CUDA_PATH"], r"lib64")
         config.cuda_include = os.path.join(os.environ["CUDA_PATH"], "include")
 
-# FIXME: This needs to be made per-device as well, possibly with a helper.
-if "hip:gpu" in config.sycl_devices and config.hip_platform == "AMD":
-    if not config.amd_arch:
-        lit_config.error(
-            "Cannot run tests for HIP without an offload-arch. Please "
-            + "specify one via the 'amd_arch' parameter or 'AMD_ARCH' CMake "
-            + "variable."
-        )
-    llvm_config.with_system_environment("ROCM_PATH")
-    config.available_features.add("hip_amd")
-    arch_flag = (
-        "-Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=" + config.amd_arch
-    )
-elif "hip:gpu" in config.sycl_devices and config.hip_platform == "NVIDIA":
-    config.available_features.add("hip_nvidia")
-    arch_flag = ""
-else:
-    arch_flag = ""
-
-if lit_config.params.get("compatibility_testing", False):
-    config.substitutions.append(("%clangxx", " true "))
-    config.substitutions.append(("%clang", " true "))
-else:
-    config.substitutions.append(
-        (
-            "%clangxx",
-            " " + config.dpcpp_compiler + " " + config.cxx_flags + " " + arch_flag,
-        )
-    )
-    config.substitutions.append(
-        ("%clang", " " + config.dpcpp_compiler + " " + config.c_flags)
-    )
-
 config.substitutions.append(("%threads_lib", config.sycl_threads_lib))
 
 if lit_config.params.get("ze_debug"):
@@ -805,12 +770,50 @@ def open_check_file(file_name):
     # Use short names for LIT rules.
     features.add(be)
 
+    if be == "hip" and config.hip_platform == "AMD":
+        if not config.amd_arch:
+            # Guaranteed to be a single element in the set
+            arch = [x for x in architecture_feature][0]
+            amd_arch_prefix = "arch-amd_gpu_"
+            if amd_arch_prefix not in arch or len(architecture_feature) != 1:
+                lit_config.error(
+                    "Cannot detect architecture for AMD HIP device, specify it explicitly"
+                )
+            config.amd_arch = arch.replace(amd_arch_prefix, "")
+        llvm_config.with_system_environment("ROCM_PATH")
+        config.available_features.add("hip_amd")
+        arch_flag = (
+            "-Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=" + config.amd_arch
+        )
+        config.substitutions.append(
+            ("%rocm_path", os.environ.get("ROCM_PATH", "/opt/rocm"))
+        )
+    elif be == "hip" and config.hip_platform == "NVIDIA":
+        config.available_features.add("hip_nvidia")
+        arch_flag = ""
+    else:
+        arch_flag = ""
+
     config.sycl_dev_features[sycl_device] = features.union(config.available_features)
     if is_intel_driver:
         config.intel_driver_ver[sycl_device] = intel_driver_ver
     else:
         config.intel_driver_ver[sycl_device] = {}
 
+if lit_config.params.get("compatibility_testing", False):
+    config.substitutions.append(("%clangxx", " true "))
+    config.substitutions.append(("%clang", " true "))
+else:
+    config.substitutions.append(
+        (
+            "%clangxx",
+            " " + config.dpcpp_compiler + " " + config.cxx_flags + " " + arch_flag,
+        )
+    )
+    config.substitutions.append(
+        ("%clang", " " + config.dpcpp_compiler + " " + config.c_flags)
+    )
+
 # Set timeout for a single test
 try:
     import psutil