[SYCL][COMPAT] Fix error on headers, add helloworld test (#18401)

Ruyk · web-flow · commit f9a7cc8aaea5 · 2025-05-16T08:24:14.000+02:00
This PR fixes an error in the SYCLcompat headers that caused the build
to fail.
The error manifests when building the basic hello world example from the
documentation.
To prevent the basic hello world example from failing again, this PR adds
an e2e test for it
and updates the documentation to match the updated hello world source
code.
diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md
@@ -3474,116 +3474,8 @@ public:
 
 ## Sample Code
 
-Below is a simple linear algebra sample, which computes `y = mx + b` implemented
-using this library:
-
-``` c++
-#include <cassert>
-#include <iostream>
-
-#include <syclcompat.hpp>
-#include <sycl/sycl.hpp>
-
-/**
- * Slope intercept form of a straight line equation: Y = m * X + b
- */
-template <int BLOCK_SIZE>
-void slope_intercept(float *Y, float *X, float m, float b, size_t n) {
-
-  // Block index
-  size_t bx = syclcompat::work_group_id::x();
-  // Thread index
-  size_t tx = syclcompat::local_id::x();
-
-  size_t i = bx * BLOCK_SIZE + tx;
-  // or  i = syclcompat::global_id::x();
-  if (i < n)
-    Y[i] = m * X[i] + b;
-}
-
-void check_memory(void *ptr, std::string msg) {
-  if (ptr == nullptr) {
-    std::cerr << "Failed to allocate memory: " << msg << std::endl;
-    exit(EXIT_FAILURE);
-  }
-}
-
-/**
- * Program main
- */
-int main(int argc, char **argv) {
-  std::cout << "Simple Kernel example" << std::endl;
-
-  constexpr size_t n_points = 32;
-  constexpr float m = 1.5f;
-  constexpr float b = 0.5f;
-
-  int block_size = 32;
-  if (block_size > syclcompat::get_current_device()
-                       .get_info<sycl::info::device::max_work_group_size>())
-    block_size = 16;
-
-  std::cout << "block_size = " << block_size << ", n_points = " << n_points
-            << std::endl;
-
-  // Allocate host memory for vectors X and Y
-  size_t mem_size = n_points * sizeof(float);
-  float *h_X = (float *)syclcompat::malloc_host(mem_size);
-  float *h_Y = (float *)syclcompat::malloc_host(mem_size);
-  check_memory(h_X, "h_X allocation failed.");
-  check_memory(h_Y, "h_Y allocation failed.");
-
-  // Alternative templated allocation for the expected output
-  float *h_expected = syclcompat::malloc_host<float>(n_points);
-  check_memory(h_expected, "Not enough for h_expected.");
-
-  // Initialize host memory & expected output
-  for (size_t i = 0; i < n_points; i++) {
-    h_X[i] = i + 1;
-    h_expected[i] = m * h_X[i] + b;
-  }
-
-  // Allocate device memory
-  float *d_X = (float *)syclcompat::malloc(mem_size);
-  float *d_Y = (float *)syclcompat::malloc(mem_size);
-  check_memory(d_X, "d_X allocation failed.");
-  check_memory(d_Y, "d_Y allocation failed.");
-
-  // copy host memory to device
-  syclcompat::memcpy(d_X, h_X, mem_size);
-
-  size_t threads = block_size;
-  size_t grid = n_points / block_size;
-
-  std::cout << "Computing result using SYCL Kernel... ";
-  if (block_size == 16) {
-    syclcompat::launch<slope_intercept<16>>(grid, threads, d_Y, d_X, m, b,
-                                        n_points);
-  } else {
-    syclcompat::launch<slope_intercept<32>>(grid, threads, d_Y, d_X, m, b,
-                                        n_points);
-  }
-  syclcompat::wait();
-  std::cout << "DONE" << std::endl;
-
-  // Async copy result from device to host
-  syclcompat::memcpy_async(h_Y, d_Y, mem_size).wait();
-
-  // Check output
-  for (size_t i = 0; i < n_points; i++) {
-    assert(h_Y[i] - h_expected[i] < 1e-6);
-  }
-
-  // Clean up memory
-  syclcompat::free(h_X);
-  syclcompat::free(h_Y);
-  syclcompat::free(h_expected);
-  syclcompat::free(d_X);
-  syclcompat::free(d_Y);
-
-  return 0;
-}
-```
+The file [helloworld.cpp](../../test-e2e/syclcompat/helloworld.cpp) contains
+a simple example which computes `y = mx + b` implemented using this library.
 
 ## Maintainers
 
diff --git a/sycl/include/syclcompat/traits.hpp b/sycl/include/syclcompat/traits.hpp
@@ -87,7 +87,7 @@ template <int Dim> struct range_to_item_map<sycl::nd_range<Dim>> {
   using ItemT = sycl::nd_item<Dim>;
 };
 template <int Dim> struct range_to_item_map<sycl::range<Dim>> {
-  using ItemT = sycl::item<Dim>;
+  using ItemT = sycl::item<Dim, false>;
 };
 
 template <typename T>
diff --git a/sycl/test-e2e/syclcompat/helloworld.cpp b/sycl/test-e2e/syclcompat/helloworld.cpp
@@ -0,0 +1,142 @@
+/***************************************************************************
+ *
+ *  Copyright (C) Codeplay Software Ltd.
+ *
+ *  Part of the LLVM Project, under the Apache License v2.0 with LLVM
+ *  Exceptions. See https://llvm.org/LICENSE.txt for license information.
+ *  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  SYCLcompat API
+ *
+ *  helloworld.cpp
+ *
+ *  Description:
+ *    Checks that the SYCLcompat example program compiles and runs
+ **************************************************************************/
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <sycl/detail/core.hpp>
+
+// The example uses specific headers, but the user can
+// simply include <syclcompat/syclcompat.hpp> to get all the
+// functionality with a single header.
+
+#include <syclcompat/device.hpp>
+#include <syclcompat/id_query.hpp>
+#include <syclcompat/launch.hpp>
+#include <syclcompat/memory.hpp>
+
+#include <cstdlib>
+#include <iostream>
+
+#define CHECK_MEMORY(ptr) /* Exit if ptr is null; avoid in unbraced if/else */ \
+  if ((ptr) == nullptr) {                                                      \
+    std::cerr << "Failed to allocate memory: " << (#ptr) << "\n";              \
+    exit(EXIT_FAILURE);                                                        \
+  }
+
+/**
+ * Kernel: writes Y[i] = m * X[i] + b for this work-item's index i, if i < n.
+ */
+template <int BLOCK_SIZE>
+void slope_intercept(float *Y, float *X, float m, float b, size_t n) {
+
+  // Work-group (block) index, x component
+  size_t bx = syclcompat::work_group_id::x();
+  // Local (thread) index, x component
+  size_t tx = syclcompat::local_id::x();
+
+  size_t i = bx * BLOCK_SIZE + tx;
+  // or  i = syclcompat::global_id::x(), if BLOCK_SIZE is the work-group size
+  if (i < n) // guard: only indices below n are written
+    Y[i] = m * X[i] + b;
+}
+
+/**
+ * Program main: runs slope_intercept on the device and checks it on the host.
+ */
+int main(int argc, char **argv) {
+  std::cout << "Simple Kernel example" << "\n";
+
+  constexpr size_t n_points = 32;
+  constexpr float m = 1.5f;
+  constexpr float b = 0.5f;
+
+  int block_size = 32;
+  if (block_size > syclcompat::get_current_device()
+                       .get_info<sycl::info::device::max_work_group_size>()) {
+    block_size = 16; // fall back when the device limit is below 32
+  }
+
+  std::cout << "block_size = " << block_size << ", n_points = " << n_points
+            << "\n";
+
+  // Allocate host memory for the input X and the result Y (untyped, in bytes)
+  size_t mem_size = n_points * sizeof(float);
+  float *h_X = (float *)syclcompat::malloc_host(mem_size);
+  float *h_Y = (float *)syclcompat::malloc_host(mem_size);
+  CHECK_MEMORY(h_X);
+  CHECK_MEMORY(h_Y);
+
+  // Alternative templated allocation (element count) for the expected output
+  float *h_expected = syclcompat::malloc_host<float>(n_points);
+  CHECK_MEMORY(h_expected);
+
+  // Initialize the host input & compute the expected output on the host
+  for (size_t i = 0; i < n_points; i++) {
+    h_X[i] = i + 1;
+    h_expected[i] = m * h_X[i] + b;
+  }
+
+  // Allocate device memory for X and Y
+  float *d_X = (float *)syclcompat::malloc(mem_size);
+  float *d_Y = (float *)syclcompat::malloc(mem_size);
+  CHECK_MEMORY(d_X);
+  CHECK_MEMORY(d_Y);
+
+  // Copy the input X to the device; Y is written by the kernel
+  syclcompat::memcpy(d_X, h_X, mem_size);
+
+  size_t threads = block_size;
+  size_t grid = n_points / block_size; // divides exactly for 16 and 32
+
+  std::cout << "Computing result using SYCL Kernel... ";
+  if (block_size == 16) { // template argument must be a constant
+    syclcompat::launch<slope_intercept<16>>(grid, threads, d_Y, d_X, m, b,
+                                            n_points);
+  } else {
+    syclcompat::launch<slope_intercept<32>>(grid, threads, d_Y, d_X, m, b,
+                                            n_points);
+  }
+  syclcompat::wait();
+  std::cout << "DONE" << "\n";
+
+  // Async copy result from device to host, waited on before the check below
+  syclcompat::memcpy_async(h_Y, d_Y, mem_size).wait();
+
+  // Check output against the host reference (absolute tolerance 1e-6)
+  for (size_t i = 0; i < n_points; i++) {
+    if (std::abs(h_Y[i] - h_expected[i]) >= 1e-6) {
+      std::cerr << "Mismatch at index " << i << ": expected " << h_expected[i]
+                << ", but got " << h_Y[i] << "\n";
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  // Release host and device allocations (not reached on the exit() paths)
+  syclcompat::free(h_X);
+  syclcompat::free(h_Y);
+  syclcompat::free(h_expected);
+  syclcompat::free(d_X);
+  syclcompat::free(d_Y);
+
+  return EXIT_SUCCESS;
+}