Skip to content

Commit f9a7cc8

Browse files
authored
[SYCL][COMPAT] Fix error on headers, add helloworld test (#18401)
This PR fixes an error in the SYCLcompat headers that caused the build to fail. The error manifests when building the basic hello world example from the documentation. To prevent the basic hello world example from breaking again, this PR adds an e2e test for it and updates the documentation to match the updated hello world source code.
1 parent c568f2e commit f9a7cc8

File tree

3 files changed

+145
-111
lines changed

3 files changed

+145
-111
lines changed

sycl/doc/syclcompat/README.md

+2-110
Original file line numberDiff line numberDiff line change
@@ -3474,116 +3474,8 @@ public:
34743474
34753475
## Sample Code
34763476
3477-
Below is a simple linear algebra sample, which computes `y = mx + b` implemented
3478-
using this library:
3479-
3480-
``` c++
3481-
#include <cassert>
3482-
#include <iostream>
3483-
3484-
#include <syclcompat.hpp>
3485-
#include <sycl/sycl.hpp>
3486-
3487-
/**
3488-
* Slope intercept form of a straight line equation: Y = m * X + b
3489-
*/
3490-
template <int BLOCK_SIZE>
3491-
void slope_intercept(float *Y, float *X, float m, float b, size_t n) {
3492-
3493-
// Block index
3494-
size_t bx = syclcompat::work_group_id::x();
3495-
// Thread index
3496-
size_t tx = syclcompat::local_id::x();
3497-
3498-
size_t i = bx * BLOCK_SIZE + tx;
3499-
// or i = syclcompat::global_id::x();
3500-
if (i < n)
3501-
Y[i] = m * X[i] + b;
3502-
}
3503-
3504-
void check_memory(void *ptr, std::string msg) {
3505-
if (ptr == nullptr) {
3506-
std::cerr << "Failed to allocate memory: " << msg << std::endl;
3507-
exit(EXIT_FAILURE);
3508-
}
3509-
}
3510-
3511-
/**
3512-
* Program main
3513-
*/
3514-
int main(int argc, char **argv) {
3515-
std::cout << "Simple Kernel example" << std::endl;
3516-
3517-
constexpr size_t n_points = 32;
3518-
constexpr float m = 1.5f;
3519-
constexpr float b = 0.5f;
3520-
3521-
int block_size = 32;
3522-
if (block_size > syclcompat::get_current_device()
3523-
.get_info<sycl::info::device::max_work_group_size>())
3524-
block_size = 16;
3525-
3526-
std::cout << "block_size = " << block_size << ", n_points = " << n_points
3527-
<< std::endl;
3528-
3529-
// Allocate host memory for vectors X and Y
3530-
size_t mem_size = n_points * sizeof(float);
3531-
float *h_X = (float *)syclcompat::malloc_host(mem_size);
3532-
float *h_Y = (float *)syclcompat::malloc_host(mem_size);
3533-
check_memory(h_X, "h_X allocation failed.");
3534-
check_memory(h_Y, "h_Y allocation failed.");
3535-
3536-
// Alternative templated allocation for the expected output
3537-
float *h_expected = syclcompat::malloc_host<float>(n_points);
3538-
check_memory(h_expected, "Not enough for h_expected.");
3539-
3540-
// Initialize host memory & expected output
3541-
for (size_t i = 0; i < n_points; i++) {
3542-
h_X[i] = i + 1;
3543-
h_expected[i] = m * h_X[i] + b;
3544-
}
3545-
3546-
// Allocate device memory
3547-
float *d_X = (float *)syclcompat::malloc(mem_size);
3548-
float *d_Y = (float *)syclcompat::malloc(mem_size);
3549-
check_memory(d_X, "d_X allocation failed.");
3550-
check_memory(d_Y, "d_Y allocation failed.");
3551-
3552-
// copy host memory to device
3553-
syclcompat::memcpy(d_X, h_X, mem_size);
3554-
3555-
size_t threads = block_size;
3556-
size_t grid = n_points / block_size;
3557-
3558-
std::cout << "Computing result using SYCL Kernel... ";
3559-
if (block_size == 16) {
3560-
syclcompat::launch<slope_intercept<16>>(grid, threads, d_Y, d_X, m, b,
3561-
n_points);
3562-
} else {
3563-
syclcompat::launch<slope_intercept<32>>(grid, threads, d_Y, d_X, m, b,
3564-
n_points);
3565-
}
3566-
syclcompat::wait();
3567-
std::cout << "DONE" << std::endl;
3568-
3569-
// Async copy result from device to host
3570-
syclcompat::memcpy_async(h_Y, d_Y, mem_size).wait();
3571-
3572-
// Check output
3573-
for (size_t i = 0; i < n_points; i++) {
3574-
assert(h_Y[i] - h_expected[i] < 1e-6);
3575-
}
3576-
3577-
// Clean up memory
3578-
syclcompat::free(h_X);
3579-
syclcompat::free(h_Y);
3580-
syclcompat::free(h_expected);
3581-
syclcompat::free(d_X);
3582-
syclcompat::free(d_Y);
3583-
3584-
return 0;
3585-
}
3586-
```
3477+
The file [helloworld.cpp](../../test-e2e/syclcompat/helloworld.cpp) contains
3478+
a simple example which computes `y = mx + b` implemented using this library.
35873479
35883480
## Maintainers
35893481

sycl/include/syclcompat/traits.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ template <int Dim> struct range_to_item_map<sycl::nd_range<Dim>> {
8787
using ItemT = sycl::nd_item<Dim>;
8888
};
8989
template <int Dim> struct range_to_item_map<sycl::range<Dim>> {
90-
using ItemT = sycl::item<Dim>;
90+
using ItemT = sycl::item<Dim, false>;
9191
};
9292

9393
template <typename T>
+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/***************************************************************************
2+
*
3+
* Copyright (C) Codeplay Software Ltd.
4+
*
5+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM
6+
* Exceptions. See https://llvm.org/LICENSE.txt for license information.
7+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*
15+
* SYCLcompat API
16+
*
17+
* helloworld.cpp
18+
*
19+
* Description:
20+
* Checks that the SYCLcompat example program compiles and runs
21+
**************************************************************************/
22+
23+
// RUN: %{build} -o %t.out
24+
// RUN: %{run} %t.out
25+
26+
#include <sycl/detail/core.hpp>
27+
28+
// The example uses specific headers but the user can
29+
// simply include <syclcompat/syclcompat.hpp> to get all the
30+
// functionality with a single header
31+
32+
#include <syclcompat/device.hpp>
33+
#include <syclcompat/id_query.hpp>
34+
#include <syclcompat/launch.hpp>
35+
#include <syclcompat/memory.hpp>
36+
37+
#include <cstdlib>
38+
#include <iostream>
39+
40+
/**
 * Terminates the program when an allocation produced a null pointer.
 *
 * If (ptr) compares equal to nullptr, prints
 * "Failed to allocate memory: <text of the ptr expression>" to std::cerr and
 * exits with EXIT_FAILURE. Otherwise it does nothing.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement. This keeps `if (c) CHECK_MEMORY(p); else ...` well
 * formed and prevents a following `else` from binding to the macro's own `if`.
 */
#define CHECK_MEMORY(ptr)                                                      \
  do {                                                                         \
    if ((ptr) == nullptr) {                                                    \
      std::cerr << "Failed to allocate memory: " << (#ptr) << "\n";            \
      std::exit(EXIT_FAILURE);                                                 \
    }                                                                          \
  } while (0)
45+
46+
/**
47+
* Slope intercept form of a straight line equation: Y = m * X + b
48+
*/
49+
template <int BLOCK_SIZE>
50+
void slope_intercept(float *Y, float *X, float m, float b, size_t n) {
51+
52+
// Block index
53+
size_t bx = syclcompat::work_group_id::x();
54+
// Thread index
55+
size_t tx = syclcompat::local_id::x();
56+
57+
size_t i = bx * BLOCK_SIZE + tx;
58+
// or i = syclcompat::global_id::x();
59+
if (i < n)
60+
Y[i] = m * X[i] + b;
61+
}
62+
63+
/**
64+
* Program main
65+
*/
66+
int main(int argc, char **argv) {
67+
std::cout << "Simple Kernel example" << "\n";
68+
69+
constexpr size_t n_points = 32;
70+
constexpr float m = 1.5f;
71+
constexpr float b = 0.5f;
72+
73+
int block_size = 32;
74+
if (block_size > syclcompat::get_current_device()
75+
.get_info<sycl::info::device::max_work_group_size>()) {
76+
block_size = 16;
77+
}
78+
79+
std::cout << "block_size = " << block_size << ", n_points = " << n_points
80+
<< "\n";
81+
82+
// Allocate host memory for vectors X and Y
83+
size_t mem_size = n_points * sizeof(float);
84+
float *h_X = (float *)syclcompat::malloc_host(mem_size);
85+
float *h_Y = (float *)syclcompat::malloc_host(mem_size);
86+
CHECK_MEMORY(h_X);
87+
CHECK_MEMORY(h_Y);
88+
89+
// Alternative templated allocation for the expected output
90+
float *h_expected = syclcompat::malloc_host<float>(n_points);
91+
CHECK_MEMORY(h_expected);
92+
93+
// Initialize host memory & expected output
94+
for (size_t i = 0; i < n_points; i++) {
95+
h_X[i] = i + 1;
96+
h_expected[i] = m * h_X[i] + b;
97+
}
98+
99+
// Allocate device memory
100+
float *d_X = (float *)syclcompat::malloc(mem_size);
101+
float *d_Y = (float *)syclcompat::malloc(mem_size);
102+
CHECK_MEMORY(d_X);
103+
CHECK_MEMORY(d_Y);
104+
105+
// copy host memory to device
106+
syclcompat::memcpy(d_X, h_X, mem_size);
107+
108+
size_t threads = block_size;
109+
size_t grid = n_points / block_size;
110+
111+
std::cout << "Computing result using SYCL Kernel... ";
112+
if (block_size == 16) {
113+
syclcompat::launch<slope_intercept<16>>(grid, threads, d_Y, d_X, m, b,
114+
n_points);
115+
} else {
116+
syclcompat::launch<slope_intercept<32>>(grid, threads, d_Y, d_X, m, b,
117+
n_points);
118+
}
119+
syclcompat::wait();
120+
std::cout << "DONE" << "\n";
121+
122+
// Async copy result from device to host
123+
syclcompat::memcpy_async(h_Y, d_Y, mem_size).wait();
124+
125+
// Check output
126+
for (size_t i = 0; i < n_points; i++) {
127+
if (std::abs(h_Y[i] - h_expected[i]) >= 1e-6) {
128+
std::cerr << "Mismatch at index " << i << ": expected " << h_expected[i]
129+
<< ", but got " << h_Y[i] << "\n";
130+
exit(EXIT_FAILURE);
131+
}
132+
}
133+
134+
// Clean up memory
135+
syclcompat::free(h_X);
136+
syclcompat::free(h_Y);
137+
syclcompat::free(h_expected);
138+
syclcompat::free(d_X);
139+
syclcompat::free(d_Y);
140+
141+
return EXIT_SUCCESS;
142+
}

0 commit comments

Comments
 (0)