Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

@shared in DPCPP incorrect output #237

Open
Kyrylo-Mazhara opened this issue May 28, 2024 · 0 comments
Open

@shared in DPCPP incorrect output #237

Kyrylo-Mazhara opened this issue May 28, 2024 · 0 comments

Comments

@Kyrylo-Mazhara
Copy link

Kyrylo-Mazhara commented May 28, 2024

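In the example below, the converted @shared declaration is emitted in the same place where it was originally declared, i.e. inside the outer loop (see the line marked with `->`). However, when the same test code is run through OCCA, the converted @shared declaration is emitted before the loop, at the top of the kernel body.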

Input:

@kernel void test_kern() {
    @tile(4, @outer) for (int i = 0; i < 10; ++i) {
        @shared int shm[10];
        @tile(4, @inner, @inner) for (int j = 0; j < 10; ++j) {
            shm[j] = j;
        }
    }
}

Actual output:

#include <CL/sycl.hpp>
using namespace sycl;

extern "C" [[sycl::reqd_work_group_size(1, 3, 4)]] void _occa_test_kern_0(sycl::queue *queue_,
                                  sycl::nd_range<3> *range_) {
  queue_->submit([&](sycl::handler &handler_) {
    handler_.parallel_for(*range_, [=](sycl::nd_item<3> item_) {
      {
        int _occa_tiled_i = (0) + ((4) * item_.get_group(2));
        for (int i = _occa_tiled_i; i < (_occa_tiled_i + (4)); ++i) {
          if (i < 10) {
   ->       auto &shm =
                *(sycl::ext::oneapi::group_local_memory_for_overwrite<int[10]>(
                    item_.get_group()));
            {
              int _occa_tiled_j = (0) + ((4) * item.get_local_id(1));
              {
                int j = _occa_tiled_j + item.get_local_id(2);
                if (j < 10) {
                  shm[j] = j;
                }
              }
            }
            item_.barrier(sycl::access::fence_space::local_space);
          }
        }
      }
    });
  });
}

Expected output:

#include <CL/sycl.hpp>
 using namespace sycl;

extern "C" void _occa_test_kern_0(sycl::queue * queue_,
                                  sycl::nd_range<3> * range_) {
  queue_->submit(
    [&](sycl::handler & handler_) {
      handler_.parallel_for(
        *range_,
        [=](sycl::nd_item<3> item_)  {
 ->       auto & shm = *(sycl::ext::oneapi::group_local_memory_for_overwrite<int[10]>(item_.get_group()));
          {
            int _occa_tiled_i = 0 + (4 * item_.get_group(2));
            for (int i = _occa_tiled_i; i < (_occa_tiled_i + 4); ++i) {
              if (i < 10) {
                {
                  int _occa_tiled_j = 0 + (4 * item_.get_local_id(1));
                  {
                    int j = _occa_tiled_j + item_.get_local_id(2);
                    if (j < 10) {
                      shm[j] = j;
                    }
                  }
                }
                item_.barrier(sycl::access::fence_space::local_space);
              }
            }
          }
        }
      );
    }
  );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant