diff --git a/svm/README.md b/svm/README.md
index 21a15e03..802373a7 100644
--- a/svm/README.md
+++ b/svm/README.md
@@ -16,6 +16,12 @@ SYCL for Nvidia backend - CC=clang CXX=clang++ cmake -DUSE_NVIDIA_BACKEND=TRUE -
 SYCL for AMD backend - CC=clang CXX=clang++ cmake -DUSE_AMD_BACKEND=TRUE -DUSE_AMD_ARCH={flag for hip i.e 90a for MI250} ../ \ 
 make \
 
+
+### In-order queue
+The CMake option `-DIN_ORDER_QUEUE` adds the `in_order` property to the SYCL
+queue, as well as `discard_events` if available. The default value of this
+option is `ON` for NVIDIA and AMD backends, and `OFF` otherwise.
+
 ### Running the workload
 ./svm_sycl a9a a.m
 
diff --git a/svm/SYCL/CMakeLists.txt b/svm/SYCL/CMakeLists.txt
index d17fd105..bc5f3916 100644
--- a/svm/SYCL/CMakeLists.txt
+++ b/svm/SYCL/CMakeLists.txt
@@ -61,6 +61,12 @@ option(GPU_AOT                 "Build AOT for Intel GPU"      OFF)
 option(USE_NVIDIA_BACKEND      "Build for NVIDIA backend"     OFF)
 option(USE_AMD_BACKEND      "Build for AMD HIP backend"    OFF)
 
+set(IN_ORDER_QUEUE_DEFAULT OFF)
+if (${USE_NVIDIA_BACKEND} OR ${USE_AMD_BACKEND})
+    set(IN_ORDER_QUEUE_DEFAULT ON)
+endif()
+option(IN_ORDER_QUEUE "Use in-order SYCL queue" ${IN_ORDER_QUEUE_DEFAULT})
+
 set(SOURCES
     cuSVM/cuSVMSolver.dp.cpp
     cuSVM/cuSVM_wrapper.cpp
@@ -78,7 +84,8 @@ set(SOURCES
 include_directories(${CMAKE_SOURCE_DIR}
                     ${CMAKE_SOURCE_DIR}/libSVM
                     ${CMAKE_SOURCE_DIR}/cuSVM
-                    ${CMAKE_SOURCE_DIR}/infrastructure)
+                    ${CMAKE_SOURCE_DIR}/infrastructure
+                    ${CMAKE_SOURCE_DIR}/../../infrastructure)
 
 
 # Use either default or user defined CXX flags
@@ -91,6 +98,12 @@ set(DEF_INTEL_GENERAL_CXX_FLAGS  " -O3 -fsycl ")
 set(DEF_NVIDIA_GENERAL_CXX_FLAGS " -O3 -fsycl ")
 set(DEF_AMD_GENERAL_CXX_FLAGS    " -O3 -fsycl -D__HIP_PLATFORM_AMD__ ")
 
+if (${IN_ORDER_QUEUE})
+    string(APPEND DEF_INTEL_GENERAL_CXX_FLAGS " -DIN_ORDER_QUEUE ")
+    string(APPEND DEF_NVIDIA_GENERAL_CXX_FLAGS " -DIN_ORDER_QUEUE ")
+    string(APPEND DEF_AMD_GENERAL_CXX_FLAGS " -DIN_ORDER_QUEUE ")
+endif()
+
 if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
     message(FATAL_ERROR "Both  CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together")
 elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "")
diff --git a/svm/SYCL/common/SYCL.cpp b/svm/SYCL/common/SYCL.cpp
deleted file mode 100755
index 7e2351a4..00000000
--- a/svm/SYCL/common/SYCL.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
-MIT License
-
-Copyright (c) 2015 University of West Bohemia
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
-
-/*
-MIT License
-
-Modifications Copyright (C) 2023 Intel Corporation
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-SPDX-License-Identifier: MIT License
-*/
-
-#include "SYCL.h"
-#include "Utilities.h"
-#include <iostream>
-
-#define CPP_MODULE "SYCL"
-#include "Logging.h"
-
-
-cl::sycl::device SYCL::QueryAndGetDevice(std::string const &sDeviceType) 
-{
-    if (sDeviceType.empty()) {
-        LOG("Using default selector()");
-        return cl::sycl::device(cl::sycl::device(cl::sycl::default_selector()));
-    }
-
-    LOG("Attemping to use " << sDeviceType << "_selector()");
-    std::string const sLowerCaseDeviceType(Utility::ToLowerCase(sDeviceType));
-    if (sLowerCaseDeviceType == "cpu") 
-        return cl::sycl::device(cl::sycl::device(cl::sycl::cpu_selector()));
-    else if (sLowerCaseDeviceType == "gpu") 
-       return cl::sycl::device(cl::sycl::device(cl::sycl::gpu_selector()));
-    else if (sLowerCaseDeviceType == "acc") 
-       return cl::sycl::device(cl::sycl::device(cl::sycl::accelerator_selector()));
-    else if (sLowerCaseDeviceType == "host") 
-       return cl::sycl::device(cl::sycl::device(cl::sycl::host_selector()));
-    else if (sLowerCaseDeviceType == "nvidia")
-        return cl::sycl::device(cl::sycl::device(nvidia_selector())); // Custom selector
-    else {
-        LOG_ERROR("Unknown device " << sDeviceType << "_selector()");
-    }
-}
-
-SYCL::SYCL(cl::sycl::device const &SelectedDevice)
-{
-    std::function<void (cl::sycl::exception_list ExceptionList)> lQExceptionHandler([&](cl::sycl::exception_list ExceptionList) { // lambda Q exception handler
-        try { 
-            for (auto const &Exception : ExceptionList) {
-                std::rethrow_exception(Exception);
-            }
-        } catch (cl::sycl::exception e) {
-           LOG_ERROR("SYCL exception caught: " << e.what()); 
-        }});
-
-    m_sySelectedQueue = cl::sycl::queue(SelectedDevice, lQExceptionHandler, {cl::sycl::property::queue::enable_profiling()});
-    DisplayDeviceProperties();
-}
-
-void SYCL::DisplayDeviceProperties() const
-{
-    LOG("SYCL device initialization successful")
-    LOG("\t Using SYCL device         : " << m_sySelectedQueue.get_device().get_info<cl::sycl::info::device::name>() << 
-        " (Driver version "     << m_sySelectedQueue.get_device().get_info<cl::sycl::info::device::driver_version>() << ")");
-
-    std::vector<std::string> const vLDDPaths(Utility::ExtractLDDPathNameFromProcess({"libOpenCL", "libsycl", "libComputeCpp"})); //[0] OCL, [1] Intel's SYCL, [2] ComputeCpp SYCL
-    if (vLDDPaths.empty()) {
-        LOG_WARNING("Unable to print OpenCL and SYCL dependent libraries! The LD_LIBRARY_PATH may be incorrectly set"); // Should not reach to this case
-        return;
-    }
-
-    LOG("\t Using OpenCL library      : " << (!vLDDPaths[0].empty() ? vLDDPaths[0] : "WARNING! OpenCL library not found!"));
-   
-    if (!vLDDPaths[1].empty())
-       LOG("\t Using Intel's SYCL library: " << vLDDPaths[1]);
-
-    if (!vLDDPaths[2].empty())
-        LOG("\t Found ComputeCPP library  : " << vLDDPaths[2]);
-}
-
diff --git a/svm/SYCL/common/SYCL.h b/svm/SYCL/common/SYCL.h
deleted file mode 100755
index 46a899f0..00000000
--- a/svm/SYCL/common/SYCL.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-MIT License
-
-Copyright (c) 2015 University of West Bohemia
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
-
-/*
-MIT License
-
-Modifications Copyright (C) 2023 Intel Corporation
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-SPDX-License-Identifier: MIT License
-*/
-
-#ifndef SYCL_CLASS_H
-#define SYCL_CLASS_H
-
-#include <sycl/sycl.hpp>
-#include <vector>
-
-class SYCL
-{
-private:
-    cl::sycl::queue m_sySelectedQueue;
-    cl::sycl::event m_syEventQueue;
-
-    cl::sycl::platform GetPlatform(unsigned int                  const  uiPlatformIndex) const;
-    cl::sycl::device   GetDevice  (std::string                   const &sDeviceType)     const;
-    cl::sycl::device   GetDevice  (std::vector<cl::sycl::device> const &vSyclDevices, 
-                                   unsigned int                  const uiDeviceIndex)    const;
-    
-    std::string GetDeviceType(cl::sycl::device const &syclDevice) const;
-
-    void DisplayDeviceProperties() const;
-public:
-    // NVIDIA GPU selector
-    class nvidia_selector : public cl::sycl::device_selector
-    {
-        virtual int operator()(cl::sycl::device const &SYCLDevice) const override 
-        {
-            if (!SYCLDevice.is_gpu())
-                return -1;
-
-            std::string const sVendor(SYCLDevice.get_info<cl::sycl::info::device::vendor>());
-            return sVendor.find("NVIDIA") != std::string::npos ? 1000 : -1;
-        }
-    };
-
-    explicit SYCL(cl::sycl::device const &Device);
-            ~SYCL(){}
-
-    SYCL           (SYCL const &RHS) = delete; 
-    SYCL &operator=(SYCL const &RHS) = delete;
-
-    static cl::sycl::device QueryAndGetDevice(std::string const &sDeviceType);
-
-    cl::sycl::queue &GetQueue()      { return m_sySelectedQueue; }
-    cl::sycl::event &GetEventQueue() { return m_syEventQueue;    }
-};
-
-#endif
diff --git a/svm/SYCL/cuSVM/cuSVMSolver.dp.cpp b/svm/SYCL/cuSVM/cuSVMSolver.dp.cpp
index 57232293..e38ea9df 100644
--- a/svm/SYCL/cuSVM/cuSVMSolver.dp.cpp
+++ b/svm/SYCL/cuSVM/cuSVMSolver.dp.cpp
@@ -68,7 +68,7 @@ SPDX-License-Identifier: MIT License
 #include <vector>
 #include <chrono>
 #include "CommandLineParser.h"
-//#include "SYCL.h"
+#include "SYCL.h"
 
 #ifdef USE_CUBLAS
 #include <sycl/backend/cuda.hpp>
@@ -487,20 +487,16 @@ float *d_SelfDotProd,const int& m,const int& n,const int &nbrCtas,const int& thr
     #if USE_CUBLAS
         cublasHandle_t handle;
         CHECK_ERROR(cublasCreate(&handle)); 
+        CUstream cuStream;
 
-    q_ct1.submit([&](sycl::handler &cgh) {
-        //auto d_A = b_A.get_access<sycl::access::mode::read_write>(cgh);
-        cgh.host_task([=](sycl::interop_handle ih) {
+        SYCL::ExecNativeCommand(q_ct1, [=, &cuStream](sycl::interop_handle ih) {
             cuCtxSetCurrent(ih.get_native_context<sycl::backend::ext_oneapi_cuda>());
-            auto cuStream = ih.get_native_queue<sycl::backend::ext_oneapi_cuda>();
+            cuStream = ih.template get_native_queue<sycl::backend::ext_oneapi_cuda>();
             cublasSetStream(handle, cuStream);
             constexpr float ALPHA = 1.0f;
             constexpr float BETA = 0.0f;
             CHECK_ERROR(cublasSgemv (handle, CUBLAS_OP_N, m, n, &ALPHA, d_x, m, d_Kernel_InterRow, 1, &BETA, d_KernelDotProd, 1));
-            cudaStreamSynchronize(cuStream);
-            //cudaDeviceSynchronize(); 
-        });
-    }).wait_and_throw();
+        }, [&cuStream]{cudaStreamSynchronize(cuStream);});
 
     cublasDestroy(handle);
 
@@ -513,17 +509,15 @@ float *d_SelfDotProd,const int& m,const int& n,const int &nbrCtas,const int& thr
   	hipblasCreate(&handle); 
 
          
-  	q_ct1.submit([&](sycl::handler &h){
-
-                h.host_task([=](sycl::interop_handle ih) {
+    SYCL::ExecNativeCommand(q_ct1, [&cuStream, =](sycl::interop_handle ih) {
                       //hipCtxSetCurrent(ih.get_native_context<sycl::backend::ext_oneapi_hip>());
                       //hipblasSetStream(handle, ih.get_native_queue<sycl::backend::ext_oneapi_hip>());
                       hipblasSgemv (handle, HIPBLAS_OP_N, m, n, &ALPHA, d_x, m, d_Kernel_InterRow, 1, &BETA, d_KernelDotProd, 1); 
-                });
-        }).wait_and_throw();
+    }, []{});
     
     #else
-        oneapi::mkl::blas::column_major::gemv(q_ct1,  oneapi::mkl::transpose::nontrans, m, n, 1, d_x, m, d_Kernel_InterRow, 1, 0, d_KernelDotProd, 1).wait();
+        oneapi::mkl::blas::column_major::gemv(q_ct1,  oneapi::mkl::transpose::nontrans, m, n, 1, d_x, m, d_Kernel_InterRow, 1, 0, d_KernelDotProd, 1);
+        q_ct1.wait();
     #endif
     
 
@@ -806,12 +800,18 @@ extern "C" void SVMTrain(float *mexalpha, float *beta, float *y, float *x,
     auto max_wgroup_size = selected_device.get_info<sycl::info::device::max_work_group_size>();
     printf("Workgroup Size: %lu\n", max_wgroup_size);
 
-    #if KERNEL_USE_PROFILE
-       auto propList = sycl::property_list{sycl::property::queue::enable_profiling()};
-       sycl::queue q_ct1(context, selected_device, propList);
-    #else
-       sycl::queue q_ct1(context, selected_device);
-   #endif
+    auto propList = sycl::property_list{
+        #if IN_ORDER_QUEUE
+        sycl::property::queue::in_order{},
+        #ifdef SYCL_EXT_ONEAPI_DISCARD_QUEUE_EVENTS
+        sycl::ext::oneapi::property::queue::discard_events{},
+        #endif
+        #endif
+        #if KERNEL_USE_PROFILE
+        sycl::property::queue::enable_profiling{},
+        #endif
+    };
+    sycl::queue q_ct1(context, selected_device, propList);
 
 
  mxArray *mexelapsed =mxCreateNumericMatrix(1, 1,mxSINGLE_CLASS, mxREAL);
@@ -873,7 +873,8 @@ _kernelwidth*=-1;
 
     mxCUDA_SAFE_CALL((d_x = sycl::malloc_device<float>(m * n * sizeof(float), q_ct1), 0));
     mxCUDA_SAFE_CALL((d_xT = sycl::malloc_device<float>(m * n * sizeof(float), q_ct1), 0));
-    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_x, x, sizeof(float) * n * m).wait(), 0));
+    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_x, x, sizeof(float) * n * m), 0));
+    q_ct1.wait();
  
 
     sycl::range<3> gridtranspose(ceil((float)m / TRANS_BLOCK_DIM),
@@ -937,7 +938,8 @@ _kernelwidth*=-1;
 
     float *xT=new float [n*m];
  
-    mxCUDA_SAFE_CALL((q_ct1.memcpy(xT, d_xT, sizeof(float) * m * n).wait(), 0));
+    mxCUDA_SAFE_CALL((q_ct1.memcpy(xT, d_xT, sizeof(float) * m * n), 0));
+    q_ct1.wait();
     (sycl::free(d_xT, q_ct1), 0);
  
 
@@ -949,10 +951,11 @@ _kernelwidth*=-1;
     mxCUDA_SAFE_CALL((d_SelfDotProd = sycl::malloc_device<float>(m * sizeof(float), q_ct1), 0));
     mxCUDA_SAFE_CALL((d_KernelDotProd = sycl::malloc_device<float>(m * sizeof(float), q_ct1), 0));
     
-    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_y, y, sizeof(float) * m).wait(), 0));
-    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_alpha, h_alpha, sizeof(float) * m).wait(), 0));
-    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_F, h_F, sizeof(float) * m).wait(), 0));
-    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_SelfDotProd, SelfDotProd, sizeof(float) * m).wait(), 0));
+    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_y, y, sizeof(float) * m), 0));
+    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_alpha, h_alpha, sizeof(float) * m), 0));
+    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_F, h_F, sizeof(float) * m), 0));
+    mxCUDA_SAFE_CALL((q_ct1.memcpy(d_SelfDotProd, SelfDotProd, sizeof(float) * m), 0));
+    q_ct1.wait();
  
 
     delete [] SelfDotProd;
@@ -1058,13 +1061,14 @@ _kernelwidth*=-1;
             elapsed_kernel_time += (time_end - time_start)/1e9;
         #endif
         
-        mxCUDA_SAFE_CALL((q_ct1.memcpy(value_inter, d_value_inter, sizeof(float) * numBlocks).wait(), 0));
-        
-        mxCUDA_SAFE_CALL((q_ct1.memcpy(index_inter, d_index_inter, sizeof(int) * numBlocks).wait(), 0));
+        mxCUDA_SAFE_CALL((q_ct1.memcpy(value_inter, d_value_inter, sizeof(float) * numBlocks), 0));
+        mxCUDA_SAFE_CALL((q_ct1.memcpy(index_inter, d_index_inter, sizeof(int) * numBlocks), 0));
+        q_ct1.wait();
   
         CpuMaxInd(BIValue,BIIndex,value_inter,index_inter,numBlocks);
 
-        q_ct1.memcpy(&Fi, d_F + BIIndex, sizeof(float)).wait();
+        q_ct1.memcpy(&Fi, d_F + BIIndex, sizeof(float));
+        q_ct1.wait();
 
         if (iter == (NUM_ITERATIONS - 1))
         {
@@ -1105,7 +1109,8 @@ _kernelwidth*=-1;
             
             
             
-            mxCUDA_SAFE_CALL((q_ct1.memcpy(value_inter, d_value_inter, sizeof(float) * numBlocks).wait(), 0));
+            mxCUDA_SAFE_CALL((q_ct1.memcpy(value_inter, d_value_inter, sizeof(float) * numBlocks), 0));
+            q_ct1.wait();
             
             CpuMin(SJValue,value_inter,numBlocks);
 
@@ -1128,7 +1133,8 @@ _kernelwidth*=-1;
 			
             d_KernelI=d_Kernel_Cache+CacheDiffI*m;
    
-            mxCUDA_SAFE_CALL((q_ct1.memcpy(d_KernelInterRow, xT + BIIndex * n, n * sizeof(float)).wait(), 0));
+            mxCUDA_SAFE_CALL((q_ct1.memcpy(d_KernelInterRow, xT + BIIndex * n, n * sizeof(float)), 0));
+            q_ct1.wait();
 
             RBFKernel(d_KernelI,BIIndex,d_x,d_KernelInterRow,d_KernelDotProd,d_SelfDotProd, m,n,nbrCtas,threadsPerCta, q_ct1, elapsed_kernel_time);
             
@@ -1181,23 +1187,19 @@ _kernelwidth*=-1;
 
 
 
-        mxCUDA_SAFE_CALL((q_ct1.memcpy(value_inter, d_value_inter, sizeof(float) * numBlocks).wait(), 0));
-  
-        mxCUDA_SAFE_CALL((q_ct1.memcpy(index_inter, d_index_inter, sizeof(int) * numBlocks).wait(), 0));
+        mxCUDA_SAFE_CALL((q_ct1.memcpy(value_inter, d_value_inter, sizeof(float) * numBlocks), 0));
+        mxCUDA_SAFE_CALL((q_ct1.memcpy(index_inter, d_index_inter, sizeof(int) * numBlocks), 0));
+        q_ct1.wait();
         
         CpuMaxInd(BJSecondOrderValue,BJIndex,value_inter,index_inter,numBlocks);
 
-        mxCUDA_SAFE_CALL( (q_ct1.memcpy(&Kij, d_KernelI + BJIndex, sizeof(float)).wait(), 0));
-
-        mxCUDA_SAFE_CALL( (q_ct1.memcpy(&alphai, d_alpha + BIIndex, sizeof(float)).wait(), 0));
-        
-        mxCUDA_SAFE_CALL( (q_ct1.memcpy(&alphaj, d_alpha + BJIndex, sizeof(float)).wait(), 0));
-
-        mxCUDA_SAFE_CALL((q_ct1.memcpy(&yi, d_y + BIIndex, sizeof(float)).wait(), 0));
-  
-        mxCUDA_SAFE_CALL((q_ct1.memcpy(&yj, d_y + BJIndex, sizeof(float)).wait(), 0));
-  
-        mxCUDA_SAFE_CALL((q_ct1.memcpy(&Fj, d_F + BJIndex, sizeof(float)).wait(), 0));
+        mxCUDA_SAFE_CALL( (q_ct1.memcpy(&Kij, d_KernelI + BJIndex, sizeof(float)), 0));
+        mxCUDA_SAFE_CALL( (q_ct1.memcpy(&alphai, d_alpha + BIIndex, sizeof(float)), 0));
+        mxCUDA_SAFE_CALL( (q_ct1.memcpy(&alphaj, d_alpha + BJIndex, sizeof(float)), 0));
+        mxCUDA_SAFE_CALL((q_ct1.memcpy(&yi, d_y + BIIndex, sizeof(float)), 0));
+        mxCUDA_SAFE_CALL((q_ct1.memcpy(&yj, d_y + BJIndex, sizeof(float)), 0));
+        mxCUDA_SAFE_CALL((q_ct1.memcpy(&Fj, d_F + BJIndex, sizeof(float)), 0));
+        q_ct1.wait();
 
         oldalphai=alphai;
 		
@@ -1206,9 +1208,9 @@ _kernelwidth*=-1;
 
 		UpdateAlphas(alphai,alphaj,Kij,yi,yj,Fi,Fj,_C,h_taumin);
 
-        mxCUDA_SAFE_CALL((q_ct1.memcpy(d_alpha + BIIndex, &alphai, sizeof(float)).wait(), 0));
-  
-        mxCUDA_SAFE_CALL( (q_ct1.memcpy(d_alpha + BJIndex, &alphaj, sizeof(float)).wait(), 0));
+        mxCUDA_SAFE_CALL((q_ct1.memcpy(d_alpha + BIIndex, &alphai, sizeof(float)), 0));
+        mxCUDA_SAFE_CALL( (q_ct1.memcpy(d_alpha + BJIndex, &alphaj, sizeof(float)), 0));
+        q_ct1.wait();
 
         float deltaalphai = alphai - oldalphai;
 		float deltaalphaj = alphaj - oldalphaj;
@@ -1221,7 +1223,8 @@ _kernelwidth*=-1;
 			d_KernelJ=d_Kernel_Cache+CacheDiffJ*m;
    
 
-            mxCUDA_SAFE_CALL( (q_ct1.memcpy(d_KernelInterRow, xT + BJIndex * n, n * sizeof(float)).wait(), 0));
+            mxCUDA_SAFE_CALL( (q_ct1.memcpy(d_KernelInterRow, xT + BJIndex * n, n * sizeof(float)), 0));
+            q_ct1.wait();
           
             RBFKernel(d_KernelJ,BJIndex,d_x,d_KernelInterRow,d_KernelDotProd, d_SelfDotProd, m,n,nbrCtas,threadsPerCta, q_ct1, elapsed_kernel_time);
 			*(KernelCacheIndices.begin()+CacheDiffJ)=BJIndex;
@@ -1278,7 +1281,8 @@ _kernelwidth*=-1;
 		//cout << "Average Kernel Time per run: " << (elapsed/NUM_OF_RUNS) << " sec.\n";
 	#endif
     
-    q_ct1.memcpy(mexalpha, d_alpha, m * sizeof(float)).wait();
+    q_ct1.memcpy(mexalpha, d_alpha, m * sizeof(float));
+    q_ct1.wait();
 
     stop_ct1 = std::chrono::high_resolution_clock::now();
     
diff --git a/svm/SYCL/infrastructure/SYCL.cpp b/svm/SYCL/infrastructure/SYCL.cpp
deleted file mode 100644
index 634d648a..00000000
--- a/svm/SYCL/infrastructure/SYCL.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
-MIT License
-
-Copyright (c) 2015 University of West Bohemia
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
-
-/*
-MIT License
-
-Modifications Copyright (C) 2023 Intel Corporation
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-SPDX-License-Identifier: MIT License
-*/
-
-
-#include "SYCL.h"
-#include "Utilities.h"
-#include <iostream>
-#include <numeric>
-
-#define CPP_MODULE "SYCL"
-#include "Logging.h"
-
-#ifdef USE_INFRASTRUCTURE // Move to test bench base??
-void SYCL::RegisterDeviceSettings(VelocityBench::CommandLineParser &cmdLineParser) 
-{
-    cmdLineParser.AddSetting("--device_type", "SYCL device type to use", false, "", VelocityBench::CommandLineParser::InputType_t::STRING, 1 /* One operand */);
-    LOG("Registered SYCL device querying settings");
-}
-#endif
-
-cl::sycl::device SYCL::QueryAndGetDevice(std::string const &sDeviceType) 
-{
-    if (sDeviceType.empty()) {
-        LOG("Using default selector()");
-        return cl::sycl::device(cl::sycl::device(cl::sycl::default_selector()));
-    }
-
-    LOG("Attemping to use " << sDeviceType << "_selector()");
-    std::string const sLowerCaseDeviceType(Utility::ToLowerCase(sDeviceType));
-    if (sLowerCaseDeviceType == "cpu") 
-        return cl::sycl::device(cl::sycl::cpu_selector());
-    else if (sLowerCaseDeviceType == "gpu") 
-       return cl::sycl::device(cl::sycl::gpu_selector());
-    else if (sLowerCaseDeviceType == "nvidia")
-        return cl::sycl::device(nvidia_selector()); // Custom selector
-    else {
-        LOG_ERROR("Unknown device " << sDeviceType << "_selector()");
-    }
-}
-
-SYCL::SYCL(cl::sycl::device const &SelectedDevice)
-    : m_syclQueue(SelectedDevice,                                       // Device selected
-                  [](cl::sycl::exception_list ExceptionList) {          // Async exception handler
-                  try { 
-                      for (auto const &Exception : ExceptionList) {
-                          std::rethrow_exception(Exception);
-                      }
-                  } catch (cl::sycl::exception e) {
-                     LOG_ERROR("SYCL exception caught: " << e.what()); 
-                  }}
-#ifdef ENABLE_KERNEL_PROFILING
-                  , {cl::sycl::property::queue::enable_profiling()}     // Setting profiling if we want to
-#endif
-                  )
-    , m_syclEvent()
-{
-    LOG("SYCL Queue initialization successful");
-    DisplayDeviceProperties(m_syclQueue.get_device());
-}
-
-void SYCL::DisplayDeviceProperties(cl::sycl::device const &Device)
-{
-    LOG("\t Using SYCL device         : " << Device.get_info<cl::sycl::info::device::name>() << " (Driver version " << Device.get_info<cl::sycl::info::device::driver_version>() << ")");
-    LOG("\t Platform                  : " << Device.get_platform().get_info<cl::sycl::info::platform::name>());
-    LOG("\t Vendor                    : " << Device.get_info<cl::sycl::info::device::vendor>());
-    LOG("\t Max compute units         : " << Device.get_info<cl::sycl::info::device::max_compute_units>());
-#ifdef ENABLE_KERNEL_PROFILING
-    LOG("\t Kernel profiling          : enabled");
-#else
-    LOG("\t Kernel profiling          : disabled");
-#endif
-
-    std::vector<std::string> const vLDDPaths(Utility::ExtractLDDPathNameFromProcess({"libOpenCL", "libsycl", "libComputeCpp", "libze"})); //[0] OCL, [1] Intel's SYCL, [2] ComputeCpp SYCL
-    if (vLDDPaths.empty()) {
-        LOG_WARNING("Unable to print OpenCL and SYCL dependent libraries! The LD_LIBRARY_PATH may be incorrectly set"); // Should not reach to this case
-        return;
-    }
-
-    LOG("\t Using OpenCL library      : " << (!vLDDPaths[0].empty() ? vLDDPaths[0] : "WARNING! OpenCL library not found!"));
-   
-    if (!vLDDPaths[1].empty()) { // Implies we are using Intel's DPC++ compiler
-       LOG("\t Using OneAPI SYCL library : " << vLDDPaths[1]);
-       LOG("\t Using Level Zero library  : " << (!vLDDPaths[3].empty() ? vLDDPaths[3] : "WARNING! Level zero library not found! L0 backend may not be available!"));
-    }
-
-    if (!vLDDPaths[2].empty())
-       LOG("\t Using ComputeCPP library  : " << vLDDPaths[2]);
-}
-
-std::chrono::steady_clock::duration SYCL::ConvertKernelTimeToDuration(cl::sycl::event const &Event)
-{
-    std::chrono::time_point<std::chrono::steady_clock> const tpStart (std::chrono::nanoseconds(Event.template get_profiling_info<cl::sycl::info::event_profiling::command_start>())); 
-    std::chrono::time_point<std::chrono::steady_clock> const tpFinish(std::chrono::nanoseconds(Event.template get_profiling_info<cl::sycl::info::event_profiling::command_end>())); 
-    return std::chrono::steady_clock::duration(tpFinish - tpStart);
-}
diff --git a/svm/SYCL/infrastructure/SYCL.h b/svm/SYCL/infrastructure/SYCL.h
deleted file mode 100644
index a78819e9..00000000
--- a/svm/SYCL/infrastructure/SYCL.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
-MIT License
-
-Copyright (c) 2015 University of West Bohemia
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
-
-/*
-MIT License
-
-Modifications Copyright (C) 2023 Intel Corporation
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-SPDX-License-Identifier: MIT License
-*/
-
-
-#ifndef SYCL_CLASS_H
-#define SYCL_CLASS_H
-
-#include "CommandLineParser.h"
-
-#include <sycl/sycl.hpp>
-#include <string>
-#include <vector>
-
-class SYCL
-{
-private:
-    cl::sycl::queue m_syclQueue;
-    cl::sycl::event m_syclEvent;
-public:
-    // NVIDIA GPU selector
-    class nvidia_selector : public cl::sycl::device_selector
-    {
-        virtual int operator()(cl::sycl::device const &SYCLDevice) const override 
-        {
-            if (!SYCLDevice.is_gpu())
-                return -1;
-
-            std::string const sVendor(SYCLDevice.get_info<cl::sycl::info::device::vendor>());
-            return sVendor.find("NVIDIA") != std::string::npos ? 1000 : -1;
-        }
-    };
-
-    explicit SYCL(cl::sycl::device const &Device);
-            ~SYCL(){}
-
-    SYCL           (SYCL const &RHS) = delete; 
-    SYCL &operator=(SYCL const &RHS) = delete;
-
-    cl::sycl::queue &GetQueue() { return m_syclQueue; } 
-    cl::sycl::event &GetEvent() { return m_syclEvent; }
-
-    static void             DisplayDeviceProperties(cl::sycl::device const &Device);
-    static cl::sycl::device QueryAndGetDevice      (std::string const &sDeviceType);
-
-    static std::chrono::steady_clock::duration ConvertKernelTimeToDuration(cl::sycl::event const &Event);
-
-#ifdef USE_INFRASTRUCTURE // Move to testbench base??
-    static void RegisterDeviceSettings (VelocityBench::CommandLineParser &cmdLineParser);
-#endif
-};
-
-#endif