@@ -1997,16 +1997,16 @@ void instrumentationAddExtraKernelMetadata(
1997
1997
std::mutex *KernelMutex = nullptr ;
1998
1998
const KernelArgMask *EliminatedArgMask = nullptr ;
1999
1999
2000
- if (auto SyclKernelImpl = KernelBundleImplPtr
2001
- ? KernelBundleImplPtr->tryGetKernel (
2002
- KernelName, KernelBundleImplPtr)
2003
- : std::shared_ptr<kernel_impl>{nullptr }) {
2004
- EliminatedArgMask = SyclKernelImpl->getKernelArgMask ();
2005
- Program = SyclKernelImpl->getDeviceImage ()->get_ur_program_ref ();
2006
- } else if (nullptr != SyclKernel) {
2000
+ if (nullptr != SyclKernel) {
2007
2001
Program = SyclKernel->getProgramRef ();
2008
2002
if (!SyclKernel->isCreatedFromSource ())
2009
2003
EliminatedArgMask = SyclKernel->getKernelArgMask ();
2004
+ } else if (auto SyclKernelImpl =
2005
+ KernelBundleImplPtr ? KernelBundleImplPtr->tryGetKernel (
2006
+ KernelName, KernelBundleImplPtr)
2007
+ : std::shared_ptr<kernel_impl>{nullptr }) {
2008
+ EliminatedArgMask = SyclKernelImpl->getKernelArgMask ();
2009
+ Program = SyclKernelImpl->getDeviceImage ()->get_ur_program_ref ();
2010
2010
} else if (Queue) {
2011
2011
// NOTE: Queue can be null when kernel is directly enqueued to a command
2012
2012
// buffer
@@ -2521,17 +2521,17 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl,
2521
2521
const KernelArgMask *EliminatedArgMask = nullptr ;
2522
2522
auto &KernelBundleImplPtr = CommandGroup.MKernelBundle ;
2523
2523
2524
- if (auto SyclKernelImpl =
2525
- KernelBundleImplPtr
2526
- ? KernelBundleImplPtr->tryGetKernel (CommandGroup.MKernelName ,
2527
- KernelBundleImplPtr)
2528
- : std::shared_ptr<kernel_impl>{nullptr }) {
2524
+ if (auto Kernel = CommandGroup.MSyclKernel ; Kernel != nullptr ) {
2525
+ UrKernel = Kernel->getHandleRef ();
2526
+ EliminatedArgMask = Kernel->getKernelArgMask ();
2527
+ } else if (auto SyclKernelImpl =
2528
+ KernelBundleImplPtr
2529
+ ? KernelBundleImplPtr->tryGetKernel (
2530
+ CommandGroup.MKernelName , KernelBundleImplPtr)
2531
+ : std::shared_ptr<kernel_impl>{nullptr }) {
2529
2532
UrKernel = SyclKernelImpl->getHandleRef ();
2530
2533
DeviceImageImpl = SyclKernelImpl->getDeviceImage ();
2531
2534
EliminatedArgMask = SyclKernelImpl->getKernelArgMask ();
2532
- } else if (auto Kernel = CommandGroup.MSyclKernel ; Kernel != nullptr ) {
2533
- UrKernel = Kernel->getHandleRef ();
2534
- EliminatedArgMask = Kernel->getKernelArgMask ();
2535
2535
} else {
2536
2536
ur_program_handle_t UrProgram = nullptr ;
2537
2537
std::tie (UrKernel, std::ignore, EliminatedArgMask, UrProgram) =
@@ -2678,18 +2678,7 @@ void enqueueImpKernel(
2678
2678
std::shared_ptr<kernel_impl> SyclKernelImpl;
2679
2679
std::shared_ptr<device_image_impl> DeviceImageImpl;
2680
2680
2681
- if ((SyclKernelImpl = KernelBundleImplPtr
2682
- ? KernelBundleImplPtr->tryGetKernel (
2683
- KernelName, KernelBundleImplPtr)
2684
- : std::shared_ptr<kernel_impl>{nullptr })) {
2685
- Kernel = SyclKernelImpl->getHandleRef ();
2686
- DeviceImageImpl = SyclKernelImpl->getDeviceImage ();
2687
-
2688
- Program = DeviceImageImpl->get_ur_program_ref ();
2689
-
2690
- EliminatedArgMask = SyclKernelImpl->getKernelArgMask ();
2691
- KernelMutex = SyclKernelImpl->getCacheMutex ();
2692
- } else if (nullptr != MSyclKernel) {
2681
+ if (nullptr != MSyclKernel) {
2693
2682
assert (MSyclKernel->get_info <info::kernel::context>() ==
2694
2683
Queue->get_context ());
2695
2684
Kernel = MSyclKernel->getHandleRef ();
@@ -2703,6 +2692,17 @@ void enqueueImpKernel(
2703
2692
// their duplication in such cases.
2704
2693
KernelMutex = &MSyclKernel->getNoncacheableEnqueueMutex ();
2705
2694
EliminatedArgMask = MSyclKernel->getKernelArgMask ();
2695
+ } else if ((SyclKernelImpl = KernelBundleImplPtr
2696
+ ? KernelBundleImplPtr->tryGetKernel (
2697
+ KernelName, KernelBundleImplPtr)
2698
+ : std::shared_ptr<kernel_impl>{nullptr })) {
2699
+ Kernel = SyclKernelImpl->getHandleRef ();
2700
+ DeviceImageImpl = SyclKernelImpl->getDeviceImage ();
2701
+
2702
+ Program = DeviceImageImpl->get_ur_program_ref ();
2703
+
2704
+ EliminatedArgMask = SyclKernelImpl->getKernelArgMask ();
2705
+ KernelMutex = SyclKernelImpl->getCacheMutex ();
2706
2706
} else {
2707
2707
std::tie (Kernel, KernelMutex, EliminatedArgMask, Program) =
2708
2708
detail::ProgramManager::getInstance ().getOrCreateKernel (
@@ -3511,6 +3511,18 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
3511
3511
const AdapterPtr &Adapter = MQueue->getAdapter ();
3512
3512
if (MEvent != nullptr )
3513
3513
MEvent->setHostEnqueueTime ();
3514
+ // User can specify explicit dependencies via depends_on call that we should
3515
+ // honor here. It is very important for cross queue dependencies. We wait
3516
+ // for them explicitly since barrier w/o wait list waits for all commands
3517
+ // submitted before and we can't add new dependencies to its wait list.
3518
+ // Output event for wait operation is not requested since barrier is
3519
+ // submitted immediately after and should synchronize it internally.
3520
+ if (RawEvents.size ()) {
3521
+ auto Result = Adapter->call_nocheck <UrApiKind::urEnqueueEventsWait>(
3522
+ MQueue->getHandleRef (), RawEvents.size (), &RawEvents[0 ], nullptr );
3523
+ if (Result != UR_RESULT_SUCCESS)
3524
+ return Result;
3525
+ }
3514
3526
if (auto Result =
3515
3527
Adapter->call_nocheck <UrApiKind::urEnqueueEventsWaitWithBarrierExt>(
3516
3528
MQueue->getHandleRef (), &Properties, 0 , nullptr , Event);
@@ -3545,6 +3557,12 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
3545
3557
const AdapterPtr &Adapter = MQueue->getAdapter ();
3546
3558
if (MEvent != nullptr )
3547
3559
MEvent->setHostEnqueueTime ();
3560
+ // User can specify explicit dependencies via depends_on call that we should
3561
+ // honor here. It is very important for cross queue dependencies. Adding
3562
+ // them to the barrier wait list since barrier w/ wait list waits only for
3563
+ // the events provided in the wait list and we can just extend the list.
3564
+ UrEvents.insert (UrEvents.end (), RawEvents.begin (), RawEvents.end ());
3565
+
3548
3566
if (auto Result =
3549
3567
Adapter->call_nocheck <UrApiKind::urEnqueueEventsWaitWithBarrierExt>(
3550
3568
MQueue->getHandleRef (), &Properties, UrEvents.size (),
0 commit comments