Skip to content

Commit a5c7d88

Browse files
authored
[UR][CUDA] Fix prefetch/advise early exit (#18395)
When we exit early we still need to return a proper event. This fixes segfaults in the UR CTS, the HIP version of these entry points was already handling this properly, so remove the known failures for both.
1 parent 6499330 commit a5c7d88

File tree

3 files changed

+69
-68
lines changed

3 files changed

+69
-68
lines changed

unified-runtime/source/adapters/cuda/enqueue.cpp

+68-58
Original file line numberDiff line numberDiff line change
@@ -1603,24 +1603,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
16031603
UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
16041604
ur_device_handle_t Device = hQueue->getDevice();
16051605

1606-
// Certain cuda devices and Windows do not have support for some Unified
1607-
// Memory features. cuMemPrefetchAsync requires concurrent memory access
1608-
// for managed memory. Therefore, ignore prefetch hint if concurrent managed
1609-
// memory access is not available.
1610-
if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
1611-
UR_LOG(WARN, "Prefetch hint ignored as device does not support "
1612-
"concurrent managed access.");
1613-
return UR_RESULT_SUCCESS;
1614-
}
1615-
1616-
unsigned int IsManaged;
1617-
UR_CHECK_ERROR(cuPointerGetAttribute(
1618-
&IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem));
1619-
if (!IsManaged) {
1620-
UR_LOG(WARN, "Prefetch hint ignored as prefetch only works with USM.");
1621-
return UR_RESULT_SUCCESS;
1622-
}
1623-
16241606
ur_result_t Result = UR_RESULT_SUCCESS;
16251607
std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr};
16261608

@@ -1635,12 +1617,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
16351617
UR_COMMAND_MEM_BUFFER_COPY, hQueue, CuStream));
16361618
UR_CHECK_ERROR(EventPtr->start());
16371619
}
1620+
1621+
// Ensure we release the event even on early exit
1622+
OnScopeExit ReleaseEvent([&]() {
1623+
if (phEvent) {
1624+
UR_CHECK_ERROR(EventPtr->record());
1625+
*phEvent = EventPtr.release();
1626+
}
1627+
});
1628+
1629+
// Certain cuda devices and Windows do not have support for some Unified
1630+
// Memory features. cuMemPrefetchAsync requires concurrent memory access
1631+
// for managed memory. Therefore, ignore prefetch hint if concurrent managed
1632+
// memory access is not available.
1633+
if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
1634+
UR_LOG(WARN, "Prefetch hint ignored as device does not support "
1635+
"concurrent managed access.");
1636+
return UR_RESULT_SUCCESS;
1637+
}
1638+
1639+
unsigned int IsManaged;
1640+
UR_CHECK_ERROR(cuPointerGetAttribute(
1641+
&IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem));
1642+
if (!IsManaged) {
1643+
UR_LOG(WARN, "Prefetch hint ignored as prefetch only works with USM.");
1644+
return UR_RESULT_SUCCESS;
1645+
}
1646+
16381647
UR_CHECK_ERROR(
16391648
cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream));
1640-
if (phEvent) {
1641-
UR_CHECK_ERROR(EventPtr->record());
1642-
*phEvent = EventPtr.release();
1643-
}
16441649
} catch (ur_result_t Err) {
16451650
Result = Err;
16461651
}
@@ -1656,37 +1661,6 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
16561661
&PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem));
16571662
UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
16581663

1659-
// Certain cuda devices and Windows do not have support for some Unified
1660-
// Memory features. Passing CU_MEM_ADVISE_SET/CLEAR_PREFERRED_LOCATION and
1661-
// to cuMemAdvise on a GPU device requires the GPU device to report a non-zero
1662-
// value for CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Therfore, ignore
1663-
// memory advise if concurrent managed memory access is not available.
1664-
if ((advice & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) ||
1665-
(advice & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) ||
1666-
(advice & UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE) ||
1667-
(advice & UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE) ||
1668-
(advice & UR_USM_ADVICE_FLAG_DEFAULT)) {
1669-
ur_device_handle_t Device = hQueue->getDevice();
1670-
if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
1671-
UR_LOG(WARN, "Mem advise ignored as device does not support "
1672-
"concurrent managed access.");
1673-
return UR_RESULT_SUCCESS;
1674-
}
1675-
1676-
// TODO: If ptr points to valid system-allocated pageable memory we should
1677-
// check that the device also has the
1678-
// CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS property.
1679-
}
1680-
1681-
unsigned int IsManaged;
1682-
UR_CHECK_ERROR(cuPointerGetAttribute(
1683-
&IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem));
1684-
if (!IsManaged) {
1685-
UR_LOG(WARN,
1686-
"Memory advice ignored as memory advices only works with USM.");
1687-
return UR_RESULT_SUCCESS;
1688-
}
1689-
16901664
ur_result_t Result = UR_RESULT_SUCCESS;
16911665
std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr};
16921666

@@ -1700,6 +1674,47 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
17001674
UR_CHECK_ERROR(EventPtr->start());
17011675
}
17021676

1677+
// Ensure we release the event even on early exit
1678+
OnScopeExit ReleaseEvent([&]() {
1679+
if (phEvent) {
1680+
UR_CHECK_ERROR(EventPtr->record());
1681+
*phEvent = EventPtr.release();
1682+
}
1683+
});
1684+
1685+
// Certain cuda devices and Windows do not have support for some Unified
1686+
// Memory features. Passing CU_MEM_ADVISE_SET/CLEAR_PREFERRED_LOCATION and
1687+
// to cuMemAdvise on a GPU device requires the GPU device to report a
1688+
// non-zero value for CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
1689+
// Therfore, ignore memory advise if concurrent managed memory access is not
1690+
// available.
1691+
if ((advice & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) ||
1692+
(advice & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) ||
1693+
(advice & UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE) ||
1694+
(advice & UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE) ||
1695+
(advice & UR_USM_ADVICE_FLAG_DEFAULT)) {
1696+
ur_device_handle_t Device = hQueue->getDevice();
1697+
if (!getAttribute(Device,
1698+
CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
1699+
UR_LOG(WARN, "Mem advise ignored as device does not support "
1700+
"concurrent managed access.");
1701+
return UR_RESULT_SUCCESS;
1702+
}
1703+
1704+
// TODO: If ptr points to valid system-allocated pageable memory we should
1705+
// check that the device also has the
1706+
// CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS property.
1707+
}
1708+
1709+
unsigned int IsManaged;
1710+
UR_CHECK_ERROR(cuPointerGetAttribute(
1711+
&IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem));
1712+
if (!IsManaged) {
1713+
UR_LOG(WARN,
1714+
"Memory advice ignored as memory advices only works with USM.");
1715+
return UR_RESULT_SUCCESS;
1716+
}
1717+
17031718
if (advice & UR_USM_ADVICE_FLAG_DEFAULT) {
17041719
UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size,
17051720
CU_MEM_ADVISE_UNSET_READ_MOSTLY,
@@ -1714,11 +1729,6 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
17141729
Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice,
17151730
hQueue->getDevice()->get());
17161731
}
1717-
1718-
if (phEvent) {
1719-
UR_CHECK_ERROR(EventPtr->record());
1720-
*phEvent = EventPtr.release();
1721-
}
17221732
} catch (ur_result_t err) {
17231733
Result = err;
17241734
} catch (...) {

unified-runtime/test/conformance/enqueue/urEnqueueUSMAdvise.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ UUR_DEVICE_TEST_SUITE_WITH_PARAM(
2525
uur::deviceTestWithParamPrinter<ur_usm_advice_flag_t>);
2626

2727
TEST_P(urEnqueueUSMAdviseWithParamTest, Success) {
28-
UUR_KNOWN_FAILURE_ON(uur::HIP{}, uur::CUDA{});
29-
3028
ur_event_handle_t advise_event = nullptr;
3129
ur_result_t result = urEnqueueUSMAdvise(queue, ptr, allocation_size,
3230
getParam(), &advise_event);
@@ -54,8 +52,6 @@ struct urEnqueueUSMAdviseTest : uur::urUSMDeviceAllocTest {
5452
UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueUSMAdviseTest);
5553

5654
TEST_P(urEnqueueUSMAdviseTest, MultipleParamsSuccess) {
57-
UUR_KNOWN_FAILURE_ON(uur::HIP{}, uur::CUDA{});
58-
5955
ur_result_t result = urEnqueueUSMAdvise(queue, ptr, allocation_size,
6056
UR_USM_ADVICE_FLAG_SET_READ_MOSTLY |
6157
UR_USM_ADVICE_FLAG_BIAS_CACHED,

unified-runtime/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp

+1-6
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,6 @@ UUR_DEVICE_TEST_SUITE_WITH_PARAM(
2424

2525
TEST_P(urEnqueueUSMPrefetchWithParamTest, Success) {
2626
UUR_KNOWN_FAILURE_ON(
27-
// HIP and CUDA return UR_RESULT_ERROR_ADAPTER_SPECIFIC to issue a
28-
// warning about the hint being unsupported. The same applies for
29-
// subsequent fails in this file.
30-
// TODO: codify this in the spec and account for it in the CTS.
31-
uur::HIP{}, uur::CUDA{},
3227
// The setup for the parent fixture does a urQueueFlush, which isn't
3328
// supported by native cpu. Again same goes for subsequent fails in
3429
// this file.
@@ -53,7 +48,7 @@ TEST_P(urEnqueueUSMPrefetchWithParamTest, Success) {
5348
* executing.
5449
*/
5550
TEST_P(urEnqueueUSMPrefetchWithParamTest, CheckWaitEvent) {
56-
UUR_KNOWN_FAILURE_ON(uur::HIP{}, uur::CUDA{}, uur::NativeCPU{});
51+
UUR_KNOWN_FAILURE_ON(uur::NativeCPU{});
5752

5853
ur_queue_handle_t fill_queue;
5954
ASSERT_SUCCESS(urQueueCreate(context, device, nullptr, &fill_queue));

0 commit comments

Comments
 (0)