Skip to content

Commit faa2365

Browse files
Seanst98hdelannpmiller
authored
[AsyncAlloc][SYCL][CUDA][Exp] Initial device side implementation for the sycl_ext_oneapi_async_memory_alloc extension (#16900)
Implement the [sycl_ext_oneapi_async_memory_alloc](#14800) extension for asynchronous memory allocation and freeing in CUDA, for device allocated pools only. SYCL entrypoints which specify host or shared side pools, or pools created by pre-existing allocations will throw. co-authored-by: Sean Stirling <[email protected]> co-authored-by: Hugh Delaney <[email protected]> --------- Co-authored-by: Hugh Delaney <[email protected]> Co-authored-by: Nicolas Miller <[email protected]>
1 parent 583de24 commit faa2365

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+2922
-478
lines changed

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

+3-1
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def AspectExt_oneapi_bindless_images_gather : Aspect<"ext_oneapi_bindless_images
9191
def AspectExt_intel_current_clock_throttle_reasons : Aspect<"ext_intel_current_clock_throttle_reasons">;
9292
def AspectExt_intel_fan_speed : Aspect<"ext_intel_fan_speed">;
9393
def AspectExt_intel_power_limits : Aspect<"ext_intel_power_limits">;
94+
def AspectExt_oneapi_async_memory_alloc : Aspect<"ext_oneapi_async_memory_alloc">;
9495

9596
// Deprecated aspects
9697
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
@@ -161,7 +162,8 @@ def : TargetInfo<"__TestAspectList",
161162
AspectExt_intel_spill_memory_size,
162163
AspectExt_intel_current_clock_throttle_reasons,
163164
AspectExt_intel_fan_speed,
164-
AspectExt_intel_power_limits],
165+
AspectExt_intel_power_limits,
166+
AspectExt_oneapi_async_memory_alloc],
165167
[]>;
166168
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
167169
// match.

sycl/include/sycl/context.hpp

+18
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
1717
#include <sycl/platform.hpp> // for platform
1818
#include <sycl/property_list.hpp> // for property_list
19+
#include <sycl/usm/usm_enums.hpp> // for usm::alloc
1920
#include <ur_api.h> // for ur_native_handle_t
2021

2122
#ifdef __SYCL_INTERNAL_API
@@ -36,6 +37,10 @@ inline namespace _V1 {
3637
class device;
3738
class platform;
3839

40+
namespace ext::oneapi::experimental {
41+
class memory_pool;
42+
} // namespace ext::oneapi::experimental
43+
3944
namespace detail {
4045
class context_impl;
4146
}
@@ -245,6 +250,19 @@ class __SYCL_EXPORT context : public detail::OwnerLessBase<context> {
245250
/// \return a vector of valid SYCL device instances.
246251
std::vector<device> get_devices() const;
247252

253+
/// Gets default memory pool associated with a device and context.
254+
///
255+
/// \return a memory pool for a particular device and context.
256+
sycl::ext::oneapi::experimental::memory_pool
257+
ext_oneapi_get_default_memory_pool(const device &dev,
258+
sycl::usm::alloc kind) const;
259+
260+
/// Gets default memory pool associated with the context and allocation kind.
261+
///
262+
/// \return a memory pool associated with this context.
263+
sycl::ext::oneapi::experimental::memory_pool
264+
ext_oneapi_get_default_memory_pool(sycl::usm::alloc kind) const;
265+
248266
private:
249267
/// Constructs a SYCL context object from a valid context_impl instance.
250268
context(std::shared_ptr<detail::context_impl> Impl);

sycl/include/sycl/detail/cg_types.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ enum class CGType : unsigned int {
6565
SemaphoreSignal = 25,
6666
ProfilingTag = 26,
6767
EnqueueNativeCommand = 27,
68+
AsyncAlloc = 28,
69+
AsyncFree = 29,
6870
};
6971

7072
template <typename, typename T> struct check_fn_signature {

sycl/include/sycl/detail/property_helper.hpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,10 @@ enum DataLessPropKind {
5151
GraphDependOnAllLeaves = 24,
5252
GraphUpdatable = 25,
5353
GraphEnableProfiling = 26,
54+
MemPoolReadOnly = 27,
55+
MemPoolZeroInit = 28,
5456
// Indicates the last known dataless property.
55-
LastKnownDataLessPropKind = 26,
57+
LastKnownDataLessPropKind = 28,
5658
// Exceeding 32 may cause ABI breaking change on some of OSes.
5759
DataLessPropKindSize = 32
5860
};
@@ -67,7 +69,9 @@ enum PropWithDataKind {
6769
AccPropBufferLocation = 5,
6870
QueueComputeIndex = 6,
6971
GraphNodeDependencies = 7,
70-
PropWithDataKindSize = 8
72+
MemPoolInitialThreshold = 8,
73+
MemPoolMaximumSize = 9,
74+
PropWithDataKindSize = 10
7175
};
7276

7377
// Base class for dataless properties, needed to check that the type of an
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
//==----------- async_alloc.hpp --- SYCL asynchronous allocation -----------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
#include <sycl/detail/common.hpp> // for code_location
11+
#include <sycl/handler.hpp> // for handler
12+
#include <sycl/queue.hpp> // for queue
13+
#include <sycl/usm/usm_enums.hpp> // for usm::alloc
14+
15+
namespace sycl {
16+
inline namespace _V1 {
17+
namespace ext::oneapi::experimental {
18+
19+
// Forward declare memory_pool.
20+
class memory_pool;
21+
22+
/**
23+
* @brief Asynchronousy allocate memory from a default pool.
24+
*
25+
* @param q The queue with which to enqueue the asynchronous allocation.
26+
* @param kind The kind of memory pool allocation - device, host, shared, etc.
27+
* @param size The size in bytes to allocate.
28+
*
29+
* @return Generic pointer to allocated USM memory.
30+
*/
31+
__SYCL_EXPORT void *async_malloc(const sycl::queue &q, sycl::usm::alloc kind,
32+
size_t size,
33+
const sycl::detail::code_location &CodeLoc =
34+
sycl::detail::code_location::current());
35+
36+
/**
37+
* @brief Asynchronously allocate memory from a default pool.
38+
*
39+
* @param h The handler with which to enqueue the asynchronous allocation.
40+
* @param kind The kind of memory pool allocation - device, host, shared, etc.
41+
* @param size The size in bytes to allocate.
42+
*
43+
* @return Generic pointer to allocated USM memory.
44+
*/
45+
__SYCL_EXPORT void *async_malloc(sycl::handler &h, sycl::usm::alloc kind,
46+
size_t size);
47+
48+
/**
49+
* @brief Asynchronously allocate memory from a specified pool.
50+
*
51+
* @param q The queue with which to enqueue the asynchronous allocation.
52+
* @param size The size in bytes to allocate.
53+
* @param pool The pool with which to allocate from.
54+
*
55+
* @return Generic pointer to allocated USM memory.
56+
*/
57+
__SYCL_EXPORT void *
58+
async_malloc_from_pool(const sycl::queue &q, size_t size,
59+
const memory_pool &pool,
60+
const sycl::detail::code_location &CodeLoc =
61+
sycl::detail::code_location::current());
62+
63+
/**
64+
* @brief Asynchronously allocate memory from a specified pool.
65+
*
66+
* @param h The handler with which to enqueue the asynchronous allocation.
67+
* @param size The size in bytes to allocate.
68+
* @param pool The pool with which to allocate from.
69+
*
70+
* @return Generic pointer to allocated USM memory.
71+
*/
72+
__SYCL_EXPORT void *async_malloc_from_pool(sycl::handler &h, size_t size,
73+
const memory_pool &pool);
74+
75+
/**
76+
* @brief Asynchronously free memory.
77+
*
78+
* @param q The queue with which to enqueue the asynchronous free.
79+
* @param ptr The generic pointer to be freed.
80+
*/
81+
__SYCL_EXPORT void async_free(const sycl::queue &q, void *ptr,
82+
const sycl::detail::code_location &CodeLoc =
83+
sycl::detail::code_location::current());
84+
85+
/**
86+
* @brief Asynchronously free memory.
87+
*
88+
* @param h The handler with which to enqueue the asynchronous free.
89+
* @param ptr The generic pointer to be freed.
90+
*/
91+
__SYCL_EXPORT void async_free(sycl::handler &h, void *ptr);
92+
93+
} // namespace ext::oneapi::experimental
94+
} // namespace _V1
95+
} // namespace sycl
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
//==----------- memory_pool.hpp --- SYCL asynchronous allocation -----------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
#include <sycl/context.hpp> // for context
11+
#include <sycl/device.hpp> // for device
12+
#include <sycl/ext/oneapi/experimental/async_alloc/memory_pool_properties.hpp>
13+
#include <sycl/queue.hpp> // for queue
14+
#include <sycl/usm/usm_enums.hpp> // for usm::alloc
15+
16+
namespace sycl {
17+
inline namespace _V1 {
18+
namespace ext::oneapi::experimental {
19+
20+
// Forward declare memory_pool_impl.
21+
namespace detail {
22+
class memory_pool_impl;
23+
} // namespace detail
24+
25+
/// Memory pool
26+
class __SYCL_EXPORT memory_pool {
27+
28+
public:
29+
// NOT SUPPORTED: Host side pools unsupported.
30+
memory_pool(const sycl::context &, sycl::usm::alloc kind,
31+
const property_list & = {}) {
32+
if (kind == sycl::usm::alloc::device || kind == sycl::usm::alloc::shared)
33+
throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
34+
"Device and shared allocation kinds are disallowed "
35+
"without specifying a device!");
36+
if (kind == sycl::usm::alloc::unknown)
37+
throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
38+
"Unknown allocation kinds are disallowed!");
39+
40+
throw sycl::exception(
41+
sycl::make_error_code(sycl::errc::feature_not_supported),
42+
"Host allocated pools are unsupported!");
43+
}
44+
45+
memory_pool(const sycl::context &ctx, const sycl::device &dev,
46+
sycl::usm::alloc kind, const property_list &props = {});
47+
48+
memory_pool(const sycl::queue &q, sycl::usm::alloc kind,
49+
const property_list &props = {})
50+
: memory_pool(q.get_context(), q.get_device(), kind, props) {}
51+
52+
// NOT SUPPORTED: Creating a pool from an existing allocation is unsupported.
53+
memory_pool(const sycl::context &, void *, size_t,
54+
const property_list & = {}) {
55+
throw sycl::exception(
56+
sycl::make_error_code(sycl::errc::feature_not_supported),
57+
"Creating a pool from an existing allocation is unsupported!");
58+
}
59+
60+
~memory_pool() = default;
61+
62+
// Copy constructible/assignable, move constructible/assignable.
63+
memory_pool(const memory_pool &) = default;
64+
memory_pool(memory_pool &&) = default;
65+
memory_pool &operator=(const memory_pool &) = default;
66+
memory_pool &operator=(memory_pool &&) = default;
67+
68+
// Equality comparison.
69+
bool operator==(const memory_pool &rhs) const { return impl == rhs.impl; }
70+
bool operator!=(const memory_pool &rhs) const { return !(*this == rhs); }
71+
72+
// Impl handles getters and setters.
73+
sycl::context get_context() const;
74+
sycl::device get_device() const;
75+
sycl::usm::alloc get_alloc_kind() const;
76+
size_t get_threshold() const;
77+
size_t get_reserved_size_current() const;
78+
size_t get_used_size_current() const;
79+
80+
void increase_threshold_to(size_t newThreshold);
81+
82+
// Property getters.
83+
template <typename PropertyT> bool has_property() const noexcept {
84+
return getPropList().template has_property<PropertyT>();
85+
}
86+
template <typename PropertyT> PropertyT get_property() const {
87+
return getPropList().template get_property<PropertyT>();
88+
}
89+
90+
protected:
91+
std::shared_ptr<detail::memory_pool_impl> impl;
92+
93+
memory_pool(std::shared_ptr<detail::memory_pool_impl> Impl) : impl(Impl) {}
94+
95+
template <class Obj>
96+
friend const decltype(Obj::impl) &
97+
sycl::detail::getSyclObjImpl(const Obj &SyclObject);
98+
99+
template <class T>
100+
friend T sycl::detail::createSyclObjFromImpl(
101+
std::add_rvalue_reference_t<decltype(T::impl)> ImplObj);
102+
template <class T>
103+
friend T sycl::detail::createSyclObjFromImpl(
104+
std::add_lvalue_reference_t<const decltype(T::impl)> ImplObj);
105+
106+
const property_list &getPropList() const;
107+
};
108+
109+
} // namespace ext::oneapi::experimental
110+
} // namespace _V1
111+
} // namespace sycl
112+
113+
namespace std {
114+
template <> struct hash<sycl::ext::oneapi::experimental::memory_pool> {
115+
size_t operator()(
116+
const sycl::ext::oneapi::experimental::memory_pool &mem_pool) const {
117+
return hash<std::shared_ptr<
118+
sycl::ext::oneapi::experimental::detail::memory_pool_impl>>()(
119+
sycl::detail::getSyclObjImpl(mem_pool));
120+
}
121+
};
122+
} // namespace std
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
//==------ memory_pool_properties.hpp --- SYCL asynchronous allocation -----==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
#include <cstddef>
11+
#include <sycl/properties/property_traits.hpp>
12+
13+
namespace sycl {
14+
inline namespace _V1 {
15+
namespace ext::oneapi::experimental {
16+
17+
// Forward declare memory_pool.
18+
class memory_pool;
19+
20+
namespace property::memory_pool {
21+
22+
// Property that determines the initial threshold of a memory pool.
23+
struct initial_threshold : public sycl::detail::PropertyWithData<
24+
sycl::detail::MemPoolInitialThreshold> {
25+
initial_threshold(size_t initialThreshold)
26+
: initialThreshold(initialThreshold) {};
27+
size_t get_initial_threshold() { return initialThreshold; }
28+
29+
private:
30+
size_t initialThreshold;
31+
};
32+
33+
// Property that determines the maximum size of a memory pool.
34+
struct maximum_size
35+
: public sycl::detail::PropertyWithData<sycl::detail::MemPoolMaximumSize> {
36+
maximum_size(size_t maxSize) : maxSize(maxSize) {};
37+
size_t get_maximum_size() { return maxSize; }
38+
39+
private:
40+
size_t maxSize;
41+
};
42+
43+
// Property that provides a performance hint that all allocations from this pool
44+
// will only be read from within SYCL kernel functions.
45+
struct read_only
46+
: public sycl::detail::DataLessProperty<sycl::detail::MemPoolReadOnly> {
47+
read_only() = default;
48+
};
49+
50+
// Property that initial allocations to a pool (not subsequent allocations
51+
// from prior frees) are iniitialised to zero.
52+
struct zero_init
53+
: public sycl::detail::DataLessProperty<sycl::detail::MemPoolZeroInit> {
54+
zero_init() = default;
55+
};
56+
} // namespace property::memory_pool
57+
} // namespace ext::oneapi::experimental
58+
59+
template <>
60+
struct is_property<
61+
sycl::ext::oneapi::experimental::property::memory_pool::initial_threshold>
62+
: std::true_type {};
63+
64+
template <>
65+
struct is_property<
66+
sycl::ext::oneapi::experimental::property::memory_pool::maximum_size>
67+
: std::true_type {};
68+
69+
template <>
70+
struct is_property<
71+
sycl::ext::oneapi::experimental::property::memory_pool::read_only>
72+
: std::true_type {};
73+
74+
template <>
75+
struct is_property<
76+
sycl::ext::oneapi::experimental::property::memory_pool::zero_init>
77+
: std::true_type {};
78+
79+
} // namespace _V1
80+
} // namespace sycl

0 commit comments

Comments
 (0)