@@ -28,26 +28,26 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
2828// /
2929// / One of the main differences between the UR API and the HIP driver API is
3030// / that the second modifies the state of the threads by assigning
31- // / ` hipCtx_t` objects to threads. ` hipCtx_t` objects store data associated
31+ // / \c hipCtx_t objects to threads. \c hipCtx_t objects store data associated
3232// / with a given device and control access to said device from the user side.
3333// / UR API contexts are objects that are passed to functions, and not bound
3434// / to threads.
35- // / The ur_context_handle_t_ object doesn't implement this behavior. It only
36- // / holds the HIP context data. The RAII object \ref ScopedContext implements
37- // / the active context behavior.
3835// /
39- // / <b> Primary vs UserDefined context </b>
36+ // / Since the \c ur_context_handle_t can contain multiple devices, and a \c
37+ // / hipCtx_t refers to only a single device, the \c hipCtx_t is more tightly
38+ // / coupled to a \c ur_device_handle_t than a \c ur_context_handle_t. In order
39+ // / to remove some ambiguities about the different semantics of
40+ // / \c ur_context_handle_t and native \c hipCtx_t, we access the native \c
41+ // / hipCtx_t solely through the \c ur_device_handle_t class, by using the object
42+ // / \ref ScopedContext, which sets the active device (by setting the active
43+ // / native \c hipCtx_t).
4044// /
41- // / HIP has two different types of context, the Primary context,
42- // / which is usable by all threads on a given process for a given device, and
43- // / the aforementioned custom contexts.
44- // / The HIP documentation, and performance analysis, suggest using the Primary
45- // / context whenever possible. The Primary context is also used by the HIP
46- // / Runtime API. For UR applications to interop with HIP Runtime API, they have
47- // / to use the primary context - and make that active in the thread. The
48- // / `ur_context_handle_t_` object can be constructed with a `kind` parameter
49- // / that allows to construct a Primary or `UserDefined` context, so that
50- // / the UR object interface is always the same.
45+ // / <b> Primary vs User-defined \c hipCtx_t </b>
46+ // /
47+ // / HIP has two different types of \c hipCtx_t, the Primary context, which is
48+ // / usable by all threads on a given process for a given device, and the
49+ // / aforementioned custom \c hipCtx_t s. The HIP documentation, confirmed with
50+ // / performance analysis, suggests using the Primary context whenever possible.
5151// /
5252// / <b> Destructor callback </b>
5353// /
@@ -57,6 +57,16 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
5757// / See proposal for details.
5858// / https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md
5959// /
60+ // / <b> Memory Management for Devices in a Context </b>
61+ // /
62+ // / A \c ur_mem_handle_t is associated with a \c ur_context_handle_t_, which
63+ // / may refer to multiple devices. Therefore the \c ur_mem_handle_t must
64+ // / handle a native allocation for each device in the context. UR is
65+ // / responsible for automatically handling event dependencies for kernels
66+ // / writing to or reading from the same \c ur_mem_handle_t and migrating memory
67+ // / between native allocations for devices in the same \c ur_context_handle_t_
68+ // / if necessary.
69+ // /
6070struct ur_context_handle_t_ {
6171
6272 struct deleter_data {
@@ -68,15 +78,22 @@ struct ur_context_handle_t_ {
6878
6979 using native_type = hipCtx_t;
7080
71- ur_device_handle_t DeviceId;
81+ std::vector<ur_device_handle_t > Devices;
82+
7283 std::atomic_uint32_t RefCount;
7384
74- ur_context_handle_t_ (ur_device_handle_t DevId)
75- : DeviceId{DevId}, RefCount{1 } {
76- urDeviceRetain (DeviceId);
85+ ur_context_handle_t_ (const ur_device_handle_t *Devs, uint32_t NumDevices)
86+ : Devices{Devs, Devs + NumDevices}, RefCount{1 } {
87+ for (auto &Dev : Devices) {
88+ urDeviceRetain (Dev);
89+ }
7790 };
7891
79- ~ur_context_handle_t_ () { urDeviceRelease (DeviceId); }
92+ ~ur_context_handle_t_ () {
93+ for (auto &Dev : Devices) {
94+ urDeviceRelease (Dev);
95+ }
96+ }
8097
8198 void invokeExtendedDeleters () {
8299 std::lock_guard<std::mutex> Guard (Mutex);
@@ -91,7 +108,9 @@ struct ur_context_handle_t_ {
91108 ExtendedDeleters.emplace_back (deleter_data{Function, UserData});
92109 }
93110
94- ur_device_handle_t getDevice () const noexcept { return DeviceId; }
111+ const std::vector<ur_device_handle_t > &getDevices () const noexcept {
112+ return Devices;
113+ }
95114
96115 uint32_t incrementReferenceCount () noexcept { return ++RefCount; }
97116
0 commit comments