@@ -33,22 +33,22 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
3333// / with a given device and control access to said device from the user side.
3434// / UR API context are objects that are passed to functions, and not bound
3535// / to threads.
36- // / The ur_context_handle_t_ object doesn't implement this behavior. It only
37- // / holds the HIP context data. The RAII object \ref ScopedContext implements
38- // / the active context behavior.
3936// /
40- // / <b> Primary vs UserDefined context </b>
37+ // / Since the ur_context_handle_t can contain multiple devices, and a `hipCtx_t`
38+ // / refers to only a single device, the `hipCtx_t` is more tightly coupled to a
39+ // / ur_device_handle_t than a ur_context_handle_t. In order to remove some
40+ // / ambiguities about the different semantics of ur_context_handle_t s and
41+ // / native `hipCtx_t`, we access the native `hipCtx_t` solely through the
42+ // / ur_device_handle_t class, by using the RAII object \ref ScopedDevice, which
43+ // / sets the active device (by setting the active native `hipCtx_t`).
4144// /
42- // / HIP has two different types of context, the Primary context,
43- // / which is usable by all threads on a given process for a given device, and
44- // / the aforementioned custom contexts.
45- // / The HIP documentation, and performance analysis, suggest using the Primary
46- // / context whenever possible. The Primary context is also used by the HIP
47- // / Runtime API. For UR applications to interop with HIP Runtime API, they have
48- // / to use the primary context - and make that active in the thread. The
49- // / `ur_context_handle_t_` object can be constructed with a `kind` parameter
50- // / that allows to construct a Primary or `UserDefined` context, so that
51- // / the UR object interface is always the same.
45+ // / <b> Primary vs User-defined `hipCtx_t` </b>
46+ // /
47+ // / HIP has two different types of `hipCtx_t`, the Primary context, which is
48+ // / usable by all threads on a given process for a given device, and the
49+ // / user-defined `hipCtx_t`s.
50+ // / The HIP documentation, confirmed with performance analysis, suggests using
51+ // / the Primary context whenever possible.
5252// /
5353// / <b> Destructor callback </b>
5454// /
@@ -58,6 +58,15 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
5858// / See proposal for details.
5959// / https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md
6060// /
61+ // / <b> Memory Management for Devices in a Context </b>
62+ // /
63+ // / A ur_buffer_ is associated with a ur_context_handle_t_, which may refer to
64+ // / multiple devices. Therefore the ur_buffer_ must handle a native allocation
65+ // / for each device in the context. UR is responsible for automatically
66+ // / handling event dependencies for kernels writing to or reading from the
67+ // / same ur_buffer_ and migrating memory between native allocations for
68+ // / devices in the same ur_context_handle_t_ if necessary.
69+ // /
6170struct ur_context_handle_t_ {
6271
6372 struct deleter_data {
@@ -69,15 +78,23 @@ struct ur_context_handle_t_ {
6978
7079 using native_type = hipCtx_t;
7180
72- ur_device_handle_t DeviceId;
81+ std::vector<ur_device_handle_t > Devices;
82+ uint32_t NumDevices;
83+
7384 std::atomic_uint32_t RefCount;
7485
75- ur_context_handle_t_ (ur_device_handle_t DevId)
76- : DeviceId{DevId}, RefCount{1 } {
77- urDeviceRetain (DeviceId);
86+ ur_context_handle_t_ (const ur_device_handle_t *Devs, uint32_t NumDevices)
87+ : Devices{Devs, Devs + NumDevices}, NumDevices{NumDevices}, RefCount{1 } {
88+ for (auto &Dev : Devices) {
89+ urDeviceRetain (Dev);
90+ }
7891 };
7992
80- ~ur_context_handle_t_ () { urDeviceRelease (DeviceId); }
93+ ~ur_context_handle_t_ () {
94+ for (auto &Dev : Devices) {
95+ urDeviceRelease (Dev);
96+ }
97+ }
8198
8299 void invokeExtendedDeleters () {
83100 std::lock_guard<std::mutex> Guard (Mutex);
@@ -92,7 +109,9 @@ struct ur_context_handle_t_ {
92109 ExtendedDeleters.emplace_back (deleter_data{Function, UserData});
93110 }
94111
95- ur_device_handle_t getDevice () const noexcept { return DeviceId; }
112+ const std::vector<ur_device_handle_t > &getDevices () const noexcept {
113+ return Devices;
114+ }
96115
97116 uint32_t incrementReferenceCount () noexcept { return ++RefCount; }
98117
0 commit comments