From f99f958c47b7771bdf24e7bca12300364fb355e2 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Thu, 13 Feb 2025 13:11:39 +0100 Subject: [PATCH 1/2] Refactor: Cycles: Add host_alloc/free to device API This may be used for device to do host memory allocation in a way that is more efficient for copy the host memory to the device. Also rename and group device memory allocation functions for clarity. Pull Request: https://projects.blender.org/blender/blender/pulls/134412 --- intern/cycles/device/cuda/device_impl.cpp | 8 ++--- intern/cycles/device/cuda/device_impl.h | 33 +++++++++++---------- intern/cycles/device/device.cpp | 32 +++++++++++++------- intern/cycles/device/device.h | 30 ++++++++++--------- intern/cycles/device/hip/device_impl.cpp | 8 ++--- intern/cycles/device/hip/device_impl.h | 32 ++++++++++---------- intern/cycles/device/memory.cpp | 8 ++--- intern/cycles/device/memory.h | 2 +- intern/cycles/device/metal/device_impl.mm | 2 +- intern/cycles/device/multi/device.cpp | 33 +++++++++++++++++---- intern/cycles/device/oneapi/device_impl.cpp | 6 ++-- intern/cycles/device/oneapi/device_impl.h | 33 +++++++++++---------- 12 files changed, 132 insertions(+), 95 deletions(-) diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 7bc30f061e1..fde508074b6 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -537,7 +537,7 @@ void CUDADevice::free_device(void *device_pointer) cuda_assert(cuMemFree((CUdeviceptr)device_pointer)); } -bool CUDADevice::alloc_host(void *&shared_pointer, const size_t size) +bool CUDADevice::shared_alloc(void *&shared_pointer, const size_t size) { CUDAContextScope scope(this); @@ -546,14 +546,14 @@ bool CUDADevice::alloc_host(void *&shared_pointer, const size_t size) return mem_alloc_result == CUDA_SUCCESS; } -void CUDADevice::free_host(void *shared_pointer) +void CUDADevice::shared_free(void *shared_pointer) { CUDAContextScope scope(this); cuMemFreeHost(shared_pointer); } -void *CUDADevice::transform_host_to_device_pointer(const void *shared_pointer) +void *CUDADevice::shared_to_device_pointer(const void *shared_pointer) { CUDAContextScope scope(this); void *device_pointer = nullptr; @@ -646,7 +646,7 @@ void CUDADevice::mem_zero(device_memory &mem) return; } - if (!(mem.is_host_mapped(this) && mem.host_pointer == mem.shared_pointer)) { + if (!(mem.is_shared(this) && mem.host_pointer == mem.shared_pointer)) { const CUDAContextScope scope(this); cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size())); } diff --git a/intern/cycles/device/cuda/device_impl.h b/intern/cycles/device/cuda/device_impl.h index 07e001d289e..f022864d3d3 100644 --- a/intern/cycles/device/cuda/device_impl.h +++ b/intern/cycles/device/cuda/device_impl.h @@ -59,42 +59,43 @@ class CUDADevice : public GPUDevice { bool force_ptx = false); bool load_kernels(const uint kernel_features) override; - void reserve_local_memory(const uint kernel_features); - void get_device_memory_info(size_t &total, size_t &free) override; - bool alloc_device(void *&device_pointer, const size_t size) override; - void free_device(void *device_pointer) override; - bool alloc_host(void *&shared_pointer, const size_t size) override; - void free_host(void *shared_pointer) override; - void *transform_host_to_device_pointer(const void *shared_pointer) override; - void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override; - + /* All memory types. */ void mem_alloc(device_memory &mem) override; - void mem_copy_to(device_memory &mem) override; - void mem_move_to_host(device_memory &mem) override; - void mem_copy_from( device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override; - void mem_zero(device_memory &mem) override; - void mem_free(device_memory &mem) override; device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override; - void const_copy_to(const char *name, void *host, const size_t size) override; - + /* Global memory. */ void global_alloc(device_memory &mem); void global_copy_to(device_memory &mem); void global_free(device_memory &mem); + /* Texture memory. */ void tex_alloc(device_texture &mem); void tex_copy_to(device_texture &mem); void tex_free(device_texture &mem); + /* Device side memory. */ + void get_device_memory_info(size_t &total, size_t &free) override; + bool alloc_device(void *&device_pointer, const size_t size) override; + void free_device(void *device_pointer) override; + + /* Shared memory. */ + bool shared_alloc(void *&shared_pointer, const size_t size) override; + void shared_free(void *shared_pointer) override; + void *shared_to_device_pointer(const void *shared_pointer) override; + + /* Memory copy. */ + void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override; + void const_copy_to(const char *name, void *host, const size_t size) override; + bool should_use_graphics_interop() override; unique_ptr gpu_queue_create() override; diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index d149274ae55..cf77f76bec6 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -496,6 +496,16 @@ OSLGlobals *Device::get_cpu_osl_memory() return nullptr; } +void *Device::host_alloc(const MemoryType /*type*/, const size_t size) +{ + return util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES); +} + +void Device::host_free(const MemoryType /*type*/, void *host_pointer, const size_t size) +{ + util_aligned_free(host_pointer, size); +} + GPUDevice::~GPUDevice() noexcept(false) = default; bool GPUDevice::load_texture_info() @@ -572,7 +582,7 @@ void GPUDevice::move_textures_to_host(size_t size, const size_t headroom, const /* Can only move textures allocated on this device (and not those from peer devices). * And need to ignore memory that is already on the host. */ - if (!mem.is_resident(this) || mem.is_host_mapped(this)) { + if (!mem.is_resident(this) || mem.is_shared(this)) { continue; } @@ -677,14 +687,14 @@ GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, const size_t pitch_ } else if (map_host_used + size < map_host_limit) { /* Allocate host memory ourselves. */ - mem_alloc_result = alloc_host(shared_pointer, size); + mem_alloc_result = shared_alloc(shared_pointer, size); assert((mem_alloc_result && shared_pointer != nullptr) || (!mem_alloc_result && shared_pointer == nullptr)); } if (mem_alloc_result) { - device_pointer = transform_host_to_device_pointer(shared_pointer); + device_pointer = shared_to_device_pointer(shared_pointer); map_host_used += size; status = " in host memory"; } @@ -728,7 +738,7 @@ GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, const size_t pitch_ mem.host_pointer != shared_pointer) { memcpy(shared_pointer, mem.host_pointer, size); - util_aligned_free(mem.host_pointer, mem.memory_size()); + host_free(mem.type, mem.host_pointer, mem.memory_size()); mem.host_pointer = shared_pointer; } mem.shared_pointer = shared_pointer; @@ -752,7 +762,7 @@ void GPUDevice::generic_free(device_memory &mem) DCHECK(device_mem_map.find(&mem) != device_mem_map.end()); /* For host mapped memory, reference counting is used to safely free it. */ - if (mem.is_host_mapped(this)) { + if (mem.is_shared(this)) { assert(mem.shared_counter > 0); if (--mem.shared_counter == 0) { if (mem.host_pointer == mem.shared_pointer) { @@ -764,7 +774,7 @@ void GPUDevice::generic_free(device_memory &mem) mem.host_pointer = mem.host_alloc(size); memcpy(mem.host_pointer, mem.shared_pointer, size); } - free_host(mem.shared_pointer); + shared_free(mem.shared_pointer); mem.shared_pointer = nullptr; } map_host_used -= mem.device_size; @@ -791,17 +801,17 @@ void GPUDevice::generic_copy_to(device_memory &mem) /* If not host mapped, the current device only uses device memory allocated by backend * device allocation regardless of mem.host_pointer and mem.shared_pointer, and should * copy data from mem.host_pointer. */ - if (!(mem.is_host_mapped(this) && mem.host_pointer == mem.shared_pointer)) { + if (!(mem.is_shared(this) && mem.host_pointer == mem.shared_pointer)) { copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size()); } } -bool GPUDevice::is_host_mapped(const void *shared_pointer, - const device_ptr device_pointer, - Device * /*sub_device*/) +bool GPUDevice::is_shared(const void *shared_pointer, + const device_ptr device_pointer, + Device * /*sub_device*/) { return (shared_pointer && device_pointer && - (device_ptr)transform_host_to_device_pointer(shared_pointer) == device_pointer); + (device_ptr)shared_to_device_pointer(shared_pointer) == device_pointer); } /* DeviceInfo */ diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 8c01bd6f769..8912000f788 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -247,9 +247,9 @@ class Device { return false; } - virtual bool is_host_mapped(const void * /*shared_pointer*/, - const device_ptr /*device_pointer*/, - Device * /*sub_device*/) + virtual bool is_shared(const void * /*shared_pointer*/, + const device_ptr /*device_pointer*/, + Device * /*sub_device*/) { return false; } @@ -320,6 +320,9 @@ class Device { friend class DeviceServer; friend class device_memory; + virtual void *host_alloc(const MemoryType type, const size_t size); + virtual void host_free(const MemoryType type, void *host_pointer, const size_t size); + virtual void mem_alloc(device_memory &mem) = 0; virtual void mem_copy_to(device_memory &mem) = 0; virtual void mem_move_to_host(device_memory &mem) = 0; @@ -398,22 +401,21 @@ class GPUDevice : public Device { /* total - amount of device memory, free - amount of available device memory */ virtual void get_device_memory_info(size_t &total, size_t &free) = 0; + /* Device side memory. */ virtual bool alloc_device(void *&device_pointer, const size_t size) = 0; - virtual void free_device(void *device_pointer) = 0; - virtual bool alloc_host(void *&shared_pointer, const size_t size) = 0; - - virtual void free_host(void *shared_pointer) = 0; - - bool is_host_mapped(const void *shared_pointer, - const device_ptr device_pointer, - Device *sub_device) override; - + /* Shared memory. */ + virtual bool shared_alloc(void *&shared_pointer, const size_t size) = 0; + virtual void shared_free(void *shared_pointer) = 0; + bool is_shared(const void *shared_pointer, + const device_ptr device_pointer, + Device *sub_device) override; /* This function should return device pointer corresponding to shared pointer, which - * is host buffer, allocated in `alloc_host`. */ - virtual void *transform_host_to_device_pointer(const void *shared_pointer) = 0; + * is host buffer, allocated in `shared_alloc`. */ + virtual void *shared_to_device_pointer(const void *shared_pointer) = 0; + /* Memory copy. */ virtual void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) = 0; diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index f2c3b7b0c1e..567e9139452 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -498,7 +498,7 @@ void HIPDevice::free_device(void *device_pointer) hip_assert(hipFree((hipDeviceptr_t)device_pointer)); } -bool HIPDevice::alloc_host(void *&shared_pointer, const size_t size) +bool HIPDevice::shared_alloc(void *&shared_pointer, const size_t size) { HIPContextScope scope(this); @@ -508,14 +508,14 @@ bool HIPDevice::alloc_host(void *&shared_pointer, const size_t size) return mem_alloc_result == hipSuccess; } -void HIPDevice::free_host(void *shared_pointer) +void HIPDevice::shared_free(void *shared_pointer) { HIPContextScope scope(this); hipHostFree(shared_pointer); } -void *HIPDevice::transform_host_to_device_pointer(const void *shared_pointer) +void *HIPDevice::shared_to_device_pointer(const void *shared_pointer) { HIPContextScope scope(this); void *device_pointer = nullptr; @@ -608,7 +608,7 @@ void HIPDevice::mem_zero(device_memory &mem) return; } - if (!(mem.is_host_mapped(this) && mem.host_pointer == mem.shared_pointer)) { + if (!(mem.is_shared(this) && mem.host_pointer == mem.shared_pointer)) { const HIPContextScope scope(this); hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size())); } diff --git a/intern/cycles/device/hip/device_impl.h b/intern/cycles/device/hip/device_impl.h index 3d483ee1790..fa6a17223d2 100644 --- a/intern/cycles/device/hip/device_impl.h +++ b/intern/cycles/device/hip/device_impl.h @@ -60,39 +60,41 @@ class HIPDevice : public GPUDevice { bool load_kernels(const uint kernel_features) override; void reserve_local_memory(const uint kernel_features); - void get_device_memory_info(size_t &total, size_t &free) override; - bool alloc_device(void *&device_pointer, const size_t size) override; - void free_device(void *device_pointer) override; - bool alloc_host(void *&shared_pointer, const size_t size) override; - void free_host(void *shared_pointer) override; - void *transform_host_to_device_pointer(const void *shared_pointer) override; - void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override; - + /* All memory types. */ void mem_alloc(device_memory &mem) override; - void mem_copy_to(device_memory &mem) override; - void mem_move_to_host(device_memory &mem) override; - void mem_copy_from( device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override; - void mem_zero(device_memory &mem) override; - void mem_free(device_memory &mem) override; device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override; - void const_copy_to(const char *name, void *host, const size_t size) override; - + /* Global memory. */ void global_alloc(device_memory &mem); void global_copy_to(device_memory &mem); void global_free(device_memory &mem); + /* Texture memory. */ void tex_alloc(device_texture &mem); void tex_copy_to(device_texture &mem); void tex_free(device_texture &mem); + /* Device side memory. */ + void get_device_memory_info(size_t &total, size_t &free) override; + bool alloc_device(void *&device_pointer, const size_t size) override; + void free_device(void *device_pointer) override; + + /* Shared memory. */ + bool shared_alloc(void *&shared_pointer, const size_t size) override; + void shared_free(void *shared_pointer) override; + void *shared_to_device_pointer(const void *shared_pointer) override; + + /* Memory copy. */ + void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override; + void const_copy_to(const char *name, void *host, const size_t size) override; + /* Graphics resources interoperability. */ bool should_use_graphics_interop() override; diff --git a/intern/cycles/device/memory.cpp b/intern/cycles/device/memory.cpp index fd66cf9ba9f..ec2a46a6c34 100644 --- a/intern/cycles/device/memory.cpp +++ b/intern/cycles/device/memory.cpp @@ -45,7 +45,7 @@ void *device_memory::host_alloc(const size_t size) return nullptr; } - void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES); + void *ptr = device->host_alloc(type, size); if (ptr == nullptr) { throw std::bad_alloc(); @@ -58,7 +58,7 @@ void device_memory::host_and_device_free() { if (host_pointer) { if (host_pointer != shared_pointer) { - util_aligned_free(host_pointer, memory_size()); + device->host_free(type, host_pointer, memory_size()); } host_pointer = nullptr; } @@ -136,9 +136,9 @@ bool device_memory::is_resident(Device *sub_device) const return device->is_resident(device_pointer, sub_device); } -bool device_memory::is_host_mapped(Device *sub_device) const +bool device_memory::is_shared(Device *sub_device) const { - return device->is_host_mapped(shared_pointer, device_pointer, sub_device); + return device->is_shared(shared_pointer, device_pointer, sub_device); } /* Device Sub `ptr`. */ diff --git a/intern/cycles/device/memory.h b/intern/cycles/device/memory.h index 63125e94cab..6d978e98591 100644 --- a/intern/cycles/device/memory.h +++ b/intern/cycles/device/memory.h @@ -255,7 +255,7 @@ class device_memory { void restore_device(); bool is_resident(Device *sub_device) const; - bool is_host_mapped(Device *sub_device) const; + bool is_shared(Device *sub_device) const; /* No copying and allowed. * diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index 6a4421257fb..1b2abea8617 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -731,7 +731,7 @@ if (mem.host_pointer && mem.host_pointer != mmem->hostPtr) { memcpy(mmem->hostPtr, mem.host_pointer, size); - util_aligned_free(mem.host_pointer, mem.memory_size()); + host_free(mem.type, mem.host_pointer, mem.memory_size()); mem.host_pointer = mmem->hostPtr; } mem.shared_pointer = mmem->hostPtr; diff --git a/intern/cycles/device/multi/device.cpp b/intern/cycles/device/multi/device.cpp index 06751045fe4..d1489524289 100644 --- a/intern/cycles/device/multi/device.cpp +++ b/intern/cycles/device/multi/device.cpp @@ -313,6 +313,29 @@ class MultiDevice : public Device { return find_matching_mem_device(key, sub)->ptr_map[key]; } + void *host_alloc(const MemoryType type, const size_t size) override + { + for (SubDevice &sub : devices) { + if (sub.device->info.type != DEVICE_CPU) { + return sub.device->host_alloc(type, size); + } + } + + return Device::host_alloc(type, size); + } + + void host_free(const MemoryType type, void *host_pointer, const size_t size) override + { + for (SubDevice &sub : devices) { + if (sub.device->info.type != DEVICE_CPU) { + sub.device->host_free(type, host_pointer, size); + return; + } + } + + Device::host_free(type, host_pointer, size); + } + void mem_alloc(device_memory &mem) override { device_ptr key = unique_key++; @@ -378,7 +401,7 @@ class MultiDevice : public Device { mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0; mem.device_size = existing_size; - if (!owner_sub->device->is_host_mapped( + if (!owner_sub->device->is_shared( mem.shared_pointer, mem.device_pointer, owner_sub->device.get())) { owner_sub->device->mem_move_to_host(mem); @@ -398,9 +421,7 @@ class MultiDevice : public Device { stats.mem_alloc(mem.device_size - existing_size); } - bool is_host_mapped(const void *shared_pointer, - const device_ptr key, - Device *sub_device) override + bool is_shared(const void *shared_pointer, const device_ptr key, Device *sub_device) override { if (key == 0) { return false; @@ -408,11 +429,11 @@ class MultiDevice : public Device { for (const SubDevice &sub : devices) { if (sub.device.get() == sub_device) { - return sub_device->is_host_mapped(shared_pointer, sub.ptr_map.at(key), sub_device); + return sub_device->is_shared(shared_pointer, sub.ptr_map.at(key), sub_device); } } - assert(!"is_host_mapped failed to find matching device"); + assert(!"is_shared failed to find matching device"); return false; } diff --git a/intern/cycles/device/oneapi/device_impl.cpp b/intern/cycles/device/oneapi/device_impl.cpp index a3657a4ccd1..6b91ff05757 100644 --- a/intern/cycles/device/oneapi/device_impl.cpp +++ b/intern/cycles/device/oneapi/device_impl.cpp @@ -335,18 +335,18 @@ void OneapiDevice::free_device(void *device_pointer) usm_free(device_queue_, device_pointer); } -bool OneapiDevice::alloc_host(void *&shared_pointer, const size_t size) +bool OneapiDevice::shared_alloc(void *&shared_pointer, const size_t size) { shared_pointer = usm_aligned_alloc_host(device_queue_, size, 64); return shared_pointer != nullptr; } -void OneapiDevice::free_host(void *shared_pointer) +void OneapiDevice::shared_free(void *shared_pointer) { usm_free(device_queue_, shared_pointer); } -void *OneapiDevice::transform_host_to_device_pointer(const void *shared_pointer) +void *OneapiDevice::shared_to_device_pointer(const void *shared_pointer) { /* Device and host pointer are in the same address space * as we're using Unified Shared Memory. */ diff --git a/intern/cycles/device/oneapi/device_impl.h b/intern/cycles/device/oneapi/device_impl.h index c66e49095d1..898354600be 100644 --- a/intern/cycles/device/oneapi/device_impl.h +++ b/intern/cycles/device/oneapi/device_impl.h @@ -57,50 +57,51 @@ class OneapiDevice : public GPUDevice { void reserve_private_memory(const uint kernel_features); - void get_device_memory_info(size_t &total, size_t &free) override; - bool alloc_device(void *&device_pointer, const size_t size) override; - void free_device(void *device_pointer) override; - bool alloc_host(void *&shared_pointer, const size_t size) override; - void free_host(void *shared_pointer) override; - void *transform_host_to_device_pointer(const void *shared_pointer) override; - void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override; - string oneapi_error_message(); int scene_max_shaders(); void *kernel_globals_device_pointer(); + /* All memory types. */ void mem_alloc(device_memory &mem) override; - void mem_copy_to(device_memory &mem) override; - void mem_move_to_host(device_memory &mem) override; - void mem_copy_from( device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override; - void mem_copy_from(device_memory &mem) { mem_copy_from(mem, 0, 0, 0, 0); } - void mem_zero(device_memory &mem) override; - void mem_free(device_memory &mem) override; device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override; - void const_copy_to(const char *name, void *host, const size_t size) override; - + /* Global memory. */ void global_alloc(device_memory &mem); void global_copy_to(device_memory &mem); void global_free(device_memory &mem); + /* Texture memory. */ void tex_alloc(device_texture &mem); void tex_copy_to(device_texture &mem); void tex_free(device_texture &mem); + /* Device side memory. */ + void get_device_memory_info(size_t &total, size_t &free) override; + bool alloc_device(void *&device_pointer, const size_t size) override; + void free_device(void *device_pointer) override; + + /* Shared memory. */ + bool shared_alloc(void *&shared_pointer, const size_t size) override; + void shared_free(void *shared_pointer) override; + void *shared_to_device_pointer(const void *shared_pointer) override; + + /* Memory copy. */ + void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override; + void const_copy_to(const char *name, void *host, const size_t size) override; + /* Graphics resources interoperability. */ bool should_use_graphics_interop() override; From c87a26902196f8e5b705868eab8eb788bdaac6eb Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 11 Feb 2025 18:45:29 +0100 Subject: [PATCH 2/2] Fix #133953: Cycles oneAPI texture randomly renders black * Do oneAPI copy optimization as part of host memory alloc and free, so it is properly released before host memory is freed. * Synchronize after loading texture info, like CUDA and HIP. https://projects.blender.org/blender/blender/pulls/134412 --- intern/cycles/device/oneapi/device_impl.cpp | 43 ++++++++++++++------- intern/cycles/device/oneapi/device_impl.h | 4 ++ intern/cycles/device/oneapi/queue.cpp | 2 +- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/intern/cycles/device/oneapi/device_impl.cpp b/intern/cycles/device/oneapi/device_impl.cpp index 6b91ff05757..4d36c432de5 100644 --- a/intern/cycles/device/oneapi/device_impl.cpp +++ b/intern/cycles/device/oneapi/device_impl.cpp @@ -379,6 +379,35 @@ void *OneapiDevice::kernel_globals_device_pointer() return kg_memory_device_; } +void *OneapiDevice::host_alloc(const MemoryType type, const size_t size) +{ + void *host_pointer = GPUDevice::host_alloc(type, size); + +# ifdef SYCL_EXT_ONEAPI_COPY_OPTIMIZE + if (host_pointer) { + /* Import host_pointer into USM memory for faster host<->device data transfers. */ + if (type == MEM_READ_WRITE || type == MEM_READ_ONLY) { + sycl::queue *queue = reinterpret_cast(device_queue_); + sycl::ext::oneapi::experimental::prepare_for_device_copy(host_pointer, size, *queue); + } + } +# endif + + return host_pointer; +} + +void OneapiDevice::host_free(const MemoryType type, void *host_pointer, const size_t size) +{ +# ifdef SYCL_EXT_ONEAPI_COPY_OPTIMIZE + if (type == MEM_READ_WRITE || type == MEM_READ_ONLY) { + sycl::queue *queue = reinterpret_cast(device_queue_); + sycl::ext::oneapi::experimental::release_from_device_copy(host_pointer, *queue); + } +# endif + + GPUDevice::host_free(type, host_pointer, size); +} + void OneapiDevice::mem_alloc(device_memory &mem) { if (mem.type == MEM_TEXTURE) { @@ -394,14 +423,6 @@ void OneapiDevice::mem_alloc(device_memory &mem) << string_human_readable_size(mem.memory_size()) << ")"; } generic_alloc(mem); -# ifdef SYCL_EXT_ONEAPI_COPY_OPTIMIZE - /* Import host_pointer into USM memory for faster host<->device data transfers. */ - if (mem.type == MEM_READ_WRITE || mem.type == MEM_READ_ONLY) { - sycl::queue *queue = reinterpret_cast(device_queue_); - sycl::ext::oneapi::experimental::prepare_for_device_copy( - mem.host_pointer, mem.memory_size(), *queue); - } -# endif } } @@ -543,12 +564,6 @@ void OneapiDevice::mem_free(device_memory &mem) tex_free((device_texture &)mem); } else { -# ifdef SYCL_EXT_ONEAPI_COPY_OPTIMIZE - if (mem.type == MEM_READ_WRITE || mem.type == MEM_READ_ONLY) { - sycl::queue *queue = reinterpret_cast(device_queue_); - sycl::ext::oneapi::experimental::release_from_device_copy(mem.host_pointer, *queue); - } -# endif generic_free(mem); } } diff --git a/intern/cycles/device/oneapi/device_impl.h b/intern/cycles/device/oneapi/device_impl.h index 898354600be..0d9a90cf18b 100644 --- a/intern/cycles/device/oneapi/device_impl.h +++ b/intern/cycles/device/oneapi/device_impl.h @@ -88,6 +88,10 @@ class OneapiDevice : public GPUDevice { void tex_copy_to(device_texture &mem); void tex_free(device_texture &mem); + /* Host side memory, override for more efficient copies. */ + void *host_alloc(const MemoryType type, const size_t size) override; + void host_free(const MemoryType type, void *host_pointer, const size_t size) override; + /* Device side memory. */ void get_device_memory_info(size_t &total, size_t &free) override; bool alloc_device(void *&device_pointer, const size_t size) override; diff --git a/intern/cycles/device/oneapi/queue.cpp b/intern/cycles/device/oneapi/queue.cpp index 0951d3a3bdf..9b19fd20467 100644 --- a/intern/cycles/device/oneapi/queue.cpp +++ b/intern/cycles/device/oneapi/queue.cpp @@ -72,7 +72,7 @@ bool OneapiDeviceQueue::enqueue(DeviceKernel kernel, /* Update texture info in case memory moved to host. */ if (oneapi_device_->load_texture_info()) { - if (oneapi_device_->have_error()) { + if (!synchronize()) { return false; } }