Skip to content

Commit f99f958

Browse files
committed
Refactor: Cycles: Add host_alloc/free to device API
This may be used by a device to allocate host memory in a way that makes copying that memory to the device more efficient. Also rename and group the device memory allocation functions for clarity. Pull Request: https://projects.blender.org/blender/blender/pulls/134412
1 parent 455d419 commit f99f958

File tree

12 files changed

+132
-95
lines changed

12 files changed

+132
-95
lines changed

intern/cycles/device/cuda/device_impl.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,7 @@ void CUDADevice::free_device(void *device_pointer)
537537
cuda_assert(cuMemFree((CUdeviceptr)device_pointer));
538538
}
539539

540-
bool CUDADevice::alloc_host(void *&shared_pointer, const size_t size)
540+
bool CUDADevice::shared_alloc(void *&shared_pointer, const size_t size)
541541
{
542542
CUDAContextScope scope(this);
543543

@@ -546,14 +546,14 @@ bool CUDADevice::alloc_host(void *&shared_pointer, const size_t size)
546546
return mem_alloc_result == CUDA_SUCCESS;
547547
}
548548

549-
void CUDADevice::free_host(void *shared_pointer)
549+
void CUDADevice::shared_free(void *shared_pointer)
550550
{
551551
CUDAContextScope scope(this);
552552

553553
cuMemFreeHost(shared_pointer);
554554
}
555555

556-
void *CUDADevice::transform_host_to_device_pointer(const void *shared_pointer)
556+
void *CUDADevice::shared_to_device_pointer(const void *shared_pointer)
557557
{
558558
CUDAContextScope scope(this);
559559
void *device_pointer = nullptr;
@@ -646,7 +646,7 @@ void CUDADevice::mem_zero(device_memory &mem)
646646
return;
647647
}
648648

649-
if (!(mem.is_host_mapped(this) && mem.host_pointer == mem.shared_pointer)) {
649+
if (!(mem.is_shared(this) && mem.host_pointer == mem.shared_pointer)) {
650650
const CUDAContextScope scope(this);
651651
cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
652652
}

intern/cycles/device/cuda/device_impl.h

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -59,42 +59,43 @@ class CUDADevice : public GPUDevice {
5959
bool force_ptx = false);
6060

6161
bool load_kernels(const uint kernel_features) override;
62-
6362
void reserve_local_memory(const uint kernel_features);
6463

65-
void get_device_memory_info(size_t &total, size_t &free) override;
66-
bool alloc_device(void *&device_pointer, const size_t size) override;
67-
void free_device(void *device_pointer) override;
68-
bool alloc_host(void *&shared_pointer, const size_t size) override;
69-
void free_host(void *shared_pointer) override;
70-
void *transform_host_to_device_pointer(const void *shared_pointer) override;
71-
void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override;
72-
64+
/* All memory types. */
7365
void mem_alloc(device_memory &mem) override;
74-
7566
void mem_copy_to(device_memory &mem) override;
76-
7767
void mem_move_to_host(device_memory &mem) override;
78-
7968
void mem_copy_from(
8069
device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override;
81-
8270
void mem_zero(device_memory &mem) override;
83-
8471
void mem_free(device_memory &mem) override;
8572

8673
device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override;
8774

88-
void const_copy_to(const char *name, void *host, const size_t size) override;
89-
75+
/* Global memory. */
9076
void global_alloc(device_memory &mem);
9177
void global_copy_to(device_memory &mem);
9278
void global_free(device_memory &mem);
9379

80+
/* Texture memory. */
9481
void tex_alloc(device_texture &mem);
9582
void tex_copy_to(device_texture &mem);
9683
void tex_free(device_texture &mem);
9784

85+
/* Device side memory. */
86+
void get_device_memory_info(size_t &total, size_t &free) override;
87+
bool alloc_device(void *&device_pointer, const size_t size) override;
88+
void free_device(void *device_pointer) override;
89+
90+
/* Shared memory. */
91+
bool shared_alloc(void *&shared_pointer, const size_t size) override;
92+
void shared_free(void *shared_pointer) override;
93+
void *shared_to_device_pointer(const void *shared_pointer) override;
94+
95+
/* Memory copy. */
96+
void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override;
97+
void const_copy_to(const char *name, void *host, const size_t size) override;
98+
9899
bool should_use_graphics_interop() override;
99100

100101
unique_ptr<DeviceQueue> gpu_queue_create() override;

intern/cycles/device/device.cpp

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,16 @@ OSLGlobals *Device::get_cpu_osl_memory()
496496
return nullptr;
497497
}
498498

499+
/* Default host memory allocation for a device.
 *
 * Returns the result of util_aligned_malloc() for `size` bytes, requested with
 * MIN_ALIGNMENT_CPU_DATA_TYPES alignment. The memory type argument is ignored by this
 * base implementation.
 *
 * NOTE(review): the method is declared virtual on Device, presumably so that backends
 * can provide an allocation better suited for host to device copies -- confirm against
 * the overriding devices. */
void *Device::host_alloc(const MemoryType /*type*/, const size_t size)
500+
{
501+
return util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES);
502+
}
503+
504+
/* Default host memory release for a device.
 *
 * Forwards `host_pointer` and `size` to util_aligned_free(). The memory type argument
 * is ignored by this base implementation.
 *
 * NOTE(review): `host_pointer` is presumably expected to come from the matching
 * host_alloc() of the same device, with `size` being the allocated size -- confirm
 * against callers. */
void Device::host_free(const MemoryType /*type*/, void *host_pointer, const size_t size)
505+
{
506+
util_aligned_free(host_pointer, size);
507+
}
508+
499509
GPUDevice::~GPUDevice() noexcept(false) = default;
500510

501511
bool GPUDevice::load_texture_info()
@@ -572,7 +582,7 @@ void GPUDevice::move_textures_to_host(size_t size, const size_t headroom, const
572582

573583
/* Can only move textures allocated on this device (and not those from peer devices).
574584
* And need to ignore memory that is already on the host. */
575-
if (!mem.is_resident(this) || mem.is_host_mapped(this)) {
585+
if (!mem.is_resident(this) || mem.is_shared(this)) {
576586
continue;
577587
}
578588

@@ -677,14 +687,14 @@ GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, const size_t pitch_
677687
}
678688
else if (map_host_used + size < map_host_limit) {
679689
/* Allocate host memory ourselves. */
680-
mem_alloc_result = alloc_host(shared_pointer, size);
690+
mem_alloc_result = shared_alloc(shared_pointer, size);
681691

682692
assert((mem_alloc_result && shared_pointer != nullptr) ||
683693
(!mem_alloc_result && shared_pointer == nullptr));
684694
}
685695

686696
if (mem_alloc_result) {
687-
device_pointer = transform_host_to_device_pointer(shared_pointer);
697+
device_pointer = shared_to_device_pointer(shared_pointer);
688698
map_host_used += size;
689699
status = " in host memory";
690700
}
@@ -728,7 +738,7 @@ GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, const size_t pitch_
728738
mem.host_pointer != shared_pointer)
729739
{
730740
memcpy(shared_pointer, mem.host_pointer, size);
731-
util_aligned_free(mem.host_pointer, mem.memory_size());
741+
host_free(mem.type, mem.host_pointer, mem.memory_size());
732742
mem.host_pointer = shared_pointer;
733743
}
734744
mem.shared_pointer = shared_pointer;
@@ -752,7 +762,7 @@ void GPUDevice::generic_free(device_memory &mem)
752762
DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
753763

754764
/* For host mapped memory, reference counting is used to safely free it. */
755-
if (mem.is_host_mapped(this)) {
765+
if (mem.is_shared(this)) {
756766
assert(mem.shared_counter > 0);
757767
if (--mem.shared_counter == 0) {
758768
if (mem.host_pointer == mem.shared_pointer) {
@@ -764,7 +774,7 @@ void GPUDevice::generic_free(device_memory &mem)
764774
mem.host_pointer = mem.host_alloc(size);
765775
memcpy(mem.host_pointer, mem.shared_pointer, size);
766776
}
767-
free_host(mem.shared_pointer);
777+
shared_free(mem.shared_pointer);
768778
mem.shared_pointer = nullptr;
769779
}
770780
map_host_used -= mem.device_size;
@@ -791,17 +801,17 @@ void GPUDevice::generic_copy_to(device_memory &mem)
791801
/* If not host mapped, the current device only uses device memory allocated by backend
792802
* device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
793803
* copy data from mem.host_pointer. */
794-
if (!(mem.is_host_mapped(this) && mem.host_pointer == mem.shared_pointer)) {
804+
if (!(mem.is_shared(this) && mem.host_pointer == mem.shared_pointer)) {
795805
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
796806
}
797807
}
798808

799-
bool GPUDevice::is_host_mapped(const void *shared_pointer,
800-
const device_ptr device_pointer,
801-
Device * /*sub_device*/)
809+
bool GPUDevice::is_shared(const void *shared_pointer,
810+
const device_ptr device_pointer,
811+
Device * /*sub_device*/)
802812
{
803813
return (shared_pointer && device_pointer &&
804-
(device_ptr)transform_host_to_device_pointer(shared_pointer) == device_pointer);
814+
(device_ptr)shared_to_device_pointer(shared_pointer) == device_pointer);
805815
}
806816

807817
/* DeviceInfo */

intern/cycles/device/device.h

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -247,9 +247,9 @@ class Device {
247247
return false;
248248
}
249249

250-
virtual bool is_host_mapped(const void * /*shared_pointer*/,
251-
const device_ptr /*device_pointer*/,
252-
Device * /*sub_device*/)
250+
virtual bool is_shared(const void * /*shared_pointer*/,
251+
const device_ptr /*device_pointer*/,
252+
Device * /*sub_device*/)
253253
{
254254
return false;
255255
}
@@ -320,6 +320,9 @@ class Device {
320320
friend class DeviceServer;
321321
friend class device_memory;
322322

323+
virtual void *host_alloc(const MemoryType type, const size_t size);
324+
virtual void host_free(const MemoryType type, void *host_pointer, const size_t size);
325+
323326
virtual void mem_alloc(device_memory &mem) = 0;
324327
virtual void mem_copy_to(device_memory &mem) = 0;
325328
virtual void mem_move_to_host(device_memory &mem) = 0;
@@ -398,22 +401,21 @@ class GPUDevice : public Device {
398401
/* total - amount of device memory, free - amount of available device memory */
399402
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
400403

404+
/* Device side memory. */
401405
virtual bool alloc_device(void *&device_pointer, const size_t size) = 0;
402-
403406
virtual void free_device(void *device_pointer) = 0;
404407

405-
virtual bool alloc_host(void *&shared_pointer, const size_t size) = 0;
406-
407-
virtual void free_host(void *shared_pointer) = 0;
408-
409-
bool is_host_mapped(const void *shared_pointer,
410-
const device_ptr device_pointer,
411-
Device *sub_device) override;
412-
408+
/* Shared memory. */
409+
virtual bool shared_alloc(void *&shared_pointer, const size_t size) = 0;
410+
virtual void shared_free(void *shared_pointer) = 0;
411+
bool is_shared(const void *shared_pointer,
412+
const device_ptr device_pointer,
413+
Device *sub_device) override;
413414
/* This function should return device pointer corresponding to shared pointer, which
414-
* is host buffer, allocated in `alloc_host`. */
415-
virtual void *transform_host_to_device_pointer(const void *shared_pointer) = 0;
415+
* is host buffer, allocated in `shared_alloc`. */
416+
virtual void *shared_to_device_pointer(const void *shared_pointer) = 0;
416417

418+
/* Memory copy. */
417419
virtual void copy_host_to_device(void *device_pointer,
418420
void *host_pointer,
419421
const size_t size) = 0;

intern/cycles/device/hip/device_impl.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ void HIPDevice::free_device(void *device_pointer)
498498
hip_assert(hipFree((hipDeviceptr_t)device_pointer));
499499
}
500500

501-
bool HIPDevice::alloc_host(void *&shared_pointer, const size_t size)
501+
bool HIPDevice::shared_alloc(void *&shared_pointer, const size_t size)
502502
{
503503
HIPContextScope scope(this);
504504

@@ -508,14 +508,14 @@ bool HIPDevice::alloc_host(void *&shared_pointer, const size_t size)
508508
return mem_alloc_result == hipSuccess;
509509
}
510510

511-
void HIPDevice::free_host(void *shared_pointer)
511+
void HIPDevice::shared_free(void *shared_pointer)
512512
{
513513
HIPContextScope scope(this);
514514

515515
hipHostFree(shared_pointer);
516516
}
517517

518-
void *HIPDevice::transform_host_to_device_pointer(const void *shared_pointer)
518+
void *HIPDevice::shared_to_device_pointer(const void *shared_pointer)
519519
{
520520
HIPContextScope scope(this);
521521
void *device_pointer = nullptr;
@@ -608,7 +608,7 @@ void HIPDevice::mem_zero(device_memory &mem)
608608
return;
609609
}
610610

611-
if (!(mem.is_host_mapped(this) && mem.host_pointer == mem.shared_pointer)) {
611+
if (!(mem.is_shared(this) && mem.host_pointer == mem.shared_pointer)) {
612612
const HIPContextScope scope(this);
613613
hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
614614
}

intern/cycles/device/hip/device_impl.h

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,39 +60,41 @@ class HIPDevice : public GPUDevice {
6060
bool load_kernels(const uint kernel_features) override;
6161
void reserve_local_memory(const uint kernel_features);
6262

63-
void get_device_memory_info(size_t &total, size_t &free) override;
64-
bool alloc_device(void *&device_pointer, const size_t size) override;
65-
void free_device(void *device_pointer) override;
66-
bool alloc_host(void *&shared_pointer, const size_t size) override;
67-
void free_host(void *shared_pointer) override;
68-
void *transform_host_to_device_pointer(const void *shared_pointer) override;
69-
void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override;
70-
63+
/* All memory types. */
7164
void mem_alloc(device_memory &mem) override;
72-
7365
void mem_copy_to(device_memory &mem) override;
74-
7566
void mem_move_to_host(device_memory &mem) override;
76-
7767
void mem_copy_from(
7868
device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override;
79-
8069
void mem_zero(device_memory &mem) override;
81-
8270
void mem_free(device_memory &mem) override;
8371

8472
device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override;
8573

86-
void const_copy_to(const char *name, void *host, const size_t size) override;
87-
74+
/* Global memory. */
8875
void global_alloc(device_memory &mem);
8976
void global_copy_to(device_memory &mem);
9077
void global_free(device_memory &mem);
9178

79+
/* Texture memory. */
9280
void tex_alloc(device_texture &mem);
9381
void tex_copy_to(device_texture &mem);
9482
void tex_free(device_texture &mem);
9583

84+
/* Device side memory. */
85+
void get_device_memory_info(size_t &total, size_t &free) override;
86+
bool alloc_device(void *&device_pointer, const size_t size) override;
87+
void free_device(void *device_pointer) override;
88+
89+
/* Shared memory. */
90+
bool shared_alloc(void *&shared_pointer, const size_t size) override;
91+
void shared_free(void *shared_pointer) override;
92+
void *shared_to_device_pointer(const void *shared_pointer) override;
93+
94+
/* Memory copy. */
95+
void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override;
96+
void const_copy_to(const char *name, void *host, const size_t size) override;
97+
9698
/* Graphics resources interoperability. */
9799
bool should_use_graphics_interop() override;
98100

intern/cycles/device/memory.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ void *device_memory::host_alloc(const size_t size)
4545
return nullptr;
4646
}
4747

48-
void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES);
48+
void *ptr = device->host_alloc(type, size);
4949

5050
if (ptr == nullptr) {
5151
throw std::bad_alloc();
@@ -58,7 +58,7 @@ void device_memory::host_and_device_free()
5858
{
5959
if (host_pointer) {
6060
if (host_pointer != shared_pointer) {
61-
util_aligned_free(host_pointer, memory_size());
61+
device->host_free(type, host_pointer, memory_size());
6262
}
6363
host_pointer = nullptr;
6464
}
@@ -136,9 +136,9 @@ bool device_memory::is_resident(Device *sub_device) const
136136
return device->is_resident(device_pointer, sub_device);
137137
}
138138

139-
bool device_memory::is_host_mapped(Device *sub_device) const
139+
bool device_memory::is_shared(Device *sub_device) const
140140
{
141-
return device->is_host_mapped(shared_pointer, device_pointer, sub_device);
141+
return device->is_shared(shared_pointer, device_pointer, sub_device);
142142
}
143143

144144
/* Device Sub `ptr`. */

intern/cycles/device/memory.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ class device_memory {
255255
void restore_device();
256256

257257
bool is_resident(Device *sub_device) const;
258-
bool is_host_mapped(Device *sub_device) const;
258+
bool is_shared(Device *sub_device) const;
259259

260260
/* No copying and allowed.
261261
*

intern/cycles/device/metal/device_impl.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -731,7 +731,7 @@
731731
if (mem.host_pointer && mem.host_pointer != mmem->hostPtr) {
732732
memcpy(mmem->hostPtr, mem.host_pointer, size);
733733

734-
util_aligned_free(mem.host_pointer, mem.memory_size());
734+
host_free(mem.type, mem.host_pointer, mem.memory_size());
735735
mem.host_pointer = mmem->hostPtr;
736736
}
737737
mem.shared_pointer = mmem->hostPtr;

0 commit comments

Comments
 (0)