Skip to content

Commit 455691c

Browse files
committed
cont : use MTLHeapTypePlacement
ggml-ci
1 parent 91d5dc5 commit 455691c

File tree

1 file changed

+43
-42
lines changed

1 file changed

+43
-42
lines changed

ggml/src/ggml-metal/ggml-metal.m

+43-42
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,6 @@
1919
// max number of MTLCommandBuffer used to submit a graph for processing
2020
#define GGML_METAL_MAX_COMMAND_BUFFERS 8
2121

22-
// max number of buffers that can be allocated on the heap per command buffer
23-
#define GGML_METAL_MAX_HEAP_BUFFERS 64
24-
2522
#ifndef TARGET_OS_VISION
2623
#define TARGET_OS_VISION 0
2724
#endif
@@ -472,14 +469,15 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
472469
};
473470

474471
struct ggml_metal_heap {
475-
int n;
476472
int fail;
477473

474+
size_t offs;
478475
size_t need;
479476

480477
id<MTLDevice> device;
481478
id<MTLHeap> obj;
482-
id<MTLBuffer> bufs[GGML_METAL_MAX_HEAP_BUFFERS];
479+
480+
NSMutableArray * bufs;
483481
};
484482

485483
static struct ggml_metal_heap * ggml_metal_heap_init(id<MTLDevice> device, size_t size) {
@@ -488,7 +486,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
488486
MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc] init];
489487
desc.storageMode = MTLStorageModePrivate;
490488
desc.cpuCacheMode = MTLCPUCacheModeDefaultCache;
491-
desc.type = MTLHeapTypeAutomatic; // TODO: use MTLHeapTypePlacement
489+
desc.type = MTLHeapTypePlacement;
492490
desc.size = size;
493491

494492
heap->device = device;
@@ -501,39 +499,35 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
501499
return false;
502500
}
503501

504-
for (int i = 0; i < GGML_METAL_MAX_HEAP_BUFFERS; ++i) {
505-
heap->bufs[i] = nil;
506-
}
507-
508502
[desc release];
509503

504+
heap->bufs = [[NSMutableArray alloc] init];
505+
510506
return heap;
511507
}
512508

509+
static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
510+
heap->fail = 0;
511+
heap->offs = 0;
512+
heap->need = 0;
513+
514+
for (id<MTLBuffer> buf in heap->bufs) {
515+
[buf release];
516+
}
517+
[heap->bufs removeAllObjects];
518+
}
519+
513520
static void ggml_metal_heap_free(struct ggml_metal_heap * heap) {
514521
if (heap == nil) {
515522
return;
516523
}
517524

518-
[heap->obj release];
519-
520-
free(heap);
521-
}
522-
523-
static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
524-
heap->n = 0;
525-
heap->fail = 0;
526-
heap->need = 0;
525+
ggml_metal_heap_reset(heap);
527526

528-
for (int i = 0; i < GGML_METAL_MAX_HEAP_BUFFERS; i++) {
529-
if (heap->bufs[i]) {
530-
[heap->bufs[i] release];
531-
heap->bufs[i] = nil;
532-
continue;
533-
}
527+
[heap->obj release];
528+
[heap->bufs release];
534529

535-
break;
536-
}
530+
free(heap);
537531
}
538532

539533
static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
@@ -546,7 +540,7 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
546540
MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc] init];
547541
desc.storageMode = MTLStorageModePrivate;
548542
desc.cpuCacheMode = MTLCPUCacheModeDefaultCache;
549-
desc.type = MTLHeapTypeAutomatic; // TODO: use MTLHeapTypePlacement
543+
desc.type = MTLHeapTypePlacement;
550544
desc.size = size;
551545

552546
heap->obj = [heap->device newHeapWithDescriptor:desc];
@@ -571,33 +565,32 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
571565

572566
const size_t size_aligned = GGML_PAD(size, alignment);
573567

574-
//GGML_LOG_INFO("%s: size = %zu, size_aligned = %zu, need = %zu, fail = %d\n", __func__, size, size_aligned, heap->need, heap->fail);
568+
heap->offs += size_aligned;
569+
heap->need = MAX(heap->need, heap->offs + size_aligned);
575570

576-
heap->need += size_aligned;
571+
//GGML_LOG_INFO("%s: size = %zu, size_aligned = %zu, offs = %zu, need = %zu\n", __func__, size, size_aligned, heap->offs, heap->need);
577572

578573
if (no_alloc) {
579574
return nil;
580575
}
581576

582-
if (!heap->fail && size_aligned > [heap->obj maxAvailableSizeWithAlignment:alignment]) {
577+
if (!heap->fail && heap->offs + size_aligned > [heap->obj size]) {
583578
heap->fail = 1;
584579
}
585580

586-
if (!heap->fail && heap->n >= GGML_METAL_MAX_HEAP_BUFFERS) {
587-
heap->fail = 2;
588-
}
589-
590581
if (heap->fail) {
591582
return nil;
592583
}
593584

594-
id<MTLBuffer> buf = [heap->obj newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate];
585+
id<MTLBuffer> buf = [heap->obj newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate offset:heap->offs];
595586
if (!buf) {
596587
heap->fail = 3;
597588
return nil;
598589
}
599590

600-
heap->bufs[heap->n++] = buf;
591+
[heap->bufs addObject:buf];
592+
593+
//GGML_LOG_INFO("%s: allocated buffer, size = %zu, offs = %zu, heap size = %zu, heap used = %zu\n", __func__, size_aligned, heap->offs, [heap->obj size], [heap->obj usedSize]);
601594

602595
return buf;
603596
}
@@ -634,7 +627,6 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
634627
void (^encode_async)(size_t ith);
635628

636629
// n_cb command buffers + 1 used by the main thread
637-
//id<MTLCommandBuffer> command_buffers[GGML_METAL_MAX_COMMAND_BUFFERS + 1];
638630
struct ggml_metal_command_buffer cmd_bufs[GGML_METAL_MAX_COMMAND_BUFFERS + 1];
639631

640632
// abort ggml_metal_graph_compute if callback returns true
@@ -1638,13 +1630,16 @@ static bool ggml_metal_encode_node(
16381630
// heap buffers for temporary data
16391631
id<MTLBuffer> h_src0 = nil;
16401632

1633+
// always allocate buffers from the start of the heap for the current node
1634+
heap->offs = 0;
1635+
16411636
switch (dst->op) {
16421637
case GGML_OP_SOFT_MAX:
16431638
{
16441639
h_src0 = ggml_metal_heap_alloc(heap, ggml_nbytes(src0), no_alloc);
16451640
if (!no_alloc && !h_src0) {
1646-
GGML_LOG_ERROR("%s: failed to allocate buffer, idx = %4d, size = %8zu, need = %8zu, max available = %9zu, heap size = %9zu, heap used = %zu, fail = %d\n",
1647-
__func__, idx, ggml_nbytes(src0), heap->need, [heap->obj maxAvailableSizeWithAlignment:0], [heap->obj size], [heap->obj usedSize], heap->fail);
1641+
GGML_LOG_ERROR("%s: failed to allocate buffer, idx = %4d, size = %8zu, offs = %8zu, max available = %9zu, heap size = %9zu, heap used = %zu, fail = %d\n",
1642+
__func__, idx, ggml_nbytes(src0), heap->offs, [heap->obj maxAvailableSizeWithAlignment:0], [heap->obj size], [heap->obj usedSize], heap->fail);
16481643
return false;
16491644
}
16501645
} break;
@@ -2250,8 +2245,6 @@ static bool ggml_metal_encode_node(
22502245
{
22512246
GGML_ASSERT(!src1 || src1->type == GGML_TYPE_F16 || src1->type == GGML_TYPE_F32);
22522247

2253-
GGML_ASSERT(ggml_is_contiguous(src0));
2254-
22552248
int nth = 32; // SIMD width
22562249

22572250
id<MTLComputePipelineState> pipeline = nil;
@@ -4836,6 +4829,12 @@ static enum ggml_status ggml_metal_graph_compute(
48364829
[next_buffer commit];
48374830
}
48384831

4832+
for (int i = 0; i <= n_cb; ++i) {
4833+
struct ggml_metal_heap * heap = ctx->cmd_bufs[i].heap;
4834+
4835+
[heap->obj setPurgeableState:MTLPurgeableStateEmpty];
4836+
}
4837+
48394838
if (!should_capture && ctx->capture_started) {
48404839
[ctx->capture_scope endScope];
48414840
[[MTLCaptureManager sharedCaptureManager] stopCapture];
@@ -5233,6 +5232,8 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
52335232
}
52345233
}
52355234

5235+
//GGML_LOG_INFO("XXXXXXXXXXXXXXXXXXXXXXXXX\n");
5236+
52365237
if (can_compute) {
52375238
for (int idx = node_start; idx < node_end; ++idx) {
52385239
if (should_capture) {

0 commit comments

Comments
 (0)