@@ -568,7 +568,7 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
568
568
569
569
heap->need += size_aligned;
570
570
571
- if (!heap->fail && heap-> need > [heap->obj maxAvailableSizeWithAlignment: alignment]) {
571
+ if (!heap->fail && size_aligned > [heap->obj maxAvailableSizeWithAlignment: alignment]) {
572
572
heap->fail = 1 ;
573
573
}
574
574
@@ -2278,11 +2278,13 @@ static bool ggml_metal_encode_node(
2278
2278
/* .nb3 =*/ nb03,
2279
2279
};
2280
2280
2281
- id <MTLBuffer > id_src0h = ggml_metal_heap_alloc (heap, ggml_nbytes (src0), 32 );
2281
+ id <MTLBuffer > id_src0h = ggml_metal_heap_alloc (heap, ggml_nbytes (src0), 64 * 1024 );
2282
2282
if (!id_src0h) {
2283
- // GGML_LOG_ERROR("%s: failed to allocate buffer for cpy, size = %zu, need = %zu, max available = %zu\n",
2284
- // __func__, ggml_nbytes(src0), heap->need, [heap->obj maxAvailableSizeWithAlignment:32]);
2285
- return false ;
2283
+ // GGML_LOG_ERROR("%s: failed to allocate buffer, idx = %4d, size = %8zu, need = %8zu, max available = %9zu, heap size = %9zu, heap used = %zu\n",
2284
+ // __func__, idx, ggml_nbytes(src0), heap->need, [heap->obj maxAvailableSizeWithAlignment:0], [heap->obj size], [heap->obj usedSize]);
2285
+ return true ;
2286
+ } else {
2287
+ // GGML_LOG_ERROR("%s: allocated %zu\n", __func__, ggml_nbytes(src0));
2286
2288
}
2287
2289
2288
2290
if (src0->type == GGML_TYPE_F16) {
@@ -4689,7 +4691,7 @@ static enum ggml_status ggml_metal_graph_compute(
4689
4691
// number of threads in addition to the main thread
4690
4692
const int n_cb = ctx->n_cb ;
4691
4693
4692
- int n_try = 64 ;
4694
+ int n_try = 2 ;
4693
4695
4694
4696
// submit the ggml compute graph to the GPU by creating command buffers and encoding the ops in them
4695
4697
// the first n_nodes_0 are encoded and submitted for processing directly by the calling thread
@@ -4816,7 +4818,7 @@ static enum ggml_status ggml_metal_graph_compute(
4816
4818
for (int i = 0 ; i <= n_cb; ++i) {
4817
4819
struct ggml_metal_heap * heap = ctx->cmd_bufs [i].heap ;
4818
4820
4819
- const size_t need = 4 * heap->need ;
4821
+ const size_t need = heap->need ;
4820
4822
4821
4823
// printf("\nXXXXXXXXXXXXXXXXX cb %d, need = %zu, fail = %d, size = %zu\n", i, need, heap->fail, [heap->obj currentAllocatedSize]);
4822
4824
0 commit comments