
Commit e45b997
Merge branch 'ggml-org:master' into master
2 parents 1a6fde1 + d6d2c2a

8 files changed, +177 −192 lines

examples/llava/gemma3-cli.cpp

Lines changed: 1 addition & 1 deletion
@@ -317,6 +317,6 @@ int main(int argc, char ** argv) {
             is_first_msg = false;
         }
     }
-
+    llama_perf_context_print(ctx.lctx);
     return 0;
 }

examples/sycl/build.sh

Lines changed: 2 additions & 2 deletions
@@ -8,10 +8,10 @@ cd build
 source /opt/intel/oneapi/setvars.sh
 
 #for FP16
-#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON # faster for long-prompt inference
+#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DLLAMA_CURL=OFF # faster for long-prompt inference
 
 #for FP32
-cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=OFF
 
 #build example/main
 #cmake --build . --config Release --target main
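Note: LLAMA_CURL is llama.cpp's CMake option for building with libcurl (used for downloading models over HTTP); passing -DLLAMA_CURL=OFF presumably keeps this SYCL example build from requiring curl development packages. That rationale is inferred from the option name, not stated in the commit.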

ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 65 additions & 97 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 2 additions & 0 deletions
@@ -425,6 +425,8 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
             }
         case GGML_OP_IM2COL_BACK:
             return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
+        case GGML_OP_GET_ROWS_BACK:
+            return src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16;
         case GGML_OP_OUT_PROD:
             return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
                    src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
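
The two added lines extend the CPU backend's supports_op switch, which answers, per operator and per source tensor type, whether this backend can execute the op. A minimal stand-alone sketch of that dispatch shape (op_kind, data_type, and tensor are stand-ins, not the real ggml types):

// Sketch of a supports_op-style capability check, mirroring the switch above.
#include <cstdio>

enum class op_kind   { IM2COL_BACK, GET_ROWS_BACK, OUT_PROD };
enum class data_type { F16, F32 };

struct tensor { data_type type; };

// Answers, per op and per source type, whether this backend can run it.
static bool supports_op(op_kind op, const tensor & src0, const tensor & src1) {
    switch (op) {
        case op_kind::IM2COL_BACK:
            return src0.type == data_type::F32 && src1.type == data_type::F32;
        case op_kind::GET_ROWS_BACK:  // the case this commit adds
            return src0.type == data_type::F32 || src0.type == data_type::F16;
        default:                      // everything else: not handled in this sketch
            return false;
    }
}

int main() {
    tensor a { data_type::F16 }, b { data_type::F32 };
    std::printf("GET_ROWS_BACK on F16 src0: %d\n", supports_op(op_kind::GET_ROWS_BACK, a, b));
    return 0;
}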

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 23 additions & 22 deletions
@@ -1,6 +1,7 @@
 #include "ggml-rpc.h"
 #include "ggml-impl.h"
 #include "ggml-backend-impl.h"
+#include "ggml-cpp.h"
 
 #include <cinttypes>
 #include <string>
@@ -853,12 +854,13 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
         /*.no_alloc =*/ true,
     };
 
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
 
     if (tensor == nullptr) {
         GGML_LOG_ERROR("Null tensor pointer passed to server get_alloc_size function.\n");
-        ggml_free(ctx);
         return false;
     }
 
@@ -871,7 +873,6 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
 
     response.alloc_size = ggml_backend_buft_get_alloc_size(buft,tensor);
 
-    ggml_free(ctx);
     return true;
 }
 
@@ -985,11 +986,12 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
@@ -1016,7 +1018,6 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         printf("[%s] saved to '%s'\n", __func__, cache_file.c_str());
     }
     ggml_backend_tensor_set(tensor, data, offset, size);
-    ggml_free(ctx);
     return true;
 }
 
@@ -1060,11 +1061,12 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size, *hash);
@@ -1080,7 +1082,6 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
     }
     ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
     response.result = 1;
-    ggml_free(ctx);
     return true;
 }
 
@@ -1090,11 +1091,12 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("Null tensor pointer passed to server init_tensor function.\n");
-        ggml_free(ctx);
         return false;
     }
 
@@ -1110,11 +1112,9 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         // This pointer can either be passed around client/server, or probably better stored server-side and kept track of.
         // Currently unimplemented.
         GGML_LOG_ERROR("tensor->extra populated by the backend, this is currently unsupported.\n");
-        ggml_free(ctx);
         return false;
     }
 
-    ggml_free(ctx);
     return true;
 }
 
@@ -1124,11 +1124,12 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, request.offset, request.size);
@@ -1147,7 +1148,6 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
 
     response.resize(request.size, 0);
     ggml_backend_tensor_get(tensor, response.data(), request.offset, request.size);
-    ggml_free(ctx);
     return true;
 }
 
@@ -1157,12 +1157,14 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
+
     ggml_tensor * src = deserialize_tensor(ctx, &request.src);
     ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
     if (src == nullptr || dst == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
-        ggml_free(ctx);
         return false;
     }
 
@@ -1180,15 +1182,13 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
                        dst_data + src_size,
                        dst_base,
                        dst_base + dst_buf_sz);
-        ggml_free(ctx);
         return false;
     }
 
     GGML_PRINT_DEBUG("[%s] src->buffer: %p, dst->buffer: %p\n",
                      __func__, (void*) src->buffer, (void*) dst->buffer);
 
     response.result = ggml_backend_buffer_copy_tensor(src, dst);
-    ggml_free(ctx);
     return true;
 }
 
@@ -1242,7 +1242,9 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     struct ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false);
     graph->n_nodes = n_nodes;
     std::unordered_map<uint64_t, const rpc_tensor*> tensor_ptrs;
@@ -1257,7 +1259,6 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     response.result = status;
-    ggml_free(ctx);
     return true;
 }

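Every hunk in this file follows one pattern: a raw ggml_context* plus a ggml_free(ctx) call on each return path is replaced by a ggml_context_ptr from the newly included ggml-cpp.h, so the context is released automatically on every exit. A stand-alone sketch of that pattern, assuming ggml_context_ptr is a std::unique_ptr whose deleter calls ggml_free (the stub functions below stand in for the real ggml API):

// Stand-alone sketch of the RAII pattern introduced above.
#include <cstdio>
#include <memory>

struct ggml_context { };  // opaque in the real API

static ggml_context * init_ctx()                 { return new ggml_context{}; }
static void           free_ctx(ggml_context * p) { std::puts("context freed"); delete p; }

// Deleter + alias in the spirit of ggml_context_ptr (a std::unique_ptr
// whose deleter calls ggml_free).
struct ctx_deleter {
    void operator()(ggml_context * p) const { free_ctx(p); }
};
using ctx_ptr = std::unique_ptr<ggml_context, ctx_deleter>;

// Before: every early `return false` needed its own ggml_free(ctx) call.
// After: the guard frees the context on every path out of the function.
static bool handler(bool fail_early) {
    ctx_ptr guard { init_ctx() };
    ggml_context * ctx = guard.get();  // raw pointer for the existing call sites
    (void) ctx;
    if (fail_early) {
        return false;  // deleter runs here
    }
    return true;       // and here: one cleanup path for all returns
}

int main() {
    handler(true);
    handler(false);
    return 0;
}
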
ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 1 addition & 2 deletions
@@ -4018,8 +4018,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
             return ggml_is_contiguous(op->src[0]);
         }
         case GGML_OP_IM2COL:
-            // TODO: add support for the new F32 operations
-            return op->src[0]->type == GGML_TYPE_F16;
+            return true;
         case GGML_OP_UPSCALE:
             return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
         case GGML_OP_POOL_2D:
