@@ -1,6 +1,7 @@
 #include "ggml-rpc.h"
 #include "ggml-impl.h"
 #include "ggml-backend-impl.h"
+#include "ggml-cpp.h"

 #include <cinttypes>
 #include <string>
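Note on the new include: `ggml_context_ptr`, used throughout the hunks below, comes from `ggml-cpp.h`. As far as I can tell it is a `std::unique_ptr` alias whose deleter calls `ggml_free`, roughly along these lines (paraphrased sketch, not the verbatim header):

```cpp
// Paraphrased sketch of the relevant piece of ggml-cpp.h:
// a unique_ptr alias whose deleter releases the context.
#include <memory>

struct ggml_context_deleter {
    void operator()(ggml_context * ctx) { ggml_free(ctx); }
};

typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
```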
@@ -853,12 +854,13 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
         /*.no_alloc   =*/ true,
     };

-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);

     if (tensor == nullptr) {
         GGML_LOG_ERROR("Null tensor pointer passed to server get_alloc_size function.\n");
-        ggml_free(ctx);
         return false;
     }

@@ -871,7 +873,6 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_

     response.alloc_size = ggml_backend_buft_get_alloc_size(buft,tensor);

-    ggml_free(ctx);
     return true;
 }

@@ -985,11 +986,12 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
@@ -1016,7 +1018,6 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         printf("[%s] saved to '%s'\n", __func__, cache_file.c_str());
     }
     ggml_backend_tensor_set(tensor, data, offset, size);
-    ggml_free(ctx);
     return true;
 }

@@ -1060,11 +1061,12 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size, *hash);
@@ -1080,7 +1082,6 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
     }
     ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
     response.result = 1;
-    ggml_free(ctx);
     return true;
 }

@@ -1090,11 +1091,12 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("Null tensor pointer passed to server init_tensor function.\n");
-        ggml_free(ctx);
         return false;
     }

@@ -1110,11 +1112,9 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         // This pointer can either be passed around client/server, or probably better stored server-side and kept track of.
         // Currently unimplemented.
         GGML_LOG_ERROR("tensor->extra populated by the backend, this is currently unsupported.\n");
-        ggml_free(ctx);
         return false;
     }

-    ggml_free(ctx);
     return true;
 }

@@ -1124,11 +1124,12 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, request.offset, request.size);
@@ -1147,7 +1148,6 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<

     response.resize(request.size, 0);
     ggml_backend_tensor_get(tensor, response.data(), request.offset, request.size);
-    ggml_free(ctx);
     return true;
 }

@@ -1157,12 +1157,14 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
+
     ggml_tensor * src = deserialize_tensor(ctx, &request.src);
     ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
     if (src == nullptr || dst == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
-        ggml_free(ctx);
         return false;
     }

@@ -1180,15 +1182,13 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
                        dst_data + src_size,
                        dst_base,
                        dst_base + dst_buf_sz);
-        ggml_free(ctx);
         return false;
     }

     GGML_PRINT_DEBUG("[%s] src->buffer: %p, dst->buffer: %p\n",
                      __func__, (void*) src->buffer, (void*) dst->buffer);

     response.result = ggml_backend_buffer_copy_tensor(src, dst);
-    ggml_free(ctx);
     return true;
 }

@@ -1242,7 +1242,9 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     struct ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false);
     graph->n_nodes = n_nodes;
     std::unordered_map<uint64_t, const rpc_tensor*> tensor_ptrs;
@@ -1257,7 +1259,6 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     response.result = status;
-    ggml_free(ctx);
     return true;
 }

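Note on the pattern applied in every hunk above: once `ggml_context_ptr` owns the context, each return path releases it automatically, so the manual `ggml_free(ctx)` before every early return can simply be dropped. The added `GGML_ASSERT` also catches a failed `ggml_init`, which the old code silently carried forward as a null context. A minimal before/after sketch of the idea; `make_params` and `do_work` are hypothetical stand-ins for the real handler bodies, not actual server code:

```cpp
#include "ggml.h"
#include "ggml-cpp.h"

// Hypothetical stand-ins for the real handler internals.
static ggml_init_params make_params() {
    return { /*.mem_size =*/ ggml_tensor_overhead(), /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true };
}
static bool do_work(ggml_context * ctx) { return ctx != nullptr; }

// Before: every exit path must remember to call ggml_free.
static bool handler_before() {
    ggml_context * ctx = ggml_init(make_params());
    if (!do_work(ctx)) {
        ggml_free(ctx);   // easy to miss when adding a new early return
        return false;
    }
    ggml_free(ctx);
    return true;
}

// After: the unique_ptr deleter frees the context on every path.
static bool handler_after() {
    ggml_context_ptr ctx_ptr { ggml_init(make_params()) };
    GGML_ASSERT(ctx_ptr != nullptr);   // fail loudly if ggml_init fails
    return do_work(ctx_ptr.get());
}
```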