
Commit ff3ded1

gguf : read / write sample models
1 parent e46870f commit ff3ded1


3 files changed: +383 -70 lines changed


examples/gguf/gguf.cpp: +313 -5
@@ -1,15 +1,321 @@
 #include "ggml.h"

 #include <cstdio>
+#include <cinttypes>
 #include <string>
+#include <sstream>
+#include <fstream>
+#include <vector>

-bool gguf_write(const std::string & fname) {
+enum gguf_type {
+    GGUF_TYPE_UINT8 = 0,
+    GGUF_TYPE_INT8 = 1,
+    GGUF_TYPE_UINT16 = 2,
+    GGUF_TYPE_INT16 = 3,
+    GGUF_TYPE_UINT32 = 4,
+    GGUF_TYPE_INT32 = 5,
+    GGUF_TYPE_FLOAT32 = 6,
+    GGUF_TYPE_BOOL = 7,
+    GGUF_TYPE_STRING = 8,
+    GGUF_TYPE_ARRAY = 9,
+};

+template<typename T>
+static std::string to_string(const T & val) {
+    std::stringstream ss;
+    ss << val;
+    return ss.str();
+}
+
+void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
+    const int32_t n = val.size();
+    fout.write((const char *) &n, sizeof(n));
+    fout.write(val.c_str(), n);
+}
+
+void gguf_ex_write_i32(std::ofstream & fout, int32_t val) {
+    fout.write((const char *) &val, sizeof(val));
+}
+
+void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
+    fout.write((const char *) &val, sizeof(val));
+}
+
+template<typename T>
+void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
+    gguf_ex_write_str(fout, key);
+    fout.write((const char *) &type, sizeof(type));
+    fout.write((const char *) &val, sizeof(val));
+
+    fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), to_string(val).c_str());
+}
+
+template<>
+void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
+    gguf_ex_write_str(fout, key);
+    fout.write((const char *) &type, sizeof(type));
+
+    const int32_t n = val.size();
+    fout.write((const char *) &n, sizeof(n));
+    fout.write(val.c_str(), n);
+}
+
+bool gguf_ex_write(const std::string & fname) {
+    std::ofstream fout(fname.c_str(), std::ios::binary);
+
+    {
+        const int32_t magic = GGUF_MAGIC;
+        fout.write((const char *) &magic, sizeof(magic));
+    }
+
+    {
+        const int32_t version = GGUF_VERSION;
+        fout.write((const char *) &version, sizeof(version));
+    }
+
+    const int n_tensors = 10;
+    const int n_kv = 9;
+
+    fout.write((const char*) &n_tensors, sizeof(n_tensors));
+    fout.write((const char*) &n_kv, sizeof(n_kv));
+
+    fprintf(stdout, "%s: write header\n", __func__);
+
+    // kv data
+    {
+        gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
+        gguf_ex_write_param< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
+        gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
+        gguf_ex_write_param< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
+        gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
+        gguf_ex_write_param< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
+
+        gguf_ex_write_param<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
+        gguf_ex_write_param<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
+
+        gguf_ex_write_param<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
+    }
+
+    uint64_t offset_tensor = 0;
+
+    struct ggml_init_params params = {
+        /*.mem_size =*/ 128ull*1024ull*1024ull,
+        /*.mem_buffer =*/ NULL,
+        /*.no_alloc =*/ false,
+    };
+
+    struct ggml_context * ctx_data = ggml_init(params);
+
+    // tensor infos
+    for (int i = 0; i < n_tensors; ++i) {
+        const std::string name = "tensor_" + to_string(i);
+
+        int64_t ne[GGML_MAX_DIMS] = { 1 };
+        int32_t n_dims = rand() % GGML_MAX_DIMS + 1;
+
+        for (int j = 0; j < n_dims; ++j) {
+            ne[j] = rand() % 10 + 1;
+        }
+
+        struct ggml_tensor * cur = ggml_new_tensor(ctx_data, GGML_TYPE_F32, n_dims, ne);
+        ggml_set_name(cur, name.c_str());
+
+        {
+            float * data = (float *) cur->data;
+            for (int j = 0; j < ggml_nelements(cur); ++j) {
+                data[j] = 100 + i;
+            }
+        }
+
+        fprintf(stdout, "%s: tensor: %s, %d dims, ne = [", __func__, name.c_str(), n_dims);
+        for (int j = 0; j < 4; ++j) {
+            fprintf(stdout, "%s%3d", j == 0 ? "" : ", ", (int) cur->ne[j]);
+        }
+        fprintf(stdout, "], offset_tensor = %6" PRIu64 "\n", offset_tensor);
+
+        gguf_ex_write_str(fout, name);
+        gguf_ex_write_i32(fout, n_dims);
+        for (int j = 0; j < n_dims; ++j) {
+            gguf_ex_write_i32(fout, cur->ne[j]);
+        }
+        gguf_ex_write_i32(fout, cur->type);
+        gguf_ex_write_u64(fout, offset_tensor);
+
+        offset_tensor += GGML_PAD(ggml_nbytes(cur), GGUF_DEFAULT_ALIGNMENT);
+    }
+
+    const uint64_t offset_data = GGML_PAD((uint64_t) fout.tellp(), GGUF_DEFAULT_ALIGNMENT);
+
+    fprintf(stdout, "%s: data offset = %" PRIu64 "\n", __func__, offset_data);
+
+    {
+        const size_t pad = offset_data - fout.tellp();
+
+        for (size_t j = 0; j < pad; ++j) {
+            fout.put(0);
+        }
+    }
+
+    for (int i = 0; i < n_tensors; ++i) {
+        fprintf(stdout, "%s: writing tensor %d data\n", __func__, i);
+
+        const std::string name = "tensor_" + to_string(i);
+
+        struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name.c_str());
+
+        fout.write((const char *) cur->data, ggml_nbytes(cur));
+
+        {
+            const size_t pad = GGML_PAD(ggml_nbytes(cur), GGUF_DEFAULT_ALIGNMENT) - ggml_nbytes(cur);
+
+            for (size_t j = 0; j < pad; ++j) {
+                fout.put(0);
+            }
+        }
+    }
+
+    fout.close();
+
+    fprintf(stdout, "%s: wrote file '%s'\n", __func__, fname.c_str());
+
+    ggml_free(ctx_data);

     return true;
 }

-bool gguf_read(const std::string & fname) {
+// just read tensor info
+bool gguf_ex_read_0(const std::string & fname) {
+    struct gguf_init_params params = {
+        /*.no_alloc = */ false,
+        /*.ctx = */ NULL,
+    };
+
+    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+
+    fprintf(stdout, "version: %d\n", gguf_get_version(ctx));
+    fprintf(stdout, "alignment: %zu\n", gguf_get_alignment(ctx));
+    fprintf(stdout, "data offset: %zu\n", gguf_get_data_offset(ctx));
+
+    // kv
+    {
+        const int n_kv = gguf_get_n_kv(ctx);
+
+        fprintf(stdout, "n_kv: %d\n", n_kv);
+
+        for (int i = 0; i < n_kv; ++i) {
+            const char * key = gguf_get_key(ctx, i);
+
+            fprintf(stdout, "kv[%d]: key = %s\n", i, key);
+        }
+    }
+
+    // tensor info
+    {
+        const int n_tensors = gguf_get_n_tensors(ctx);
+
+        fprintf(stdout, "n_tensors: %d\n", n_tensors);
+
+        for (int i = 0; i < n_tensors; ++i) {
+            const char * name = gguf_get_tensor_name(ctx, i);
+            const size_t offset = gguf_get_tensor_offset(ctx, i);
+
+            fprintf(stdout, "tensor[%d]: name = %s, offset = %zu\n", i, name, offset);
+        }
+    }
+
+    return true;
+}
+
+// read and create ggml_context containing the tensors
+bool gguf_ex_read_1(const std::string & fname) {
+    struct ggml_context * ctx_data = NULL;
+
+    struct gguf_init_params params = {
+        /*.no_alloc = */ false,
+        /*.ctx = */ &ctx_data,
+    };
+
+    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+
+    fprintf(stdout, "version: %d\n", gguf_get_version(ctx));
+    fprintf(stdout, "alignment: %zu\n", gguf_get_alignment(ctx));
+    fprintf(stdout, "data offset: %zu\n", gguf_get_data_offset(ctx));
+
+    // kv
+    {
+        const int n_kv = gguf_get_n_kv(ctx);
+
+        fprintf(stdout, "n_kv: %d\n", n_kv);
+
+        for (int i = 0; i < n_kv; ++i) {
+            const char * key = gguf_get_key(ctx, i);
+
+            fprintf(stdout, "kv[%d]: key = %s\n", i, key);
+        }
+    }
+
+    // tensor info
+    {
+        const int n_tensors = gguf_get_n_tensors(ctx);
+
+        fprintf(stdout, "n_tensors: %d\n", n_tensors);
+
+        for (int i = 0; i < n_tensors; ++i) {
+            const char * name = gguf_get_tensor_name(ctx, i);
+            const size_t offset = gguf_get_tensor_offset(ctx, i);
+
+            fprintf(stdout, "tensor[%d]: name = %s, offset = %zu\n", i, name, offset);
+        }
+    }
+
+    // data
+    {
+        const int n_tensors = gguf_get_n_tensors(ctx);
+
+        for (int i = 0; i < n_tensors; ++i) {
+            fprintf(stdout, "%s: reading tensor %d data\n", __func__, i);
+
+            const std::string name = "tensor_" + to_string(i);
+
+            struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name.c_str());
+
+            fprintf(stdout, "tensor[%d]: name = %s, data = %p\n", i, name.c_str(), cur->data);
+
+            // check data
+            {
+                const float * data = (const float *) cur->data;
+                for (int j = 0; j < ggml_nelements(cur); ++j) {
+                    if (data[j] != 100 + i) {
+                        fprintf(stderr, "tensor[%d]: data[%d] = %f\n", i, j, data[j]);
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+
+    ggml_free(ctx_data);
+    gguf_free(ctx);
+
+    return true;
+}
+
+// read tensor info and mmap the data
+bool gguf_ex_read_2(const std::string & fname) {
+    struct ggml_context * ctx_data = NULL;
+
+    struct gguf_init_params params = {
+        /*.no_alloc = */ true,
+        /*.ctx = */ &ctx_data,
+    };
+
+    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+
+    // TODO
+
+    ggml_free(ctx_data);
+    gguf_free(ctx);
+
     return true;
 }

@@ -20,14 +326,16 @@ int main(int argc, char ** argv) {
     }

     const std::string fname(argv[1]);
-    const std::string mode(argv[2]);
+    const std::string mode (argv[2]);

     GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");

     if (mode == "w") {
-        GGML_ASSERT(gguf_write(fname) && "failed to write gguf file");
+        GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
     } else if (mode == "r") {
-        GGML_ASSERT(gguf_read(fname) && "failed to read gguf file");
+        GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
+        GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
+        GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
     }

     return 0;
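
For reference, gguf_ex_write above emits four 32-bit fields before anything else: GGUF_MAGIC, GGUF_VERSION, n_tensors and n_kv, followed by the key/value records, the tensor infos, padding up to GGUF_DEFAULT_ALIGNMENT, and finally the raw tensor data. The standalone snippet below is a minimal sketch, not part of this commit, that dumps just that fixed-size header; it assumes only the write order shown in the diff and that the input file was produced by this example.

#include <cstdint>
#include <cstdio>
#include <fstream>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s file.gguf\n", argv[0]);
        return 1;
    }

    std::ifstream fin(argv[1], std::ios::binary);

    // gguf_ex_write writes these four fields first, each as a 32-bit value
    int32_t magic = 0, version = 0, n_tensors = 0, n_kv = 0;
    fin.read((char *) &magic,     sizeof(magic));
    fin.read((char *) &version,   sizeof(version));
    fin.read((char *) &n_tensors, sizeof(n_tensors));
    fin.read((char *) &n_kv,      sizeof(n_kv));

    fprintf(stdout, "magic     = 0x%08x\n", magic);
    fprintf(stdout, "version   = %d\n",     version);
    fprintf(stdout, "n_tensors = %d\n",     n_tensors);
    fprintf(stdout, "n_kv      = %d\n",     n_kv);

    return 0;
}

Assuming the example builds to a binary named gguf, running "gguf test.gguf w" followed by "gguf test.gguf r" exercises the writer and all three read paths in main.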

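gguf_ex_read_2 is only a stub in this commit (its body is a TODO). As a rough illustration of the approach named in its comment, read tensor info and mmap the data, here is a speculative sketch that parses the metadata the same way gguf_ex_read_0 does, maps the file with plain POSIX mmap, and locates each tensor at the data offset plus its per-tensor offset. The function name gguf_ex_read_2_sketch, the metadata-only gguf_init_params, and the check of the 100 + i fill pattern are illustrative assumptions, not the commit's eventual implementation.

#include "ggml.h"

#include <cstdio>
#include <string>

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

bool gguf_ex_read_2_sketch(const std::string & fname) {
    // read metadata only, using the same params as gguf_ex_read_0
    struct gguf_init_params params = {
        /*.no_alloc = */ false,
        /*.ctx = */ NULL,
    };

    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

    // map the whole file read-only with POSIX mmap
    const int fd = open(fname.c_str(), O_RDONLY);

    struct stat st;
    fstat(fd, &st);

    const char * base = (const char *) mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

    const size_t offset_data = gguf_get_data_offset(ctx);

    for (int i = 0; i < gguf_get_n_tensors(ctx); ++i) {
        const char * name   = gguf_get_tensor_name(ctx, i);
        const size_t offset = gguf_get_tensor_offset(ctx, i);

        // tensor data lives at data offset + per-tensor offset in the mapping;
        // the sample writer fills tensor_i with the value 100 + i
        const float * data = (const float *) (base + offset_data + offset);

        fprintf(stdout, "tensor[%d]: name = %s, data[0] = %f\n", i, name, data[0]);
    }

    munmap((void *) base, st.st_size);
    close(fd);

    gguf_free(ctx);

    return true;
}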