Skip to content

Commit ef482eb

Browse files
committed
gguf : add comments
1 parent 5628ec7 commit ef482eb

File tree

1 file changed

+114
-98
lines changed

1 file changed

+114
-98
lines changed

ggml.c

+114 -98
Original file line number | Diff line number | Diff line change
@@ -18407,106 +18407,120 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1840718407
// offset from start of file
1840818408
size_t offset = 0;
1840918409

18410-
// check the magic before making allocations
1841118410
uint32_t magic = 0;
18412-
gguf_fread_el(&magic, sizeof(magic), file, &offset);
18413-
if (magic != GGUF_MAGIC) {
18414-
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
18415-
fclose(file);
18416-
return NULL;
18411+
18412+
// check the magic before making allocations
18413+
{
18414+
gguf_fread_el(&magic, sizeof(magic), file, &offset);
18415+
18416+
if (magic != GGUF_MAGIC) {
18417+
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
18418+
fclose(file);
18419+
return NULL;
18420+
}
1841718421
}
1841818422

1841918423
bool ok = true;
1842018424

1842118425
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
1842218426

18423-
ctx->header.magic = magic;
18424-
ctx->header.kv = NULL;
18427+
// read the header
18428+
{
18429+
ctx->header.magic = magic;
18430+
ctx->header.kv = NULL;
1842518431

18426-
ctx->infos = NULL;
18427-
ctx->data = NULL;
18432+
ctx->infos = NULL;
18433+
ctx->data = NULL;
1842818434

18429-
ok = ok && gguf_fread_el(&ctx->header.version, sizeof(ctx->header.version), file, &offset);
18430-
ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
18431-
ok = ok && gguf_fread_el(&ctx->header.n_kv, sizeof(ctx->header.n_kv), file, &offset);
18435+
ok = ok && gguf_fread_el(&ctx->header.version, sizeof(ctx->header.version), file, &offset);
18436+
ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
18437+
ok = ok && gguf_fread_el(&ctx->header.n_kv, sizeof(ctx->header.n_kv), file, &offset);
1843218438

18433-
if (!ok) {
18434-
fprintf(stderr, "%s: failed to read header\n", __func__);
18435-
fclose(file);
18436-
gguf_free(ctx);
18437-
return NULL;
18439+
if (!ok) {
18440+
fprintf(stderr, "%s: failed to read header\n", __func__);
18441+
fclose(file);
18442+
gguf_free(ctx);
18443+
return NULL;
18444+
}
1843818445
}
1843918446

18440-
ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
18441-
18442-
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
18443-
struct gguf_kv * kv = &ctx->header.kv[i];
18444-
18445-
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
18447+
// read the kv pairs
18448+
{
18449+
ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
1844618450

18447-
ok = ok && gguf_fread_str(&kv->key, file, &offset);
18448-
//ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
18449-
ok = ok && gguf_fread_el (&kv->type, sizeof(kv->type), file, &offset);
18451+
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
18452+
struct gguf_kv * kv = &ctx->header.kv[i];
1845018453

18451-
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
18454+
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
18455+
18456+
ok = ok && gguf_fread_str(&kv->key, file, &offset);
18457+
//ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
18458+
ok = ok && gguf_fread_el (&kv->type, sizeof(kv->type), file, &offset);
18459+
18460+
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
18461+
18462+
switch (kv->type) {
18463+
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (&kv->value.uint8, sizeof(kv->value.uint8), file, &offset); break;
18464+
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (&kv->value.int8, sizeof(kv->value.int8), file, &offset); break;
18465+
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (&kv->value.uint16, sizeof(kv->value.uint16), file, &offset); break;
18466+
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (&kv->value.int16, sizeof(kv->value.int16), file, &offset); break;
18467+
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (&kv->value.uint32, sizeof(kv->value.uint32), file, &offset); break;
18468+
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (&kv->value.int32, sizeof(kv->value.int32), file, &offset); break;
18469+
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
18470+
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
18471+
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
18472+
case GGUF_TYPE_ARRAY:
18473+
GGML_ASSERT("gguf: array type not implemented");
18474+
break;
18475+
};
1845218476

18453-
switch (kv->type) {
18454-
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (&kv->value.uint8, sizeof(kv->value.uint8), file, &offset); break;
18455-
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (&kv->value.int8, sizeof(kv->value.int8), file, &offset); break;
18456-
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (&kv->value.uint16, sizeof(kv->value.uint16), file, &offset); break;
18457-
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (&kv->value.int16, sizeof(kv->value.int16), file, &offset); break;
18458-
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (&kv->value.uint32, sizeof(kv->value.uint32), file, &offset); break;
18459-
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (&kv->value.int32, sizeof(kv->value.int32), file, &offset); break;
18460-
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
18461-
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
18462-
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
18463-
case GGUF_TYPE_ARRAY:
18464-
GGML_ASSERT("gguf: array type not implemented");
18465-
break;
18466-
};
18477+
if (!ok) {
18478+
break;
18479+
}
18480+
}
1846718481

1846818482
if (!ok) {
18469-
break;
18483+
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
18484+
fclose(file);
18485+
gguf_free(ctx);
18486+
return NULL;
1847018487
}
1847118488
}
1847218489

18473-
if (!ok) {
18474-
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
18475-
fclose(file);
18476-
gguf_free(ctx);
18477-
return NULL;
18478-
}
18479-
18480-
ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
18490+
// read the tensor infos
18491+
{
18492+
ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
1848118493

18482-
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18483-
struct gguf_tensor_info * info = &ctx->infos[i];
18494+
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18495+
struct gguf_tensor_info * info = &ctx->infos[i];
1848418496

18485-
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
18486-
info->ne[j] = 1;
18487-
}
18497+
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
18498+
info->ne[j] = 1;
18499+
}
1848818500

18489-
ok = ok && gguf_fread_str(&info->name, file, &offset);
18490-
ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims), file, &offset);
18491-
for (uint32_t j = 0; j < info->n_dims; ++j) {
18492-
ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
18493-
}
18494-
//ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms), file, &offset);
18495-
ok = ok && gguf_fread_el (&info->type, sizeof(info->type), file, &offset);
18496-
ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset), file, &offset);
18501+
ok = ok && gguf_fread_str(&info->name, file, &offset);
18502+
ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims), file, &offset);
18503+
for (uint32_t j = 0; j < info->n_dims; ++j) {
18504+
ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
18505+
}
18506+
//ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms), file, &offset);
18507+
ok = ok && gguf_fread_el (&info->type, sizeof(info->type), file, &offset);
18508+
ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset), file, &offset);
1849718509

18498-
if (!ok) {
18499-
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
18500-
fclose(file);
18501-
gguf_free(ctx);
18502-
return NULL;
18510+
if (!ok) {
18511+
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
18512+
fclose(file);
18513+
gguf_free(ctx);
18514+
return NULL;
18515+
}
1850318516
}
1850418517
}
1850518518

1850618519
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
1850718520

1850818521
// TODO: determine new alignment from kv if available
1850918522

18523+
// we require the data section to be aligned, so take into account any padding
1851018524
{
1851118525
const size_t offset_pad = offset % ctx->alignment;
1851218526

@@ -18516,38 +18530,46 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1851618530
}
1851718531
}
1851818532

18533+
// store the current file offset - this is where the data section starts
1851918534
ctx->offset = offset;
1852018535

18521-
ctx->size_data = 0;
18536+
// compute the total size of the data section, taking into account the alignment
18537+
{
1852218538

18523-
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18524-
struct gguf_tensor_info * info = &ctx->infos[i];
18539+
ctx->size_data = 0;
18540+
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18541+
struct gguf_tensor_info * info = &ctx->infos[i];
1852518542

18526-
const int64_t ne =
18527-
(int64_t) info->ne[0] *
18528-
(int64_t) info->ne[1] *
18529-
(int64_t) info->ne[2] *
18530-
(int64_t) info->ne[3];
18543+
const int64_t ne =
18544+
(int64_t) info->ne[0] *
18545+
(int64_t) info->ne[1] *
18546+
(int64_t) info->ne[2] *
18547+
(int64_t) info->ne[3];
1853118548

18532-
if (ne % ggml_blck_size(info->type) != 0) {
18533-
fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
18534-
__func__, info->name.data, ne, ggml_blck_size(info->type));
18535-
fclose(file);
18536-
gguf_free(ctx);
18537-
return NULL;
18538-
}
18549+
if (ne % ggml_blck_size(info->type) != 0) {
18550+
fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
18551+
__func__, info->name.data, ne, ggml_blck_size(info->type));
18552+
fclose(file);
18553+
gguf_free(ctx);
18554+
return NULL;
18555+
}
1853918556

18540-
const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
18557+
const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
1854118558

18542-
ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
18559+
ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
18560+
}
1854318561
}
1854418562

18545-
// load the tensor data
18546-
// TODO: simplify
18563+
// load the tensor data only if requested
1854718564
if (params.ctx != NULL) {
18565+
// if the provided ggml_context is no_alloc, then we create "empty" tensors and do not read the binary blob
18566+
// otherwise, we load the binary blob into the provided ggml_context as well, and point the "data" members of
18567+
// the ggml_tensor structs to the appropriate locations in the binary blob
18568+
18569+
// compute the exact size needed for the ggml_context
1854818570
const size_t mem_size =
1854918571
params.no_alloc ?
18550-
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() :
18572+
(ctx->header.n_tensors )*ggml_tensor_overhead() :
1855118573
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
1855218574

1855318575
struct ggml_init_params pdata = {
@@ -18567,7 +18589,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1856718589

1856818590
ok = ok && data != NULL;
1856918591

18570-
// read the tensor data
18592+
// read the binary blob with the tensor data
1857118593
ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
1857218594

1857318595
if (!ok) {
@@ -18602,14 +18624,15 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1860218624
break;
1860318625
}
1860418626

18627+
// point the data member to the appropriate location in the binary blob using the tensor infos
1860518628
if (params.no_alloc == false) {
1860618629
//cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
1860718630
cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
1860818631
}
1860918632
}
1861018633

1861118634
if (!ok) {
18612-
fprintf(stderr, "%s: failed to create tensors\n", __func__);
18635+
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
1861318636
fclose(file);
1861418637
ggml_free(ctx_data);
1861518638
gguf_free(ctx);
@@ -18619,13 +18642,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1861918642
ggml_set_no_alloc(ctx_data, params.no_alloc);
1862018643
}
1862118644

18622-
if (!ok) {
18623-
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
18624-
fclose(file);
18625-
gguf_free(ctx);
18626-
return NULL;
18627-
}
18628-
1862918645
return ctx;
1863018646
}
1863118647

0 commit comments

Comments
 (0)