@@ -18407,106 +18407,120 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18407
18407
// offset from start of file
18408
18408
size_t offset = 0;
18409
18409
18410
- // check the magic before making allocations
18411
18410
uint32_t magic = 0;
18412
- gguf_fread_el(&magic, sizeof(magic), file, &offset);
18413
- if (magic != GGUF_MAGIC) {
18414
- fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
18415
- fclose(file);
18416
- return NULL;
18411
+
18412
+ // check the magic before making allocations
18413
+ {
18414
+ gguf_fread_el(&magic, sizeof(magic), file, &offset);
18415
+
18416
+ if (magic != GGUF_MAGIC) {
18417
+ fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
18418
+ fclose(file);
18419
+ return NULL;
18420
+ }
18417
18421
}
18418
18422
18419
18423
bool ok = true;
18420
18424
18421
18425
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
18422
18426
18423
- ctx->header.magic = magic;
18424
- ctx->header.kv = NULL;
18427
+ // read the header
18428
+ {
18429
+ ctx->header.magic = magic;
18430
+ ctx->header.kv = NULL;
18425
18431
18426
- ctx->infos = NULL;
18427
- ctx->data = NULL;
18432
+ ctx->infos = NULL;
18433
+ ctx->data = NULL;
18428
18434
18429
- ok = ok && gguf_fread_el(&ctx->header.version, sizeof(ctx->header.version), file, &offset);
18430
- ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
18431
- ok = ok && gguf_fread_el(&ctx->header.n_kv, sizeof(ctx->header.n_kv), file, &offset);
18435
+ ok = ok && gguf_fread_el(&ctx->header.version, sizeof(ctx->header.version), file, &offset);
18436
+ ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
18437
+ ok = ok && gguf_fread_el(&ctx->header.n_kv, sizeof(ctx->header.n_kv), file, &offset);
18432
18438
18433
- if (!ok) {
18434
- fprintf(stderr, "%s: failed to read header\n", __func__);
18435
- fclose(file);
18436
- gguf_free(ctx);
18437
- return NULL;
18439
+ if (!ok) {
18440
+ fprintf(stderr, "%s: failed to read header\n", __func__);
18441
+ fclose(file);
18442
+ gguf_free(ctx);
18443
+ return NULL;
18444
+ }
18438
18445
}
18439
18446
18440
- ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
18441
-
18442
- for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
18443
- struct gguf_kv * kv = &ctx->header.kv[i];
18444
-
18445
- //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
18447
+ // read the kv pairs
18448
+ {
18449
+ ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
18446
18450
18447
- ok = ok && gguf_fread_str(&kv->key, file, &offset);
18448
- //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
18449
- ok = ok && gguf_fread_el (&kv->type, sizeof(kv->type), file, &offset);
18451
+ for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
18452
+ struct gguf_kv * kv = &ctx->header.kv[i];
18450
18453
18451
- //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
18454
+ //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
18455
+
18456
+ ok = ok && gguf_fread_str(&kv->key, file, &offset);
18457
+ //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
18458
+ ok = ok && gguf_fread_el (&kv->type, sizeof(kv->type), file, &offset);
18459
+
18460
+ //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
18461
+
18462
+ switch (kv->type) {
18463
+ case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (&kv->value.uint8, sizeof(kv->value.uint8), file, &offset); break;
18464
+ case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (&kv->value.int8, sizeof(kv->value.int8), file, &offset); break;
18465
+ case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (&kv->value.uint16, sizeof(kv->value.uint16), file, &offset); break;
18466
+ case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (&kv->value.int16, sizeof(kv->value.int16), file, &offset); break;
18467
+ case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (&kv->value.uint32, sizeof(kv->value.uint32), file, &offset); break;
18468
+ case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (&kv->value.int32, sizeof(kv->value.int32), file, &offset); break;
18469
+ case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
18470
+ case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
18471
+ case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
18472
+ case GGUF_TYPE_ARRAY:
18473
+ GGML_ASSERT("gguf: array type not implemented");
18474
+ break;
18475
+ };
18452
18476
18453
- switch (kv->type) {
18454
- case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (&kv->value.uint8, sizeof(kv->value.uint8), file, &offset); break;
18455
- case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (&kv->value.int8, sizeof(kv->value.int8), file, &offset); break;
18456
- case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (&kv->value.uint16, sizeof(kv->value.uint16), file, &offset); break;
18457
- case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (&kv->value.int16, sizeof(kv->value.int16), file, &offset); break;
18458
- case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (&kv->value.uint32, sizeof(kv->value.uint32), file, &offset); break;
18459
- case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (&kv->value.int32, sizeof(kv->value.int32), file, &offset); break;
18460
- case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
18461
- case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
18462
- case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
18463
- case GGUF_TYPE_ARRAY:
18464
- GGML_ASSERT("gguf: array type not implemented");
18465
- break;
18466
- };
18477
+ if (!ok) {
18478
+ break;
18479
+ }
18480
+ }
18467
18481
18468
18482
if (!ok) {
18469
- break;
18483
+ fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
18484
+ fclose(file);
18485
+ gguf_free(ctx);
18486
+ return NULL;
18470
18487
}
18471
18488
}
18472
18489
18473
- if (!ok) {
18474
- fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
18475
- fclose(file);
18476
- gguf_free(ctx);
18477
- return NULL;
18478
- }
18479
-
18480
- ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
18490
+ // read the tensor infos
18491
+ {
18492
+ ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
18481
18493
18482
- for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18483
- struct gguf_tensor_info * info = &ctx->infos[i];
18494
+ for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18495
+ struct gguf_tensor_info * info = &ctx->infos[i];
18484
18496
18485
- for (int j = 0; j < GGML_MAX_DIMS; ++j) {
18486
- info->ne[j] = 1;
18487
- }
18497
+ for (int j = 0; j < GGML_MAX_DIMS; ++j) {
18498
+ info->ne[j] = 1;
18499
+ }
18488
18500
18489
- ok = ok && gguf_fread_str(&info->name, file, &offset);
18490
- ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims), file, &offset);
18491
- for (uint32_t j = 0; j < info->n_dims; ++j) {
18492
- ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
18493
- }
18494
- //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms), file, &offset);
18495
- ok = ok && gguf_fread_el (&info->type, sizeof(info->type), file, &offset);
18496
- ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset), file, &offset);
18501
+ ok = ok && gguf_fread_str(&info->name, file, &offset);
18502
+ ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims), file, &offset);
18503
+ for (uint32_t j = 0; j < info->n_dims; ++j) {
18504
+ ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
18505
+ }
18506
+ //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms), file, &offset);
18507
+ ok = ok && gguf_fread_el (&info->type, sizeof(info->type), file, &offset);
18508
+ ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset), file, &offset);
18497
18509
18498
- if (!ok) {
18499
- fprintf(stderr, "%s: failed to read tensor info\n", __func__);
18500
- fclose(file);
18501
- gguf_free(ctx);
18502
- return NULL;
18510
+ if (!ok) {
18511
+ fprintf(stderr, "%s: failed to read tensor info\n", __func__);
18512
+ fclose(file);
18513
+ gguf_free(ctx);
18514
+ return NULL;
18515
+ }
18503
18516
}
18504
18517
}
18505
18518
18506
18519
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
18507
18520
18508
18521
// TODO: determine new alignment from kv if available
18509
18522
18523
+ // we require the data section to be aligned, so take into account any padding
18510
18524
{
18511
18525
const size_t offset_pad = offset % ctx->alignment;
18512
18526
@@ -18516,38 +18530,46 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18516
18530
}
18517
18531
}
18518
18532
18533
+ // store the current file offset - this is where the data section starts
18519
18534
ctx->offset = offset;
18520
18535
18521
- ctx->size_data = 0;
18536
+ // compute the total size of the data section, taking into account the alignment
18537
+ {
18522
18538
18523
- for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18524
- struct gguf_tensor_info * info = &ctx->infos[i];
18539
+ ctx->size_data = 0;
18540
+ for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18541
+ struct gguf_tensor_info * info = &ctx->infos[i];
18525
18542
18526
- const int64_t ne =
18527
- (int64_t) info->ne[0] *
18528
- (int64_t) info->ne[1] *
18529
- (int64_t) info->ne[2] *
18530
- (int64_t) info->ne[3];
18543
+ const int64_t ne =
18544
+ (int64_t) info->ne[0] *
18545
+ (int64_t) info->ne[1] *
18546
+ (int64_t) info->ne[2] *
18547
+ (int64_t) info->ne[3];
18531
18548
18532
- if (ne % ggml_blck_size(info->type) != 0) {
18533
- fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
18534
- __func__, info->name.data, ne, ggml_blck_size(info->type));
18535
- fclose(file);
18536
- gguf_free(ctx);
18537
- return NULL;
18538
- }
18549
+ if (ne % ggml_blck_size(info->type) != 0) {
18550
+ fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
18551
+ __func__, info->name.data, ne, ggml_blck_size(info->type));
18552
+ fclose(file);
18553
+ gguf_free(ctx);
18554
+ return NULL;
18555
+ }
18539
18556
18540
- const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
18557
+ const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
18541
18558
18542
- ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
18559
+ ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
18560
+ }
18543
18561
}
18544
18562
18545
- // load the tensor data
18546
- // TODO: simplify
18563
+ // load the tensor data only if requested
18547
18564
if (params.ctx != NULL) {
18565
+ // if the provided ggml_context is no_alloc, then we create "empty" tensors and do not read the binary blob
18566
+ // otherwise, we load the binary blob into the provided ggml_context as well, and point the "data" members of
18567
+ // the ggml_tensor structs to the appropriate locations in the binary blob
18568
+
18569
+ // compute the exact size needed for the ggml_context
18548
18570
const size_t mem_size =
18549
18571
params.no_alloc ?
18550
- (ctx->header.n_tensors + 1 )*ggml_tensor_overhead() :
18572
+ (ctx->header.n_tensors )*ggml_tensor_overhead() :
18551
18573
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
18552
18574
18553
18575
struct ggml_init_params pdata = {
@@ -18567,7 +18589,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18567
18589
18568
18590
ok = ok && data != NULL;
18569
18591
18570
- // read the tensor data
18592
+ // read the binary blob with the tensor data
18571
18593
ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
18572
18594
18573
18595
if (!ok) {
@@ -18602,14 +18624,15 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18602
18624
break;
18603
18625
}
18604
18626
18627
+ // point the data member to the appropriate location in the binary blob using the tensor infos
18605
18628
if (params.no_alloc == false) {
18606
18629
//cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
18607
18630
cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
18608
18631
}
18609
18632
}
18610
18633
18611
18634
if (!ok) {
18612
- fprintf(stderr, "%s: failed to create tensors \n", __func__);
18635
+ fprintf(stderr, "%s: failed to read the tensor data \n", __func__);
18613
18636
fclose(file);
18614
18637
ggml_free(ctx_data);
18615
18638
gguf_free(ctx);
@@ -18619,13 +18642,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18619
18642
ggml_set_no_alloc(ctx_data, params.no_alloc);
18620
18643
}
18621
18644
18622
- if (!ok) {
18623
- fprintf(stderr, "%s: failed to read tensor data\n", __func__);
18624
- fclose(file);
18625
- gguf_free(ctx);
18626
- return NULL;
18627
- }
18628
-
18629
18645
return ctx;
18630
18646
}
18631
18647
0 commit comments