gguf : add comments

ggerganov · ggerganov · commit ef482eb147b7 · 2023-07-26T22:59:38.000+03:00
diff --git a/ggml.c b/ggml.c
@@ -18407,106 +18407,120 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     // offset from start of file
     size_t offset = 0;
 
-    // check the magic before making allocations
     uint32_t magic = 0;
-    gguf_fread_el(&magic, sizeof(magic), file, &offset);
-    if (magic != GGUF_MAGIC) {
-        fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
-        fclose(file);
-        return NULL;
+
+    // check the magic before making allocations
+    {
+        gguf_fread_el(&magic, sizeof(magic), file, &offset);
+
+        if (magic != GGUF_MAGIC) {
+            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
+            fclose(file);
+            return NULL;
+        }
     }
 
     bool ok = true;
 
     struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
 
-    ctx->header.magic = magic;
-    ctx->header.kv    = NULL;
+    // read the header
+    {
+        ctx->header.magic = magic;
+        ctx->header.kv    = NULL;
 
-    ctx->infos = NULL;
-    ctx->data  = NULL;
+        ctx->infos = NULL;
+        ctx->data  = NULL;
 
-    ok = ok && gguf_fread_el(&ctx->header.version,   sizeof(ctx->header.version),   file, &offset);
-    ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
-    ok = ok && gguf_fread_el(&ctx->header.n_kv,      sizeof(ctx->header.n_kv),      file, &offset);
+        ok = ok && gguf_fread_el(&ctx->header.version,   sizeof(ctx->header.version),   file, &offset);
+        ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
+        ok = ok && gguf_fread_el(&ctx->header.n_kv,      sizeof(ctx->header.n_kv),      file, &offset);
 
-    if (!ok) {
-        fprintf(stderr, "%s: failed to read header\n", __func__);
-        fclose(file);
-        gguf_free(ctx);
-        return NULL;
+        if (!ok) {
+            fprintf(stderr, "%s: failed to read header\n", __func__);
+            fclose(file);
+            gguf_free(ctx);
+            return NULL;
+        }
     }
 
-    ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
-
-    for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
-        struct gguf_kv * kv = &ctx->header.kv[i];
-
-        //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
+    // read the kv pairs
+    {
+        ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
 
-        ok = ok && gguf_fread_str(&kv->key,                          file, &offset);
-      //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
-        ok = ok && gguf_fread_el (&kv->type,    sizeof(kv->type),    file, &offset);
+        for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
+            struct gguf_kv * kv = &ctx->header.kv[i];
 
-        //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
+            //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
+
+            ok = ok && gguf_fread_str(&kv->key,                          file, &offset);
+          //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
+            ok = ok && gguf_fread_el (&kv->type,    sizeof(kv->type),    file, &offset);
+
+            //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
+
+            switch (kv->type) {
+                case GGUF_TYPE_UINT8:   ok = ok && gguf_fread_el (&kv->value.uint8,   sizeof(kv->value.uint8),   file, &offset); break;
+                case GGUF_TYPE_INT8:    ok = ok && gguf_fread_el (&kv->value.int8,    sizeof(kv->value.int8),    file, &offset); break;
+                case GGUF_TYPE_UINT16:  ok = ok && gguf_fread_el (&kv->value.uint16,  sizeof(kv->value.uint16),  file, &offset); break;
+                case GGUF_TYPE_INT16:   ok = ok && gguf_fread_el (&kv->value.int16,   sizeof(kv->value.int16),   file, &offset); break;
+                case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (&kv->value.uint32,  sizeof(kv->value.uint32),  file, &offset); break;
+                case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (&kv->value.int32,   sizeof(kv->value.int32),   file, &offset); break;
+                case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
+                case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (&kv->value.bool_,   sizeof(kv->value.bool_),   file, &offset); break;
+                case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(&kv->value.str,                                file, &offset); break;
+                case GGUF_TYPE_ARRAY:
+                                        GGML_ASSERT("gguf: array type not implemented");
+                                        break;
+            };
 
-        switch (kv->type) {
-            case GGUF_TYPE_UINT8:   ok = ok && gguf_fread_el (&kv->value.uint8,   sizeof(kv->value.uint8),   file, &offset); break;
-            case GGUF_TYPE_INT8:    ok = ok && gguf_fread_el (&kv->value.int8,    sizeof(kv->value.int8),    file, &offset); break;
-            case GGUF_TYPE_UINT16:  ok = ok && gguf_fread_el (&kv->value.uint16,  sizeof(kv->value.uint16),  file, &offset); break;
-            case GGUF_TYPE_INT16:   ok = ok && gguf_fread_el (&kv->value.int16,   sizeof(kv->value.int16),   file, &offset); break;
-            case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (&kv->value.uint32,  sizeof(kv->value.uint32),  file, &offset); break;
-            case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (&kv->value.int32,   sizeof(kv->value.int32),   file, &offset); break;
-            case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
-            case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (&kv->value.bool_,   sizeof(kv->value.bool_),   file, &offset); break;
-            case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(&kv->value.str,                                file, &offset); break;
-            case GGUF_TYPE_ARRAY:
-                                    GGML_ASSERT("gguf: array type not implemented");
-                                    break;
-        };
+            if (!ok) {
+                break;
+            }
+        }
 
         if (!ok) {
-            break;
+            fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
+            fclose(file);
+            gguf_free(ctx);
+            return NULL;
         }
     }
 
-    if (!ok) {
-        fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
-        fclose(file);
-        gguf_free(ctx);
-        return NULL;
-    }
-
-    ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
+    // read the tensor infos
+    {
+        ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
 
-    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-        struct gguf_tensor_info * info = &ctx->infos[i];
+        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+            struct gguf_tensor_info * info = &ctx->infos[i];
 
-        for (int j = 0; j < GGML_MAX_DIMS; ++j) {
-            info->ne[j] = 1;
-        }
+            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
+                info->ne[j] = 1;
+            }
 
-        ok = ok && gguf_fread_str(&info->name,                          file, &offset);
-        ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims),  file, &offset);
-        for (uint32_t j = 0; j < info->n_dims; ++j) {
-            ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
-        }
-      //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms),  file, &offset);
-        ok = ok && gguf_fread_el (&info->type,    sizeof(info->type),   file, &offset);
-        ok = ok && gguf_fread_el (&info->offset,  sizeof(info->offset), file, &offset);
+            ok = ok && gguf_fread_str(&info->name,                          file, &offset);
+            ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims),  file, &offset);
+            for (uint32_t j = 0; j < info->n_dims; ++j) {
+                ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
+            }
+          //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms),  file, &offset);
+            ok = ok && gguf_fread_el (&info->type,   sizeof(info->type),    file, &offset);
+            ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset),  file, &offset);
 
-        if (!ok) {
-            fprintf(stderr, "%s: failed to read tensor info\n", __func__);
-            fclose(file);
-            gguf_free(ctx);
-            return NULL;
+            if (!ok) {
+                fprintf(stderr, "%s: failed to read tensor info\n", __func__);
+                fclose(file);
+                gguf_free(ctx);
+                return NULL;
+            }
         }
     }
 
     ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
 
     // TODO: determine new alignment from kv if available
 
+    // we require the data section to be aligned, so take into account any padding
     {
         const size_t offset_pad = offset % ctx->alignment;
 
@@ -18516,38 +18530,46 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         }
     }
 
+    // store the current file offset - this is where the data section starts
     ctx->offset = offset;
 
-    ctx->size_data = 0;
+    // compute the total size of the data section, taking into account the alignment
+    {
 
-    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-        struct gguf_tensor_info * info = &ctx->infos[i];
+        ctx->size_data = 0;
+        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+            struct gguf_tensor_info * info = &ctx->infos[i];
 
-        const int64_t ne =
-            (int64_t) info->ne[0] *
-            (int64_t) info->ne[1] *
-            (int64_t) info->ne[2] *
-            (int64_t) info->ne[3];
+            const int64_t ne =
+                (int64_t) info->ne[0] *
+                (int64_t) info->ne[1] *
+                (int64_t) info->ne[2] *
+                (int64_t) info->ne[3];
 
-        if (ne % ggml_blck_size(info->type) != 0) {
-            fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
-                    __func__, info->name.data, ne, ggml_blck_size(info->type));
-            fclose(file);
-            gguf_free(ctx);
-            return NULL;
-        }
+            if (ne % ggml_blck_size(info->type) != 0) {
+                fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
+                        __func__, info->name.data, ne, ggml_blck_size(info->type));
+                fclose(file);
+                gguf_free(ctx);
+                return NULL;
+            }
 
-        const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
+            const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
 
-        ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
+            ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
+        }
     }
 
-    // load the tensor data
-    // TODO: simplify
+    // load the tensor data only if requested
     if (params.ctx != NULL) {
+        // if the provided ggml_context is no_alloc, then we create "empty" tensors and do not read the binary blob
+        // otherwise, we load the binary blob into the provided ggml_context as well, and point the "data" members of
+        // the ggml_tensor structs to the appropriate locations in the binary blob
+
+        // compute the exact size needed for the ggml_context
         const size_t mem_size =
             params.no_alloc ?
-            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() :
+            (ctx->header.n_tensors    )*ggml_tensor_overhead() :
             (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
 
         struct ggml_init_params pdata = {
@@ -18567,7 +18589,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
             ok = ok && data != NULL;
 
-            // read the tensor data
+            // read the binary blob with the tensor data
             ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
 
             if (!ok) {
@@ -18602,14 +18624,15 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 break;
             }
 
+            // point the data member to the appropriate location in the binary blob using the tensor infos
             if (params.no_alloc == false) {
               //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
                 cur->data = (char *) data->data + ctx->infos[i].offset;               // offset from data
             }
         }
 
         if (!ok) {
-            fprintf(stderr, "%s: failed to create tensors\n", __func__);
+            fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
             fclose(file);
             ggml_free(ctx_data);
             gguf_free(ctx);
@@ -18619,13 +18642,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         ggml_set_no_alloc(ctx_data, params.no_alloc);
     }
 
-    if (!ok) {
-        fprintf(stderr, "%s: failed to read tensor data\n", __func__);
-        fclose(file);
-        gguf_free(ctx);
-        return NULL;
-    }
-
     return ctx;
 }