Skip to content

Commit 8425001

Browse files
authored
gguf-split: add --no-tensor-first-split (#7072)
1 parent cf768b7 commit 8425001

File tree

3 files changed

+23
-12
lines changed

3 files changed

+23
-12
lines changed

Diff for: examples/gguf-split/gguf-split.cpp

+15 -4
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ struct split_params {
3232
int n_split_tensors = 128;
3333
std::string input;
3434
std::string output;
35+
bool no_tensor_first_split = false;
3536
bool dry_run = false;
3637
};
3738

@@ -49,6 +50,7 @@ static void split_print_usage(const char * executable) {
4950
printf(" --merge merge multiple GGUF to a single GGUF\n");
5051
printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors);
5152
printf(" --split-max-size N(M|G) max size per split\n");
53+
printf(" --no-tensor-first-split do not add tensors to the first split (disabled by default)\n");
5254
printf(" --dry-run only print out a split plan and exit, without writing any new files\n");
5355
printf("\n");
5456
}
@@ -100,6 +102,10 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
100102
arg_found = true;
101103
params.dry_run = true;
102104
}
105+
if (arg == "--no-tensor-first-split") {
106+
arg_found = true;
107+
params.no_tensor_first_split = true;
108+
}
103109

104110
if (is_op_set) {
105111
throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
@@ -200,10 +206,10 @@ struct split_strategy {
200206
// because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
201207
int i_split = -1;
202208
struct gguf_context * ctx_out = NULL;
203-
auto new_ctx_out = [&]() {
209+
auto new_ctx_out = [&](bool allow_no_tensors) {
204210
i_split++;
205211
if (ctx_out != NULL) {
206-
if (gguf_get_n_tensors(ctx_out) == 0) {
212+
if (gguf_get_n_tensors(ctx_out) == 0 && !allow_no_tensors) {
207213
fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
208214
exit(EXIT_FAILURE);
209215
}
@@ -220,7 +226,12 @@ struct split_strategy {
220226
};
221227

222228
// initialize ctx_out for the first split
223-
new_ctx_out();
229+
new_ctx_out(false);
230+
231+
// skip first split if no_tensor_first_split is set
232+
if (params.no_tensor_first_split) {
233+
new_ctx_out(true);
234+
}
224235

225236
// process tensors one by one
226237
size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
@@ -230,7 +241,7 @@ struct split_strategy {
230241
size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
231242
size_t next_tensors_size = curr_tensors_size + n_bytes;
232243
if (should_split(i, next_tensors_size)) {
233-
new_ctx_out();
244+
new_ctx_out(false);
234245
curr_tensors_size = n_bytes;
235246
} else {
236247
curr_tensors_size = next_tensors_size;

Diff for: examples/gguf-split/tests.sh

+7 -7
Original file line number | Diff line number | Diff line change
@@ -55,15 +55,15 @@ $MAIN --model $WORK_PATH/ggml-model-merge.gguf --random-prompt --n-predict 32
5555
echo PASS
5656
echo
5757

58-
# 4. Split with no tensor in metadata
59-
#$SPLIT --split-max-tensors 32 --no-tensor-in-metadata $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors
60-
#echo PASS
61-
#echo
58+
# 4. Split with no tensors in the first split
59+
$SPLIT --split-max-tensors 32 --no-tensor-first-split $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors
60+
echo PASS
61+
echo
6262

6363
# 4b. Test the sharded model is loading properly
64-
#$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf --random-prompt --n-predict 32
65-
#echo PASS
66-
#echo
64+
$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --random-prompt --n-predict 32
65+
echo PASS
66+
echo
6767

6868
# 5. Merge
6969
#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf

Diff for: ggml.c

+1 -1
Original file line number | Diff line number | Diff line change
@@ -21139,7 +21139,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
2113921139
}
2114021140

2114121141
// read the tensor infos
21142-
{
21142+
if (ctx->header.n_tensors > 0) {
2114321143
ctx->infos = GGML_CALLOC(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
2114421144

2114521145
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {

0 commit comments

Comments
 (0)