@@ -32,6 +32,7 @@ struct split_params {
32
32
int n_split_tensors = 128 ;
33
33
std::string input;
34
34
std::string output;
35
+ bool no_tensor_first_split = false ;
35
36
bool dry_run = false ;
36
37
};
37
38
@@ -49,6 +50,7 @@ static void split_print_usage(const char * executable) {
49
50
printf (" --merge merge multiple GGUF to a single GGUF\n " );
50
51
printf (" --split-max-tensors max tensors in each split (default: %d)\n " , default_params.n_split_tensors );
51
52
printf (" --split-max-size N(M|G) max size per split\n " );
53
+ printf (" --no-tensor-first-split do not add tensors to the first split (disabled by default)\n " );
52
54
printf (" --dry-run only print out a split plan and exit, without writing any new files\n " );
53
55
printf (" \n " );
54
56
}
@@ -100,6 +102,10 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
100
102
arg_found = true ;
101
103
params.dry_run = true ;
102
104
}
105
+ if (arg == " --no-tensor-first-split" ) {
106
+ arg_found = true ;
107
+ params.no_tensor_first_split = true ;
108
+ }
103
109
104
110
if (is_op_set) {
105
111
throw std::invalid_argument (" error: either --split or --merge can be specified, but not both" );
@@ -200,10 +206,10 @@ struct split_strategy {
200
206
// we need to know the list of tensors for each file in advance, so we build the ctx_out for all output splits up front
201
207
int i_split = -1 ;
202
208
struct gguf_context * ctx_out = NULL ;
203
- auto new_ctx_out = [&]() {
209
+ auto new_ctx_out = [&](bool allow_no_tensors ) {
204
210
i_split++;
205
211
if (ctx_out != NULL ) {
206
- if (gguf_get_n_tensors (ctx_out) == 0 ) {
212
+ if (gguf_get_n_tensors (ctx_out) == 0 && !allow_no_tensors ) {
207
213
fprintf (stderr, " error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n " );
208
214
exit (EXIT_FAILURE);
209
215
}
@@ -220,7 +226,12 @@ struct split_strategy {
220
226
};
221
227
222
228
// initialize ctx_out for the first split
223
- new_ctx_out ();
229
+ new_ctx_out (false );
230
+
231
// if no_tensor_first_split is set, move on to the next split right away so that no tensors are added to the first one
232
+ if (params.no_tensor_first_split ) {
233
+ new_ctx_out (true );
234
+ }
224
235
225
236
// process tensors one by one
226
237
size_t curr_tensors_size = 0 ; // current size by counting only tensors size (without metadata)
@@ -230,7 +241,7 @@ struct split_strategy {
230
241
size_t n_bytes = GGML_PAD (ggml_nbytes (t), GGUF_DEFAULT_ALIGNMENT);
231
242
size_t next_tensors_size = curr_tensors_size + n_bytes;
232
243
if (should_split (i, next_tensors_size)) {
233
- new_ctx_out ();
244
+ new_ctx_out (false );
234
245
curr_tensors_size = n_bytes;
235
246
} else {
236
247
curr_tensors_size = next_tensors_size;
0 commit comments