Skip to content

Commit 8d8ff71

Browse files
authored
llama : remove Tail-Free sampling (#10071)
ggml-ci
1 parent 61715d5 commit 8d8ff71

File tree

16 files changed

+15
-172
lines changed

16 files changed

+15
-172
lines changed

common/arg.cpp

+1-8
Original file line numberDiff line numberDiff line change
@@ -943,13 +943,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
943943
params.sparams.min_p = std::stof(value);
944944
}
945945
).set_sparam());
946-
add_opt(common_arg(
947-
{"--tfs"}, "N",
948-
string_format("tail free sampling, parameter z (default: %.1f, 1.0 = disabled)", (double)params.sparams.tfs_z),
949-
[](common_params & params, const std::string & value) {
950-
params.sparams.tfs_z = std::stof(value);
951-
}
952-
).set_sparam());
953946
add_opt(common_arg(
954947
{"--xtc-probability"}, "N",
955948
string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sparams.xtc_probability),
@@ -1074,7 +1067,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
10741067
).set_sparam());
10751068
add_opt(common_arg(
10761069
{"--mirostat"}, "N",
1077-
string_format("use Mirostat sampling.\nTop K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"
1070+
string_format("use Mirostat sampling.\nTop K, Nucleus and Locally Typical samplers are ignored if used.\n"
10781071
"(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sparams.mirostat),
10791072
[](common_params & params, int value) {
10801073
params.sparams.mirostat = value;

common/common.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -2090,7 +2090,6 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
20902090
const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices());
20912091
yaml_dump_vector_float(stream, "tensor_split", tensor_split_vector);
20922092

2093-
fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z);
20942093
fprintf(stream, "threads: %d # default: %u\n", params.cpuparams.n_threads, std::thread::hardware_concurrency());
20952094
fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);
20962095
fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p);

common/common.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ enum common_sampler_type {
8888
COMMON_SAMPLER_TYPE_TOP_K = 2,
8989
COMMON_SAMPLER_TYPE_TOP_P = 3,
9090
COMMON_SAMPLER_TYPE_MIN_P = 4,
91-
COMMON_SAMPLER_TYPE_TFS_Z = 5,
91+
//COMMON_SAMPLER_TYPE_TFS_Z = 5,
9292
COMMON_SAMPLER_TYPE_TYPICAL_P = 6,
9393
COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
9494
COMMON_SAMPLER_TYPE_XTC = 8,
@@ -113,7 +113,6 @@ struct common_sampler_params {
113113
float min_p = 0.05f; // 0.0 = disabled
114114
float xtc_probability = 0.00f; // 0.0 = disabled
115115
float xtc_threshold = 0.10f; // > 0.5 disables XTC
116-
float tfs_z = 1.00f; // 1.0 = disabled
117116
float typ_p = 1.00f; // typical_p, 1.0 = disabled
118117
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
119118
float dynatemp_range = 0.00f; // 0.0 = disabled
@@ -139,7 +138,6 @@ struct common_sampler_params {
139138
std::vector<enum common_sampler_type> samplers = {
140139
COMMON_SAMPLER_TYPE_DRY,
141140
COMMON_SAMPLER_TYPE_TOP_K,
142-
COMMON_SAMPLER_TYPE_TFS_Z,
143141
COMMON_SAMPLER_TYPE_TYPICAL_P,
144142
COMMON_SAMPLER_TYPE_TOP_P,
145143
COMMON_SAMPLER_TYPE_MIN_P,

common/sampling.cpp

+2-11
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,11 @@ std::string common_sampler_params::print() const {
131131
snprintf(result, sizeof(result),
132132
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
133133
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
134-
"\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
134+
"\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
135135
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
136136
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
137137
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
138-
top_k, tfs_z, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
138+
top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
139139
mirostat, mirostat_eta, mirostat_tau);
140140

141141
return std::string(result);
@@ -199,9 +199,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
199199
case COMMON_SAMPLER_TYPE_XTC:
200200
llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
201201
break;
202-
case COMMON_SAMPLER_TYPE_TFS_Z:
203-
llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
204-
break;
205202
case COMMON_SAMPLER_TYPE_TYPICAL_P:
206203
llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
207204
break;
@@ -373,7 +370,6 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
373370
switch (cnstr) {
374371
case COMMON_SAMPLER_TYPE_DRY: return 'd';
375372
case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
376-
case COMMON_SAMPLER_TYPE_TFS_Z: return 'f';
377373
case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
378374
case COMMON_SAMPLER_TYPE_TOP_P: return 'p';
379375
case COMMON_SAMPLER_TYPE_MIN_P: return 'm';
@@ -388,7 +384,6 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
388384
switch (cnstr) {
389385
case COMMON_SAMPLER_TYPE_DRY: return "dry";
390386
case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
391-
case COMMON_SAMPLER_TYPE_TFS_Z: return "tfs_z";
392387
case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
393388
case COMMON_SAMPLER_TYPE_TOP_P: return "top_p";
394389
case COMMON_SAMPLER_TYPE_MIN_P: return "min_p";
@@ -406,7 +401,6 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
406401
{ "top_p", COMMON_SAMPLER_TYPE_TOP_P },
407402
{ "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
408403
{ "min_p", COMMON_SAMPLER_TYPE_MIN_P },
409-
{ "tfs_z", COMMON_SAMPLER_TYPE_TFS_Z },
410404
{ "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
411405
{ "xtc", COMMON_SAMPLER_TYPE_XTC },
412406
{ "infill", COMMON_SAMPLER_TYPE_INFILL },
@@ -423,8 +417,6 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
423417
{ "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
424418
{ "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
425419
{ "min-p", COMMON_SAMPLER_TYPE_MIN_P },
426-
{ "tfs-z", COMMON_SAMPLER_TYPE_TFS_Z },
427-
{ "tfs", COMMON_SAMPLER_TYPE_TFS_Z },
428420
{ "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
429421
};
430422

@@ -452,7 +444,6 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
452444
std::unordered_map<char, common_sampler_type> sampler_name_map = {
453445
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_DRY), COMMON_SAMPLER_TYPE_DRY },
454446
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
455-
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TFS_Z), COMMON_SAMPLER_TYPE_TFS_Z },
456447
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },
457448
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P },
458449
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },

examples/main/README.md

-8
Original file line numberDiff line numberDiff line change
@@ -235,14 +235,6 @@ The Min-P sampling method was designed as an alternative to Top-P, and aims to e
235235

236236
Example usage: `--min-p 0.05`
237237

238-
### Tail-Free Sampling (TFS)
239-
240-
- `--tfs N`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).
241-
242-
Tail-free sampling (TFS) is a text generation technique that aims to reduce the impact of less likely tokens, which may be less relevant, less coherent, or nonsensical, on the output. Similar to Top-P it tries to determine the bulk of the most likely tokens dynamically. But TFS filters out logits based on the second derivative of their probabilities. Adding tokens is stopped after the sum of the second derivatives reaches the parameter z. In short: TFS looks at how quickly the probabilities of the tokens decrease and cuts off the tail of unlikely tokens using the parameter z. Typical values for z are in the range of 0.9 to 0.95. A value of 1.0 would include all tokens and thus disables the effect of TFS.
243-
244-
Example usage: `--tfs 0.95`
245-
246238
### Locally Typical Sampling
247239

248240
- `--typical N`: Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled).

examples/server/README.md

+3-8
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ The project is under active development, and we are [looking for feedback and co
9999

100100
| Argument | Explanation |
101101
| -------- | ----------- |
102-
| `--samplers SAMPLERS` | samplers that will be used for generation in the order, separated by ';'<br/>(default: top_k;tfs_z;typ_p;top_p;min_p;temperature) |
102+
| `--samplers SAMPLERS` | samplers that will be used for generation in the order, separated by ';'<br/>(default: top_k;typ_p;top_p;min_p;temperature) |
103103
| `-s, --seed SEED` | RNG seed (default: -1, use random seed for -1) |
104104
| `--sampling-seq SEQUENCE` | simplified sequence for samplers that will be used (default: kypmt) |
105105
| `--ignore-eos` | ignore end of stream token and continue generating (implies --logit-bias EOS-inf) |
@@ -108,7 +108,6 @@ The project is under active development, and we are [looking for feedback and co
108108
| `--top-k N` | top-k sampling (default: 40, 0 = disabled) |
109109
| `--top-p N` | top-p sampling (default: 0.95, 1.0 = disabled) |
110110
| `--min-p N` | min-p sampling (default: 0.05, 0.0 = disabled) |
111-
| `--tfs N` | tail free sampling, parameter z (default: 1.0, 1.0 = disabled) |
112111
| `--typical N` | locally typical sampling, parameter p (default: 1.0, 1.0 = disabled) |
113112
| `--repeat-last-n N` | last n tokens to consider for penalize (default: 64, 0 = disabled, -1 = ctx_size) |
114113
| `--repeat-penalty N` | penalize repeat sequence of tokens (default: 1.0, 1.0 = disabled) |
@@ -121,7 +120,7 @@ The project is under active development, and we are [looking for feedback and co
121120
| `--dry-sequence-breaker STRING` | add sequence breaker for DRY sampling, clearing out default breakers (`['\n', ':', '"', '*']`) in the process; use `"none"` to not use any sequence breakers |
122121
| `--dynatemp-range N` | dynamic temperature range (default: 0.0, 0.0 = disabled) |
123122
| `--dynatemp-exp N` | dynamic temperature exponent (default: 1.0) |
124-
| `--mirostat N` | use Mirostat sampling.<br/>Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.<br/>(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) |
123+
| `--mirostat N` | use Mirostat sampling.<br/>Top K, Nucleus and Locally Typical samplers are ignored if used.<br/>(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) |
125124
| `--mirostat-lr N` | Mirostat learning rate, parameter eta (default: 0.1) |
126125
| `--mirostat-ent N` | Mirostat target entropy, parameter tau (default: 5.0) |
127126
| `-l, --logit-bias TOKEN_ID(+/-)BIAS` | modifies the likelihood of token appearing in the completion,<br/>i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',<br/>or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' |
@@ -360,8 +359,6 @@ node index.js
360359
`stop`: Specify a JSON array of stopping strings.
361360
These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`
362361

363-
`tfs_z`: Enable tail free sampling with parameter z. Default: `1.0`, which is disabled.
364-
365362
`typical_p`: Enable locally typical sampling with parameter p. Default: `1.0`, which is disabled.
366363

367364
`repeat_penalty`: Control the repetition of token sequences in the generated text. Default: `1.1`
@@ -412,7 +409,7 @@ node index.js
412409

413410
`cache_prompt`: Re-use KV cache from a previous request if possible. This way the common prefix does not have to be re-processed, only the suffix that differs between the requests. Because (depending on the backend) the logits are **not** guaranteed to be bit-for-bit identical for different batch sizes (prompt processing vs. token generation) enabling this option can cause nondeterministic results. Default: `false`
414411

415-
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values.
412+
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["top_k", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values.
416413

417414
**Response format**
418415

@@ -738,7 +735,6 @@ Example:
738735
"repeat_penalty": 1.100000023841858,
739736
"samplers": [
740737
"top_k",
741-
"tfs_z",
742738
"typical_p",
743739
"top_p",
744740
"min_p",
@@ -752,7 +748,6 @@ Example:
752748
"stream": false,
753749
"task_id": 0,
754750
"temperature": 0.0,
755-
"tfs_z": 1.0,
756751
"top_k": 40,
757752
"top_p": 0.949999988079071,
758753
"typical_p": 1.0

examples/server/public/index-new.html

-3
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
min_p: 0.05, // 0 = disabled; recommended for non-english: ~ 0.4
5050
xtc_probability: 0.0, // 0 = disabled;
5151
xtc_threshold: 0.1, // > 0.5 disables XTC;
52-
tfs_z: 1.0, // 1.0 = disabled
5352
typical_p: 1.0, // 1.0 = disabled
5453
presence_penalty: 0.0, // 0.0 = disabled
5554
frequency_penalty: 0.0, // 0.0 = disabled
@@ -847,7 +846,6 @@
847846
${FloatField({ label: "DRY Base", title: "Set the DRY repetition penalty base value. Default is 1.75", max: 3.0, min: 1.0, name: "dry_base", step: 0.01, value: params.value.dry_base })}
848847
${IntField({ label: "DRY Allowed Length", title: "Tokens that extend repetition beyond this receive exponentially increasing penalty. Default is 2", max: 10, min: 1, step: 1, name: "dry_allowed_length", value: params.value.dry_allowed_length })}
849848
${IntField({ label: "DRY Penalty Last N", title: "How many tokens to scan for repetitions. Default is -1, where 0 is disabled and -1 is context size", max: 2048, min: -1, step: 16, name: "dry_penalty_last_n", value: params.value.dry_penalty_last_n })}
850-
${FloatField({ label: "TFS-Z", title: "Activates tail-free sampling, a method used to limit the prediction of tokens that are too frequent. The parameter z controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
851849
${IntField({ label: "Min Keep", title: "If greater than 0, samplers are forced to return N possible tokens at minimum. Default is 0", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
852850
</fieldset>
853851
@@ -1147,7 +1145,6 @@ <h2>llama.cpp</h2>
11471145
xtc_probability: { snapValue: 0.0, snapRangeMultiplier: 4 },
11481146
xtc_threshold: { snapValue: 0.5, snapRangeMultiplier: 4 },
11491147
top_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
1150-
tfs_z: { snapValue: 1.0, snapRangeMultiplier: 4 },
11511148
typical_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
11521149
repeat_penalty: { snapValue: 1.0, snapRangeMultiplier: 4 },
11531150
presence_penalty: { snapValue: 0.0, snapRangeMultiplier: 4 },

examples/server/public/index.html

-2
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,6 @@
313313
min_p: 0.05, // 0 = disabled
314314
xtc_probability: 0.0, // 0 = disabled;
315315
xtc_threshold: 0.1, // > 0.5 disables XTC;
316-
tfs_z: 1.0, // 1.0 = disabled
317316
typical_p: 1.0, // 1.0 = disabled
318317
presence_penalty: 0.0, // 0.0 = disabled
319318
frequency_penalty: 0.0, // 0.0 = disabled
@@ -1015,7 +1014,6 @@
10151014
<details>
10161015
<summary>More options</summary>
10171016
<fieldset class="two">
1018-
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
10191017
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
10201018
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
10211019
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}

examples/server/server.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,6 @@ struct server_context {
809809
slot.sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
810810
slot.sparams.xtc_probability = json_value(data, "xtc_probability", default_sparams.xtc_probability);
811811
slot.sparams.xtc_threshold = json_value(data, "xtc_threshold", default_sparams.xtc_threshold);
812-
slot.sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
813812
slot.sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
814813
slot.sparams.temp = json_value(data, "temperature", default_sparams.temp);
815814
slot.sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
@@ -1149,7 +1148,6 @@ struct server_context {
11491148
{"min_p", slot.sparams.min_p},
11501149
{"xtc_probability", slot.sparams.xtc_probability},
11511150
{"xtc_threshold", slot.sparams.xtc_threshold},
1152-
{"tfs_z", slot.sparams.tfs_z},
11531151
{"typical_p", slot.sparams.typ_p},
11541152
{"repeat_last_n", slot.sparams.penalty_last_n},
11551153
{"repeat_penalty", slot.sparams.penalty_repeat},

examples/server/themes/buttons-top/index.html

-2
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@
226226
top_k: 40, // <= 0 to use vocab size
227227
top_p: 0.95, // 1.0 = disabled
228228
min_p: 0.05, // 0 = disabled
229-
tfs_z: 1.0, // 1.0 = disabled
230229
typical_p: 1.0, // 1.0 = disabled
231230
presence_penalty: 0.0, // 0.0 = disabled
232231
frequency_penalty: 0.0, // 0.0 = disabled
@@ -788,7 +787,6 @@
788787
<details>
789788
<summary>More options</summary>
790789
<fieldset class="two">
791-
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
792790
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
793791
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
794792
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}

examples/server/themes/wild/index.html

-2
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,6 @@
229229
top_k: 40, // <= 0 to use vocab size
230230
top_p: 0.95, // 1.0 = disabled
231231
min_p: 0.05, // 0 = disabled
232-
tfs_z: 1.0, // 1.0 = disabled
233232
typical_p: 1.0, // 1.0 = disabled
234233
presence_penalty: 0.0, // 0.0 = disabled
235234
frequency_penalty: 0.0, // 0.0 = disabled
@@ -791,7 +790,6 @@
791790
<details>
792791
<summary>More options</summary>
793792
<fieldset class="two">
794-
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
795793
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
796794
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
797795
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}

0 commit comments

Comments
 (0)