
Commit dcd7899

minor fixup
1 parent 15dea7b commit dcd7899

3 files changed: +6 -22 lines changed

examples/training/finetune.cpp (+2 -2)

@@ -27,11 +27,11 @@ int main(int argc, char ** argv) {
         LOG_INF("%s: force disabling memory mapping because it would result in-read-only pointers to the weights\n", __func__);
         params.use_mmap = false;
     }
-    if (params.cache_type_k == GGML_TYPE_F16) {
+    if (params.cache_type_k != GGML_TYPE_F32) {
         LOG_INF("%s: force changing k cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
         params.cache_type_k = GGML_TYPE_F32;
     }
-    if (params.cache_type_v == GGML_TYPE_F16) {
+    if (params.cache_type_v != GGML_TYPE_F32) {
         LOG_INF("%s: force changing v cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
         params.cache_type_v = GGML_TYPE_F32;
     }
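
The change broadens the guard: previously only an f16 KV cache was rewritten to f32, while any other non-f32 cache type (for example a quantized one) slipped through, even though the OUT_PROD operator used by the training graph only supports f32. A minimal standalone sketch of the same pattern; the helper name force_f32_cache and its log text are illustrative, not part of the commit:

#include <cstdio>

#include "ggml.h"

// Illustrative helper (not in the commit): coerce any non-f32 KV cache type
// back to f32, mirroring the generalized check in finetune.cpp. OUT_PROD,
// which the training graph depends on, currently only supports f32.
static ggml_type force_f32_cache(ggml_type type, const char * which) {
    if (type != GGML_TYPE_F32) {
        fprintf(stderr, "force changing %s cache type to f32 due to a lack of f16 support for OUT_PROD\n", which);
        return GGML_TYPE_F32;
    }
    return type;
}

// usage sketch:
//   params.cache_type_k = force_f32_cache(params.cache_type_k, "k");
//   params.cache_type_v = force_f32_cache(params.cache_type_v, "v");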

include/llama.h (+3 -3)

@@ -1437,7 +1437,7 @@ extern "C" {
     // training
     //
 
-    // function that returns whether or not a given tensor is a trainable parameter
+    // function that returns whether or not a given tensor contains trainable parameters
     typedef bool (*llama_opt_param_filter)(const struct ggml_tensor * tensor, void * userdata);
 
     // always returns true
@@ -1446,8 +1446,8 @@ extern "C" {
     struct llama_opt_params {
         uint32_t n_ctx_train; // assumed context size post training, use context size specified in llama_context if 0
 
-        llama_opt_param_filter param_filter; // callback for determining which tensors are trainable parameters
-        void * param_filter_ud;              // userdata for determining which tensors are trainable parameters
+        llama_opt_param_filter param_filter; // callback for determining which tensors contain trainable parameters
+        void * param_filter_ud;              // userdata for determining which tensors contain trainable parameters
 
         ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
         void * get_opt_pars_ud;                     // userdata for calculating optimizer parameters
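
Since a single tensor (e.g. one weight matrix) holds many individual parameters, the comments now say a tensor "contains trainable parameters" rather than "is a trainable parameter"; the filter is queried once per tensor when training is set up. A minimal sketch of a custom filter, assuming one wants to restrict training to tensors whose name matches a substring passed through the userdata pointer; the function name and the "attn_output" pattern are illustrative, not part of the API:

#include <cstring>

#include "ggml.h"
#include "llama.h"

// Illustrative filter (not in the commit): report a tensor as containing
// trainable parameters only if its name contains the substring passed via
// userdata. All other tensors stay frozen.
static bool filter_by_name(const struct ggml_tensor * tensor, void * userdata) {
    const char * pattern = static_cast<const char *>(userdata);
    return std::strstr(ggml_get_name(tensor), pattern) != nullptr;
}

// usage sketch (other llama_opt_params fields omitted):
//   struct llama_opt_params opt_params = {};
//   opt_params.param_filter    = filter_by_name;
//   opt_params.param_filter_ud = (void *) "attn_output";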

src/llama-context.cpp (+1 -17)

@@ -1906,10 +1906,6 @@ void llama_context::opt_epoch_iter(
 
     llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
 
-    //is_encoding = false;
-    //llama_kv_cache_clear(lctx);
-    //llama_kv_slot_restorer kv_slot_restorer(lctx->kv_self);
-
     kv_self->clear();
     llama_kv_cache_guard kv_guard(kv_self);
 
@@ -1923,11 +1919,6 @@ void llama_context::opt_epoch_iter(
         batch.logits [pos_batch] = true;
     }
 
-    //{
-    //    const int err_code = llama_prepare_sbatch(*lctx, batch, n_outputs);
-    //    GGML_ASSERT(err_code == 0);
-    //}
-
     const auto n_tokens_all = batch.n_tokens;
 
     n_queued_tokens += n_tokens_all;
@@ -1939,7 +1930,7 @@ void llama_context::opt_epoch_iter(
 
     int64_t n_outputs_all = n_tokens_all;
 
-    llama_sbatch sbatch = kv_self->sbatch_init(batch, /* logits_all */ true);
+    llama_sbatch sbatch = kv_self->sbatch_init(batch, /*logits_all =*/ true);
 
     // reserve output buffer
     if (output_reserve(n_outputs_all) < n_outputs_all) {
@@ -1948,11 +1939,6 @@ void llama_context::opt_epoch_iter(
     };
 
     for (uint32_t pos_batch = 0; pos_batch < n_batch; pos_batch += n_ubatch) {
-        //struct llama_ubatch ubatch;
-        //{
-        //    const int err_code = llama_prepare_ubatch(*lctx, kv_slot_restorer, ubatch, n_outputs, batch.n_tokens);
-        //    GGML_ASSERT(err_code == 0);
-        //}
         llama_ubatch ubatch = kv_self->ubatch_next(sbatch, cparams.n_ubatch, embd_pooled);
 
         n_outputs = ubatch.n_tokens;
@@ -1967,7 +1953,6 @@ void llama_context::opt_epoch_iter(
         auto * gf = graph_init();
         auto res = graph_build(ctx_compute.get(), gf, ubatch, LLM_GRAPH_TYPE_DEFAULT);
 
-        //struct ggml_cgraph * gf = llama_build_graph(*lctx, ubatch, false);
         struct ggml_context * ctx_compute_opt;
         {
             const size_t size_gf = ggml_graph_size(gf);
@@ -1981,7 +1966,6 @@ void llama_context::opt_epoch_iter(
         }
         ggml_opt_prepare_alloc(opt_ctx, ctx_compute_opt, gf, res->get_tokens(), res->get_logits());
         ggml_opt_alloc(opt_ctx, train);
-        //llama_set_inputs(*lctx, ubatch);
         res->set_inputs(&ubatch);
         {
             struct ggml_tensor * labels = ggml_opt_labels(opt_ctx);
