
Commit dcd7899

minor fixup
1 parent 15dea7b commit dcd7899

3 files changed: +6 -22 lines changed

examples/training/finetune.cpp (+2 -2)

@@ -27,11 +27,11 @@ int main(int argc, char ** argv) {
         LOG_INF("%s: force disabling memory mapping because it would result in-read-only pointers to the weights\n", __func__);
         params.use_mmap = false;
     }
-    if (params.cache_type_k == GGML_TYPE_F16) {
+    if (params.cache_type_k != GGML_TYPE_F32) {
         LOG_INF("%s: force changing k cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
         params.cache_type_k = GGML_TYPE_F32;
     }
-    if (params.cache_type_v == GGML_TYPE_F16) {
+    if (params.cache_type_v != GGML_TYPE_F32) {
         LOG_INF("%s: force changing v cache type to f32 due to a lack of f16 support for OUT_PROD\n", __func__);
         params.cache_type_v = GGML_TYPE_F32;
     }
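
The change broadens the guard: previously only an f16 KV cache was rewritten to f32, while any other non-f32 cache type (for example a quantized one) slipped through, even though the OUT_PROD operator used by the training graph only supports f32. A minimal standalone sketch of the same pattern; the helper name force_f32_cache and its log text are illustrative, not part of the commit:

#include <cstdio>

#include "ggml.h"

// Illustrative helper (not in the commit): coerce any non-f32 KV cache type
// back to f32, mirroring the generalized check in finetune.cpp. OUT_PROD,
// which the training graph depends on, currently only supports f32.
static ggml_type force_f32_cache(ggml_type type, const char * which) {
    if (type != GGML_TYPE_F32) {
        fprintf(stderr, "force changing %s cache type to f32 due to a lack of f16 support for OUT_PROD\n", which);
        return GGML_TYPE_F32;
    }
    return type;
}

// usage sketch:
//   params.cache_type_k = force_f32_cache(params.cache_type_k, "k");
//   params.cache_type_v = force_f32_cache(params.cache_type_v, "v");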

include/llama.h (+3 -3)

@@ -1437,7 +1437,7 @@ extern "C" {
     // training
     //
 
-    // function that returns whether or not a given tensor is a trainable parameter
+    // function that returns whether or not a given tensor contains trainable parameters
     typedef bool (*llama_opt_param_filter)(const struct ggml_tensor * tensor, void * userdata);
 
     // always returns true
@@ -1446,8 +1446,8 @@ extern "C" {
     struct llama_opt_params {
         uint32_t n_ctx_train; // assumed context size post training, use context size specified in llama_context if 0
 
-        llama_opt_param_filter param_filter; // callback for determining which tensors are trainable parameters
-        void * param_filter_ud;              // userdata for determining which tensors are trainable parameters
+        llama_opt_param_filter param_filter; // callback for determining which tensors contain trainable parameters
+        void * param_filter_ud;              // userdata for determining which tensors contain trainable parameters
 
         ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
         void * get_opt_pars_ud;                     // userdata for calculating optimizer parameters
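
Since a single tensor (e.g. one weight matrix) holds many individual parameters, the comments now say a tensor "contains trainable parameters" rather than "is a trainable parameter"; the filter is queried once per tensor when training is set up. A minimal sketch of a custom filter, assuming one wants to restrict training to tensors whose name matches a substring passed through the userdata pointer; the function name and the "attn_output" pattern are illustrative, not part of the API:

#include <cstring>

#include "ggml.h"
#include "llama.h"

// Illustrative filter (not in the commit): report a tensor as containing
// trainable parameters only if its name contains the substring passed via
// userdata. All other tensors stay frozen.
static bool filter_by_name(const struct ggml_tensor * tensor, void * userdata) {
    const char * pattern = static_cast<const char *>(userdata);
    return std::strstr(ggml_get_name(tensor), pattern) != nullptr;
}

// usage sketch (other llama_opt_params fields omitted):
//   struct llama_opt_params opt_params = {};
//   opt_params.param_filter    = filter_by_name;
//   opt_params.param_filter_ud = (void *) "attn_output";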

src/llama-context.cpp (+1 -17)

@@ -1906,10 +1906,6 @@ void llama_context::opt_epoch_iter(
 
     llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
 
-    //is_encoding = false;
-    //llama_kv_cache_clear(lctx);
-    //llama_kv_slot_restorer kv_slot_restorer(lctx->kv_self);
-
     kv_self->clear();
     llama_kv_cache_guard kv_guard(kv_self);
 
@@ -1923,11 +1919,6 @@ void llama_context::opt_epoch_iter(
         batch.logits [pos_batch] = true;
     }
 
-    //{
-    //    const int err_code = llama_prepare_sbatch(*lctx, batch, n_outputs);
-    //    GGML_ASSERT(err_code == 0);
-    //}
-
     const auto n_tokens_all = batch.n_tokens;
 
     n_queued_tokens += n_tokens_all;
@@ -1939,7 +1930,7 @@ void llama_context::opt_epoch_iter(
 
     int64_t n_outputs_all = n_tokens_all;
 
-    llama_sbatch sbatch = kv_self->sbatch_init(batch, /* logits_all */ true);
+    llama_sbatch sbatch = kv_self->sbatch_init(batch, /*logits_all =*/ true);
 
     // reserve output buffer
     if (output_reserve(n_outputs_all) < n_outputs_all) {
@@ -1948,11 +1939,6 @@ void llama_context::opt_epoch_iter(
     };
 
     for (uint32_t pos_batch = 0; pos_batch < n_batch; pos_batch += n_ubatch) {
-        //struct llama_ubatch ubatch;
-        //{
-        //    const int err_code = llama_prepare_ubatch(*lctx, kv_slot_restorer, ubatch, n_outputs, batch.n_tokens);
-        //    GGML_ASSERT(err_code == 0);
-        //}
         llama_ubatch ubatch = kv_self->ubatch_next(sbatch, cparams.n_ubatch, embd_pooled);
 
         n_outputs = ubatch.n_tokens;
@@ -1967,7 +1953,6 @@ void llama_context::opt_epoch_iter(
         auto * gf = graph_init();
         auto res = graph_build(ctx_compute.get(), gf, ubatch, LLM_GRAPH_TYPE_DEFAULT);
 
-        //struct ggml_cgraph * gf = llama_build_graph(*lctx, ubatch, false);
         struct ggml_context * ctx_compute_opt;
         {
             const size_t size_gf = ggml_graph_size(gf);
@@ -1981,7 +1966,6 @@ void llama_context::opt_epoch_iter(
         }
         ggml_opt_prepare_alloc(opt_ctx, ctx_compute_opt, gf, res->get_tokens(), res->get_logits());
         ggml_opt_alloc(opt_ctx, train);
-        //llama_set_inputs(*lctx, ubatch);
         res->set_inputs(&ubatch);
         {
             struct ggml_tensor * labels = ggml_opt_labels(opt_ctx);
