@@ -1906,10 +1906,6 @@ void llama_context::opt_epoch_iter(
     llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
 
-    // is_encoding = false;
-    // llama_kv_cache_clear(lctx);
-    // llama_kv_slot_restorer kv_slot_restorer(lctx->kv_self);
-
     kv_self->clear();
     llama_kv_cache_guard kv_guard(kv_self);
@@ -1923,11 +1919,6 @@ void llama_context::opt_epoch_iter(
             batch.logits[pos_batch] = true;
         }
 
-        // {
-        //     const int err_code = llama_prepare_sbatch(*lctx, batch, n_outputs);
-        //     GGML_ASSERT(err_code == 0);
-        // }
-
         const auto n_tokens_all = batch.n_tokens;
 
         n_queued_tokens += n_tokens_all;
@@ -1939,7 +1930,7 @@ void llama_context::opt_epoch_iter(
 
         int64_t n_outputs_all = n_tokens_all;
 
-        llama_sbatch sbatch = kv_self->sbatch_init(batch, /* logits_all */ true);
+        llama_sbatch sbatch = kv_self->sbatch_init(batch, /* logits_all = */ true);
 
         // reserve output buffer
         if (output_reserve(n_outputs_all) < n_outputs_all) {
@@ -1948,11 +1939,6 @@ void llama_context::opt_epoch_iter(
         };
 
         for (uint32_t pos_batch = 0; pos_batch < n_batch; pos_batch += n_ubatch) {
-            // struct llama_ubatch ubatch;
-            // {
-            //     const int err_code = llama_prepare_ubatch(*lctx, kv_slot_restorer, ubatch, n_outputs, batch.n_tokens);
-            //     GGML_ASSERT(err_code == 0);
-            // }
             llama_ubatch ubatch = kv_self->ubatch_next(sbatch, cparams.n_ubatch, embd_pooled);
 
             n_outputs = ubatch.n_tokens;
@@ -1967,7 +1953,6 @@ void llama_context::opt_epoch_iter(
             auto * gf = graph_init();
             auto res = graph_build(ctx_compute.get(), gf, ubatch, LLM_GRAPH_TYPE_DEFAULT);
 
-            // struct ggml_cgraph * gf = llama_build_graph(*lctx, ubatch, false);
             struct ggml_context * ctx_compute_opt;
             {
                 const size_t size_gf = ggml_graph_size(gf);
@@ -1981,7 +1966,6 @@ void llama_context::opt_epoch_iter(
             }
             ggml_opt_prepare_alloc(opt_ctx, ctx_compute_opt, gf, res->get_tokens(), res->get_logits());
             ggml_opt_alloc(opt_ctx, train);
-            // llama_set_inputs(*lctx, ubatch);
             res->set_inputs(&ubatch);
             {
                 struct ggml_tensor * labels = ggml_opt_labels(opt_ctx);
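
For orientation, the hunks above drop the commented-out legacy path (`llama_prepare_sbatch`, `llama_prepare_ubatch`, `llama_build_graph`, `llama_set_inputs`) in favour of the `kv_self` sbatch/ubatch API. The sketch below is only a condensed view of the control flow that remains after this change, stitched together from the lines visible in the hunks; it is not a compilable excerpt, and the output reservation, label setup, and error handling of the real function are omitted.

```cpp
// Simplified post-change flow of opt_epoch_iter (names as they appear in the hunks above).
llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());

kv_self->clear();
llama_kv_cache_guard kv_guard(kv_self);

// the full training batch is split into ubatches through the cache's API
llama_sbatch sbatch = kv_self->sbatch_init(batch, /* logits_all = */ true);

for (uint32_t pos_batch = 0; pos_batch < n_batch; pos_batch += n_ubatch) {
    llama_ubatch ubatch = kv_self->ubatch_next(sbatch, cparams.n_ubatch, embd_pooled);

    // build the forward graph for this ubatch and let ggml-opt allocate it
    auto * gf  = graph_init();
    auto   res = graph_build(ctx_compute.get(), gf, ubatch, LLM_GRAPH_TYPE_DEFAULT);

    // ctx_compute_opt is the temporary ggml context sized from gf (set up in the hunk above)
    ggml_opt_prepare_alloc(opt_ctx, ctx_compute_opt, gf, res->get_tokens(), res->get_logits());
    ggml_opt_alloc(opt_ctx, train);

    // inputs are now set through the graph result instead of llama_set_inputs()
    res->set_inputs(&ubatch);
}
```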