@@ -44,7 +44,8 @@ int get_embeddings(void* params_ptr, void* state_pr, float * res_embeddings) {
44
44
params.seed = time (NULL );
45
45
}
46
46
47
- std::mt19937 rng (params.seed );
47
+ // no need for a rng
48
+ // std::mt19937 rng(params.seed);
48
49
49
50
int n_past = 0 ;
50
51
@@ -127,7 +128,8 @@ int llama_predict(void* params_ptr, void* state_pr, char* result, bool debug) {
127
128
params.seed = time (NULL );
128
129
}
129
130
130
- std::mt19937 rng (params.seed );
131
+ // no need for a rng
132
+ // std::mt19937 rng(params.seed);
131
133
132
134
if (params.rope_freq_base != 10000.0 ) {
133
135
fprintf (stderr, " %s: warning: changing RoPE frequency base to %g (default 10000.0)\n " , __func__, params.rope_freq_base );
@@ -171,7 +173,8 @@ int llama_predict(void* params_ptr, void* state_pr, char* result, bool debug) {
171
173
return 1 ;
172
174
}
173
175
session_tokens.resize (n_token_count_out);
174
- llama_set_rng_seed (ctx, params.seed );
176
+ // no need to set the seed here --- we'll always set it later
177
+ // llama_set_rng_seed(ctx, params.seed);
175
178
if (debug) {
176
179
fprintf (stderr, " %s: loaded a session with prompt size of %d tokens\n " , __func__, (int ) session_tokens.size ());
177
180
}
@@ -311,6 +314,9 @@ int llama_predict(void* params_ptr, void* state_pr, char* result, bool debug) {
311
314
llama_reset_timings (ctx);
312
315
}
313
316
317
+ // set the seed before actually predicting
318
+ llama_set_rng_seed (ctx, params.seed );
319
+
314
320
while (n_remain != 0 ) {
315
321
// predict
316
322
if (embd.size () > 0 ) {
@@ -878,4 +884,4 @@ void* load_binding_model(const char *fname, int n_ctx, int n_seed, bool memory_f
878
884
state->model= model;
879
885
return state;
880
886
}
881
- */
887
+ */
0 commit comments