36
36
#include < thread>
37
37
#include < vector>
38
38
39
- #include < iostream>
40
-
41
39
static void init_tensor_uniform (ggml_tensor * tensor, float min = -1 .0f , float max = 1 .0f ) {
42
40
size_t nels = ggml_nelements (tensor);
43
41
std::vector<float > data (nels);
@@ -49,8 +47,8 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
49
47
std::random_device rd;
50
48
std::vector<std::default_random_engine> vec;
51
49
vec.reserve (n_threads);
52
- for (size_t i = 0 ; i < n_threads; i++) { vec.emplace_back (1234 + i); } // fixed seed
53
- // for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
50
+ // for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed
51
+ for (size_t i = 0 ; i < n_threads; i++) { vec.emplace_back (rd ()); }
54
52
return vec;
55
53
}();
56
54
@@ -561,54 +559,6 @@ struct test_case {
561
559
}
562
560
}
563
561
564
- struct err_t {
565
- float a_val, b_val, err;
566
- size_t i;
567
- };
568
- std::vector<err_t > top_k_abs_err;
569
- std::vector<err_t > top_k_rel_err;
570
- size_t k = 10 ;
571
- auto a = f1.data ();
572
- auto b = f2.data (); // ref (cpu backend)
573
- auto save_top_k_err = [=](size_t i, float a_i, float b_i, float err, std::vector<err_t >& top_k_err) {
574
- if (top_k_err.size () < k) {
575
- top_k_err.push_back ({a_i, b_i, err, i});
576
- if (top_k_err.size () == k) {
577
- std::sort (top_k_err.begin (), top_k_err.end (), [](const err_t & x, const err_t & y) {
578
- return x.err > y.err ;
579
- });
580
- }
581
- } else if (top_k_err.back ().err < err) {
582
- top_k_err.back () = {a_i, b_i, err, i};
583
- std::sort (top_k_err.begin (), top_k_err.end (), [](const err_t & x, const err_t & y) {
584
- return x.err > y.err ;
585
- });
586
- }
587
- };
588
- double avg_abs_err = 0 .f ;
589
- double avg_rel_err = 0 .f ;
590
- for (size_t i = 0 ; i < f1.size (); i++) {
591
- float a_i = a[i];
592
- float b_i = b[i];
593
- float abs_err = std::fabs (a_i - b_i);
594
- float rel_err = (a_i - b_i) / std::fabs (b_i);
595
- save_top_k_err (i, a_i, b_i, abs_err, top_k_abs_err);
596
- save_top_k_err (i, a_i, b_i, rel_err, top_k_rel_err);
597
- avg_abs_err += abs_err;
598
- avg_rel_err += rel_err;
599
- }
600
- avg_abs_err /= f1.size ();
601
- avg_rel_err /= f1.size ();
602
- std::cout << " \n Avg abs err=" << avg_abs_err << " Top " << k << " abs err:\n " ;
603
- for (const auto & err : top_k_abs_err) {
604
- std::cout << " i=" << err.i << " a=" << err.a_val << " b=" << err.b_val << " abs err=" << err.err << " \n " ;
605
- }
606
- std::cout << " \n Avg rel err=" << avg_rel_err << " Top " << k << " rel err:\n " ;
607
- for (const auto & err : top_k_rel_err) {
608
- std::cout << " i=" << err.i << " a=" << err.a_val << " b=" << err.b_val << " rel err=" << err.err << " \n " ;
609
- }
610
- std::cout << std::endl;
611
-
612
562
double err = nmse (f1.data (), f2.data (), f1.size ());
613
563
if (err > ud->max_err ) {
614
564
printf (" [%s] NMSE = %.9f > %.9f " , ggml_op_desc (t1), err, ud->max_err );
@@ -2121,7 +2071,7 @@ struct test_mul_mat_id : public test_case {
2121
2071
const ggml_type type_b;
2122
2072
const int n_mats;
2123
2073
const int n_used;
2124
- const bool b; // brodcast b matrix
2074
+ const bool b; // broadcast b matrix
2125
2075
const int64_t m;
2126
2076
const int64_t n;
2127
2077
const int64_t k;
@@ -2656,6 +2606,8 @@ struct test_rope : public test_case {
2656
2606
} else {
2657
2607
out = ggml_rope_ext_back (ctx, a, pos, freq, n_dims, mode, 0 , 10000 .0f , fs, ef, af, 1 .0f , 1 .0f );
2658
2608
}
2609
+
2610
+ // TODO: add test with a non-contiguous view as input ; this case is needed for build_rope_2d in clip.cpp
2659
2611
}
2660
2612
ggml_set_name (out, " out" );
2661
2613
@@ -4195,13 +4147,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
4195
4147
test_cases.emplace_back (new test_mul_mat (type_a, GGML_TYPE_F32, 16 , i, 256 , { 1 , 1 }, {1 , 1 }));
4196
4148
}
4197
4149
}
4198
- // TODO: Romain
4199
- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 11008 , 1 , 4096 , {1 , 1 }, {1 , 1 }));
4200
- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 11008 , 2 , 4096 , {1 , 1 }, {1 , 1 }));
4201
- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 4096 , 1 , 11008 , {1 , 1 }, {1 , 1 }));
4202
- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 4096 , 1 , 4096 , {1 , 1 }, {1 , 1 }));
4203
- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 4096 , 2 , 11008 , {1 , 1 }, {1 , 1 }));
4204
- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 4096 , 2 , 4096 , {1 , 1 }, {1 , 1 }));
4205
4150
4206
4151
#if 1
4207
4152
for (ggml_type type_a : base_types) {
@@ -4485,10 +4430,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
4485
4430
test_cases.emplace_back (new test_timestep_embedding ());
4486
4431
test_cases.emplace_back (new test_leaky_relu ());
4487
4432
4488
- for (int hsk : { 64 , 80 , 128 , 192 , 256 , }) {
4489
- for (int hsv : { 64 , 80 , 128 , 192 , 256 , }) {
4490
- if (hsk != 192 && hsk != hsv) continue ;
4433
+ for (int hsk : { 64 , 80 , 128 , 192 , 256 , 576 }) {
4434
+ for (int hsv : { 64 , 80 , 128 , 192 , 256 , 512 }) {
4435
+ if (hsk != 192 && hsk != 576 && hsk != hsv) continue ;
4491
4436
if (hsk == 192 && (hsv != 128 && hsv != 192 )) continue ;
4437
+ if (hsk == 576 && hsv != 512 ) continue ; // DeepSeek MLA
4492
4438
4493
4439
for (bool mask : { true , false } ) {
4494
4440
for (float max_bias : { 0 .0f , 8 .0f }) {
0 commit comments