@@ -2601,6 +2601,80 @@ BOOST_AUTO_TEST_CASE(regression_switches_20231226)
2601
2601
#endif
2602
2602
2603
2603
2604
+ // Matrix A is inaccurate when optimizations are on and the code contains FMA4
2605
+ // instructions.
2606
+ // The problem can be avoided by compiling BLAS/SRC/sgemv.f with the
2607
+ // `-ffp-contract=off` flag or by disabling FMA (`-mno-fma -mno-fma4`).
2608
+ BOOST_AUTO_TEST_CASE (regression_switches_20240109)
2609
+ {
2610
+ auto m = 4 ;
2611
+ auto n = 5 ;
2612
+ auto p = 4 ;
2613
+ // auto rank_A = 2;
2614
+ // auto rank_B = 3;
2615
+ // auto rank_G = 5;
2616
+ auto hintprepa = ' N' ;
2617
+ auto hintprepb = ' N' ;
2618
+ auto hintprepcols = ' N' ;
2619
+ // auto w = std::ldexp(float{1}, 11);
2620
+ // auto seed = 687822830u;
2621
+
2622
+ auto caller = ggqrcs::Caller<float >(m, n, p);
2623
+ auto A = caller.A ;
2624
+ auto B = caller.B ;
2625
+
2626
+ A (0 , 0 ) = -1.228234741e+03 ;
2627
+ A (0 , 1 ) = -3.926374817e+01 ;
2628
+ A (0 , 2 ) = -7.065171387e+03 ;
2629
+ A (0 , 3 ) = +2.666662891e+04 ;
2630
+ A (0 , 4 ) = +1.932183789e+04 ;
2631
+ A (1 , 0 ) = +2.795048828e+03 ;
2632
+ A (1 , 1 ) = -5.566356812e+02 ;
2633
+ A (1 , 2 ) = +9.025947266e+03 ;
2634
+ A (1 , 3 ) = -3.554810938e+04 ;
2635
+ A (1 , 4 ) = -2.016102930e+04 ;
2636
+ A (2 , 0 ) = -1.109454004e+04 ;
2637
+ A (2 , 1 ) = -8.200556641e+03 ;
2638
+ A (2 , 2 ) = -8.091530273e+03 ;
2639
+ A (2 , 3 ) = +2.746268164e+04 ;
2640
+ A (2 , 4 ) = +1.388618359e+04 ;
2641
+ A (3 , 0 ) = -2.942973145e+03 ;
2642
+ A (3 , 1 ) = -1.481493164e+03 ;
2643
+ A (3 , 2 ) = -3.871124023e+03 ;
2644
+ A (3 , 3 ) = +1.440458008e+04 ;
2645
+ A (3 , 4 ) = +7.697737793e+03 ;
2646
+ B (0 , 0 ) = +1.285251465e+04 ;
2647
+ B (0 , 1 ) = +1.484784180e+03 ;
2648
+ B (0 , 2 ) = -3.879659668e+03 ;
2649
+ B (0 , 3 ) = +2.811796387e+03 ;
2650
+ B (0 , 4 ) = -1.277614648e+04 ;
2651
+ B (1 , 0 ) = +5.715941406e+03 ;
2652
+ B (1 , 1 ) = +6.548829346e+02 ;
2653
+ B (1 , 2 ) = -1.724442871e+03 ;
2654
+ B (1 , 3 ) = +1.284799194e+03 ;
2655
+ B (1 , 4 ) = -5.690834961e+03 ;
2656
+ B (2 , 0 ) = -4.446235352e+03 ;
2657
+ B (2 , 1 ) = -5.011452637e+02 ;
2658
+ B (2 , 2 ) = +1.339912354e+03 ;
2659
+ B (2 , 3 ) = -1.051421387e+03 ;
2660
+ B (2 , 4 ) = +4.440138672e+03 ;
2661
+ B (3 , 0 ) = +3.242603027e+03 ;
2662
+ B (3 , 1 ) = +3.580648804e+02 ;
2663
+ B (3 , 2 ) = -9.758657227e+02 ;
2664
+ B (3 , 3 ) = +8.134671021e+02 ;
2665
+ B (3 , 4 ) = -3.250209473e+03 ;
2666
+
2667
+ caller.A = A;
2668
+ caller.B = B;
2669
+ caller.hint_preprocess_a = hintprepa;
2670
+ caller.hint_preprocess_b = hintprepb;
2671
+ caller.hint_preprocess_cols = hintprepcols;
2672
+
2673
+ auto ret = caller ();
2674
+ check_results (ret, A, B, caller);
2675
+ }
2676
+
2677
+
2604
2678
// expect failures because xLANGE overflows when it should not
2605
2679
BOOST_TEST_DECORATOR (* boost::unit_test::expected_failures (3 ))
2606
2680
BOOST_AUTO_TEST_CASE_TEMPLATE(
0 commit comments