|
| 1 | +local tap = require('tap') |
| 2 | + |
| 3 | +-- Test file to demonstrate the numerical inaccuracy that may |
| 4 | +-- appear when a fused multiply-add (FMA) optimization is used. |
| 5 | +-- XXX: The JIT consistency is checked separately, in |
| 6 | +-- <lj-918-fma-numerical-accuracy-jit.test.lua>. |
| 7 | +-- See also: https://github.com/LuaJIT/LuaJIT/issues/918. |
| 8 | +local test = tap.test('lj-918-fma-numerical-accuracy') |
| 9 | + |
| 10 | +test:plan(2) |
| 11 | + |
| 12 | +local _2pow52 = 2 ^ 52 |
| 13 | + |
| 14 | +-- XXX: Before this commit the LuaJIT arm64 VM used the `fmsub` |
| 15 | +-- [1] instruction for the modulo operation. It is a fused |
| 16 | +-- multiply-add (FMA [2]) operation (more precisely, a fused |
| 17 | +-- multiply-subtract), so its result is rounded once and may |
| 18 | +-- differ from the unfused one, which is rounded twice. For the |
| 19 | +-- test, just use 2 operands of the modulo for which the single |
| 20 | +-- rounding differs from the double rounding. The numbers from |
| 21 | +-- the original issue are good enough. |
| 22 | +-- |
| 23 | +-- [1]:https://developer.arm.com/documentation/dui0801/g/A64-Floating-point-Instructions/FMSUB |
| 24 | +-- [2]:https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation |
| 25 | +-- |
| 26 | +-- IEEE754 components to double (`normal` is 1 below): |
| 27 | +-- sign * (2 ^ (exp - 1023)) * (mantissa / _2pow52 + normal). |
| 28 | +local a = 1 * (2 ^ (1083 - 1023)) * (4080546448249347 / _2pow52 + 1) |
| 29 | +assert(a == 2197541395358679800) |
| 30 | + |
| 31 | +local b = -1 * (2 ^ (1052 - 1023)) * (3927497732209973 / _2pow52 + 1) |
| 32 | +assert(b == -1005065126.3690554) |
| 33 | + |
| 34 | +-- Both tests fail on ARM64 before this patch or with the FMA |
| 35 | +-- optimization enabled. |
| 36 | +-- The first test may still pass if the compiler doesn't generate |
| 37 | +-- an ARM64 FMA operation in `lj_vm_foldarith()`. |
| 38 | +test:is(2197541395358679800 % -1005065126.3690554, -606337536, |
| 39 | + 'FMA in the lj_vm_foldarith() during parsing') |
| 40 | + |
| 41 | +test:is(a % b, -606337536, 'FMA in the VM') |
| 42 | + |
| 43 | +test:done(true) |
0 commit comments