From ddab4b3159ee277fbd5c27dc240e4b5db9137d46 Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Tue, 19 Dec 2023 09:42:05 +0900 Subject: [PATCH] [onert] Update half library (#12315) This commit updates half library to version 2.2.0 and adds README.md. ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh --- runtime/3rdparty/half/README.md | 7 + .../3rdparty/half/include/half/ChangeLog.txt | 10 +- .../3rdparty/half/include/half/LICENSE.txt | 2 +- runtime/3rdparty/half/include/half/README.txt | 2 +- .../half/include/half/include/half.hpp | 124 +++++++++++------- 5 files changed, 93 insertions(+), 52 deletions(-) create mode 100644 runtime/3rdparty/half/README.md diff --git a/runtime/3rdparty/half/README.md b/runtime/3rdparty/half/README.md new file mode 100644 index 00000000000..b1000b71365 --- /dev/null +++ b/runtime/3rdparty/half/README.md @@ -0,0 +1,7 @@ +# Origin of source code + +This library is based on the half library from SourceForge (https://sourceforge.net/projects/half/files/half/) + +# Version + +- 2.2.0 : https://sourceforge.net/projects/half/files/half/2.2.0/ diff --git a/runtime/3rdparty/half/include/half/ChangeLog.txt b/runtime/3rdparty/half/include/half/ChangeLog.txt index d9fc2bbbb82..c7489b2f3c9 100644 --- a/runtime/3rdparty/half/include/half/ChangeLog.txt +++ b/runtime/3rdparty/half/include/half/ChangeLog.txt @@ -1,6 +1,14 @@ -Release Notes +Release Notes {#changelog} ============= +2.2.0 release (2021-06-12): +--------------------------- + +- Added `rsqrt` function for inverse square root. +- Improved performance of `pow` function. +- Fixed bug that forgot to include `<immintrin.h>` for F16C intrinsics. 
+ + 2.1.0 release (2019-08-05): --------------------------- diff --git a/runtime/3rdparty/half/include/half/LICENSE.txt b/runtime/3rdparty/half/include/half/LICENSE.txt index 36c20d1e418..6023222b0ab 100644 --- a/runtime/3rdparty/half/include/half/LICENSE.txt +++ b/runtime/3rdparty/half/include/half/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License -Copyright (c) 2012-2019 Christian Rau +Copyright (c) 2012-2021 Christian Rau Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/runtime/3rdparty/half/include/half/README.txt b/runtime/3rdparty/half/include/half/README.txt index b8ca7ead18b..c7bf315fa16 100644 --- a/runtime/3rdparty/half/include/half/README.txt +++ b/runtime/3rdparty/half/include/half/README.txt @@ -1,4 +1,4 @@ -HALF-PRECISION FLOATING-POINT LIBRARY (Version 2.1.0) +HALF-PRECISION FLOATING-POINT LIBRARY (Version 2.2.0) ----------------------------------------------------- This is a C++ header-only library to provide an IEEE 754 conformant 16-bit diff --git a/runtime/3rdparty/half/include/half/include/half.hpp b/runtime/3rdparty/half/include/half/include/half.hpp index 0f60403e900..d0a882dd642 100644 --- a/runtime/3rdparty/half/include/half/include/half.hpp +++ b/runtime/3rdparty/half/include/half/include/half.hpp @@ -1,6 +1,6 @@ // half - IEEE 754-based half-precision floating-point library. 
// -// Copyright (c) 2012-2019 Christian Rau +// Copyright (c) 2012-2021 Christian Rau // // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, @@ -14,7 +14,7 @@ // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -// Version 2.1.0 +// Version 2.2.0 /// \file /// Main header file for half-precision functionality. @@ -266,9 +266,6 @@ #if HALF_ENABLE_CPP11_HASH #include #endif -#if HALF_ENABLE_F16C_INTRINSICS - #include -#endif #ifndef HALF_ENABLE_F16C_INTRINSICS @@ -280,6 +277,9 @@ /// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms. #define HALF_ENABLE_F16C_INTRINSICS __F16C__ #endif +#if HALF_ENABLE_F16C_INTRINSICS + #include +#endif #ifdef HALF_DOXYGEN_ONLY /// Type for internal floating-point computations. @@ -869,12 +869,12 @@ namespace half_float /// Convert fixed point to half-precision floating-point. 
/// \tparam R rounding mode to use - /// \tparam F number of fractional bits (at least 11) + /// \tparam F number of fractional bits in [11,31] /// \tparam S `true` for signed, `false` for unsigned /// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results /// \param m mantissa in Q1.F fixed point format - /// \param exp exponent + /// \param exp biased exponent - 1 /// \param sign half-precision value with sign bit only /// \param s sticky bit (or of all but the most significant already discarded bits) /// \return value converted to half-precision @@ -1676,34 +1676,34 @@ namespace half_float /// Postprocessing for binary exponential. /// \tparam R rounding mode to use - /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results - /// \param m mantissa as Q1.31 + /// \param m fractional part of the exponent as Q0.31 /// \param exp absolute value of unbiased exponent /// \param esign sign of actual exponent /// \param sign sign bit of result + /// \param n number of BKM iterations (at most 32) /// \return value converted to half-precision /// \exception FE_OVERFLOW on overflows /// \exception FE_UNDERFLOW on underflows /// \exception FE_INEXACT if value had to be rounded or \a I is `true` - template unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0) + template unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32) { - int s = 0; if(esign) { - if(m > 0x80000000) - { - m = divide64(0x80000000, m, s); - ++exp; - } - if(exp > 25) + exp = -exp - (m!=0); + if(exp < -25) return underflow(sign); - else if(exp == 25) - return rounded(sign, 1, (m&0x7FFFFFFF)!=0); - exp = -exp; + else if(exp == -25) + return rounded(sign, 1, m!=0); } else if(exp > 15) return overflow(sign); - return fixed2half(m, exp+14, sign, s); + if(!m) + return sign | (((exp+=15)>0) 
? (exp<<10) : check_underflow(0x200>>-exp)); + m = exp2(m, n); + int s = 0; + if(esign) + m = divide64(0x80000000, m, s); + return fixed2half(m, exp+14, sign, s); } /// Postprocessing for binary logarithm. @@ -1737,7 +1737,7 @@ namespace half_float /// Hypotenuse square root and postprocessing. /// \tparam R rounding mode to use /// \param r mantissa as Q2.30 - /// \param exp unbiased exponent + /// \param exp biased exponent /// \return square root converted to half-precision /// \exception FE_OVERFLOW on overflows /// \exception FE_UNDERFLOW on underflows @@ -2201,6 +2201,7 @@ namespace half_float friend half log2(half); friend half log1p(half); friend half sqrt(half); + friend half rsqrt(half); friend half cbrt(half); friend half hypot(half, half); friend half hypot(half, half, half); @@ -2937,7 +2938,7 @@ namespace half_float #ifdef HALF_ARITHMETIC_TYPE return half(detail::binary, detail::float2half(std::exp(detail::half2float(arg.data_)))); #else - int abs = arg.data_ & 0x7FFF; + int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp; if(!abs) return half(detail::binary, 0x3C00); if(abs >= 0x7C00) @@ -2945,7 +2946,6 @@ namespace half_float if(abs >= 0x4C80) return half(detail::binary, (arg.data_&0x8000) ? 
detail::underflow() : detail::overflow()); detail::uint32 m = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29); - int e = (abs>>10) + (abs<=0x3FF), exp; if(e < 14) { exp = 0; @@ -2956,7 +2956,7 @@ namespace half_float exp = m >> (45-e); m = (m<<(e-14)) & 0x7FFFFFFF; } - return half(detail::binary, detail::exp2_post(detail::exp2(m, 26), exp, (arg.data_&0x8000)!=0)); + return half(detail::binary, detail::exp2_post(m, exp, (arg.data_&0x8000)!=0, 0, 26)); #endif } @@ -2973,25 +2973,15 @@ namespace half_float #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH return half(detail::binary, detail::float2half(std::exp2(detail::half2float(arg.data_)))); #else - int abs = arg.data_ & 0x7FFF; + int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10); if(!abs) return half(detail::binary, 0x3C00); if(abs >= 0x7C00) return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_)); if(abs >= 0x4E40) return half(detail::binary, (arg.data_&0x8000) ? 
detail::underflow() : detail::overflow()); - int e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10); - detail::uint32 m = detail::exp2((static_cast(exp)<<(6+e))&0x7FFFFFFF, 28); - exp >>= 25 - e; - if(m == 0x80000000) - { - if(arg.data_&0x8000) - exp = -exp; - else if(exp > 15) - return half(detail::binary, detail::overflow()); - return half(detail::binary, detail::fixed2half(m, exp+14)); - } - return half(detail::binary, detail::exp2_post(m, exp, (arg.data_&0x8000)!=0)); + return half(detail::binary, detail::exp2_post( + (static_cast(exp)<<(6+e))&0x7FFFFFFF, exp>>(25-e), (arg.data_&0x8000)!=0, 0, 28)); #endif } @@ -3009,7 +2999,7 @@ namespace half_float #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH return half(detail::binary, detail::float2half(std::expm1(detail::half2float(arg.data_)))); #else - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000, e = (abs>>10) + (abs<=0x3FF), exp; if(!abs) return arg; if(abs >= 0x7C00) @@ -3017,7 +3007,6 @@ namespace half_float if(abs >= 0x4A00) return half(detail::binary, (arg.data_&0x8000) ? detail::rounded(0xBBFF, 1, 1) : detail::overflow()); detail::uint32 m = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29); - int e = (abs>>10) + (abs<=0x3FF), exp; if(e < 14) { exp = 0; @@ -3213,7 +3202,7 @@ namespace half_float /// \param arg function argument /// \return square root of \a arg /// \exception FE_INVALID for signaling NaN and negative arguments - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + /// \exception FE_INEXACT according to rounding inline half sqrt(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3228,6 +3217,42 @@ namespace half_float #endif } + /// Inverse square root. 
+ /// This function is exact to rounding for all rounding modes and thus generally more accurate than directly computing + /// 1 / sqrt(\a arg) in half-precision, in addition to also being faster. + /// \param arg function argument + /// \return reciprocal of square root of \a arg + /// \exception FE_INVALID for signaling NaN and negative arguments + /// \exception FE_INEXACT according to rounding + inline half rsqrt(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(detail::internal_t(1)/std::sqrt(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, bias = 0x4000; + if(!abs || arg.data_ >= 0x7C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? + detail::invalid() : !abs ? detail::pole(arg.data_&0x8000) : 0); + for(; abs<0x400; abs<<=1,bias-=0x400) ; + unsigned int frac = (abs+=bias) & 0x7FF; + if(frac == 0x400) + return half(detail::binary, 0x7A00-(abs>>1)); + if((half::round_style == std::round_to_nearest && (frac == 0x3FE || frac == 0x76C)) || + (half::round_style != std::round_to_nearest && (frac == 0x15A || frac == 0x3FC || frac == 0x401 || frac == 0x402 || frac == 0x67B))) + return pow(arg, half(detail::binary, 0xB800)); + detail::uint32 f = 0x17376 - abs, mx = (abs&0x3FF) | 0x400, my = ((f>>1)&0x3FF) | 0x400, mz = my * my; + int expy = (f>>11) - 31, expx = 32 - (abs>>10), i = mz >> 21; + for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ; + i = (my*=mz>>10) >> 31; + expy += i; + my = (my>>(20+i)) + 1; + i = (mz=my*my) >> 21; + for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ; + i = (my*=(mz>>10)+1) >> 31; + return half(detail::binary, detail::fixed2half(my>>i, expy+i+14)); + #endif + } + /// Cubic root. /// This function is exact to rounding for all rounding modes. 
/// @@ -3235,7 +3260,7 @@ namespace half_float /// \param arg function argument /// \return cubic root of \a arg /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + /// \exception FE_INEXACT according to rounding inline half cbrt(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3419,12 +3444,13 @@ namespace half_float return half(detail::binary, detail::invalid()); if(x.data_ == 0xBC00) return half(detail::binary, sign|0x3C00); - if(y.data_ == 0x3800) - return sqrt(x); - if(y.data_ == 0x3C00) - return half(detail::binary, detail::check_underflow(x.data_)); - if(y.data_ == 0x4000) - return x * x; + switch(y.data_) + { + case 0x3800: return sqrt(x); + case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_)); + case 0x4000: return x * x; + case 0xBC00: return half(detail::binary, 0x3C00) / x; + } for(; absx<0x400; absx<<=1,--exp) ; detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m = (((ilog<<27)+((detail::log2(static_cast((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign; @@ -3444,7 +3470,7 @@ namespace half_float f = (m<> (31-exp); } - return half(detail::binary, detail::exp2_post(detail::exp2(f), exp, ((msign&1)^(y.data_>>15))!=0, sign)); + return half(detail::binary, detail::exp2_post(f, exp, ((msign&1)^(y.data_>>15))!=0, sign)); #endif }