From ddab4b3159ee277fbd5c27dc240e4b5db9137d46 Mon Sep 17 00:00:00 2001
From: Hyeongseok Oh <hseok82.oh@samsung.com>
Date: Tue, 19 Dec 2023 09:42:05 +0900
Subject: [PATCH] [onert] Update half library (#12315)

This commit updates half library to version 2.2.0 and adds README.md.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
---
 runtime/3rdparty/half/README.md               |   7 +
 .../3rdparty/half/include/half/ChangeLog.txt  |  10 +-
 .../3rdparty/half/include/half/LICENSE.txt    |   2 +-
 runtime/3rdparty/half/include/half/README.txt |   2 +-
 .../half/include/half/include/half.hpp        | 124 +++++++++++-------
 5 files changed, 93 insertions(+), 52 deletions(-)
 create mode 100644 runtime/3rdparty/half/README.md
diff --git a/runtime/3rdparty/half/README.md b/runtime/3rdparty/half/README.md
new file mode 100644
index 00000000000..b1000b71365
--- /dev/null
+++ b/runtime/3rdparty/half/README.md
@@ -0,0 +1,7 @@
+# Origin of source code
+
+This library is based on half sourceforge(https://sourceforge.net/projects/half/files/half/)
+
+# Version
+
+- 2.2.0 : https://sourceforge.net/projects/half/files/half/2.2.0/
diff --git a/runtime/3rdparty/half/include/half/ChangeLog.txt b/runtime/3rdparty/half/include/half/ChangeLog.txt
index d9fc2bbbb82..c7489b2f3c9 100644
--- a/runtime/3rdparty/half/include/half/ChangeLog.txt
+++ b/runtime/3rdparty/half/include/half/ChangeLog.txt
@@ -1,6 +1,14 @@
-Release Notes
+Release Notes											{#changelog}
 =============
 
+2.2.0 release (2021-06-12):
+---------------------------
+
+- Added `rsqrt` function for inverse square root.
+- Improved performance of `pow` function.
+- Fixed bug that forgot to include `<immintrin.h>` for F16C intrinsics.
+
+
 2.1.0 release (2019-08-05):
 ---------------------------
 
diff --git a/runtime/3rdparty/half/include/half/LICENSE.txt b/runtime/3rdparty/half/include/half/LICENSE.txt
index 36c20d1e418..6023222b0ab 100644
--- a/runtime/3rdparty/half/include/half/LICENSE.txt
+++ b/runtime/3rdparty/half/include/half/LICENSE.txt
@@ -1,6 +1,6 @@
 The MIT License
 
-Copyright (c) 2012-2019 Christian Rau
+Copyright (c) 2012-2021 Christian Rau
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/runtime/3rdparty/half/include/half/README.txt b/runtime/3rdparty/half/include/half/README.txt
index b8ca7ead18b..c7bf315fa16 100644
--- a/runtime/3rdparty/half/include/half/README.txt
+++ b/runtime/3rdparty/half/include/half/README.txt
@@ -1,4 +1,4 @@
-HALF-PRECISION FLOATING-POINT LIBRARY (Version 2.1.0)
+HALF-PRECISION FLOATING-POINT LIBRARY (Version 2.2.0)
 -----------------------------------------------------
 
 This is a C++ header-only library to provide an IEEE 754 conformant 16-bit 
diff --git a/runtime/3rdparty/half/include/half/include/half.hpp b/runtime/3rdparty/half/include/half/include/half.hpp
index 0f60403e900..d0a882dd642 100644
--- a/runtime/3rdparty/half/include/half/include/half.hpp
+++ b/runtime/3rdparty/half/include/half/include/half.hpp
@@ -1,6 +1,6 @@
 // half - IEEE 754-based half-precision floating-point library.
 //
-// Copyright (c) 2012-2019 Christian Rau <rauy@users.sourceforge.net>
+// Copyright (c) 2012-2021 Christian Rau <rauy@users.sourceforge.net>
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
@@ -14,7 +14,7 @@
 // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
-// Version 2.1.0
+// Version 2.2.0
 
 /// \file
 /// Main header file for half-precision functionality.
@@ -266,9 +266,6 @@
 #if HALF_ENABLE_CPP11_HASH
 	#include <functional>
 #endif
-#if HALF_ENABLE_F16C_INTRINSICS
-	#include <immintrin.h>
-#endif
 
 
 #ifndef HALF_ENABLE_F16C_INTRINSICS
@@ -280,6 +277,9 @@
 	/// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms.
 	#define HALF_ENABLE_F16C_INTRINSICS __F16C__
 #endif
+#if HALF_ENABLE_F16C_INTRINSICS
+	#include <immintrin.h>
+#endif
 
 #ifdef HALF_DOXYGEN_ONLY
 /// Type for internal floating-point computations.
@@ -869,12 +869,12 @@ namespace half_float
 
 		/// Convert fixed point to half-precision floating-point.
 		/// \tparam R rounding mode to use
-		/// \tparam F number of fractional bits (at least 11)
+		/// \tparam F number of fractional bits in [11,31]
 		/// \tparam S `true` for signed, `false` for unsigned
 		/// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F
 		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
 		/// \param m mantissa in Q1.F fixed point format
-		/// \param exp exponent
+		/// \param exp biased exponent - 1
 		/// \param sign half-precision value with sign bit only
 		/// \param s sticky bit (or of all but the most significant already discarded bits)
 		/// \return value converted to half-precision
@@ -1676,34 +1676,34 @@ namespace half_float
 
 		/// Postprocessing for binary exponential.
 		/// \tparam R rounding mode to use
-		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
-		/// \param m mantissa as Q1.31
+		/// \param m fractional part of as Q0.31
 		/// \param exp absolute value of unbiased exponent
 		/// \param esign sign of actual exponent
 		/// \param sign sign bit of result
+		/// \param n number of BKM iterations (at most 32)
 		/// \return value converted to half-precision
 		/// \exception FE_OVERFLOW on overflows
 		/// \exception FE_UNDERFLOW on underflows
 		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
-		template<std::float_round_style R,bool I> unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0)
+		template<std::float_round_style R> unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32)
 		{
-			int s = 0;
 			if(esign)
 			{
-				if(m > 0x80000000)
-				{
-					m = divide64(0x80000000, m, s);
-					++exp;
-				}
-				if(exp > 25)
+				exp = -exp - (m!=0);
+				if(exp < -25)
 					return underflow<R>(sign);
-				else if(exp == 25)
-					return rounded<R,I>(sign, 1, (m&0x7FFFFFFF)!=0);
-				exp = -exp;
+				else if(exp == -25)
+					return rounded<R,false>(sign, 1, m!=0);
 			}
 			else if(exp > 15)
 				return overflow<R>(sign);
-			return fixed2half<R,31,false,false,I>(m, exp+14, sign, s);
+			if(!m)
+				return sign | (((exp+=15)>0) ? (exp<<10) : check_underflow(0x200>>-exp));
+			m = exp2(m, n);
+			int s = 0;
+			if(esign)
+				m = divide64(0x80000000, m, s);
+			return fixed2half<R,31,false,false,true>(m, exp+14, sign, s);
 		}
 
 		/// Postprocessing for binary logarithm.
@@ -1737,7 +1737,7 @@ namespace half_float
 		/// Hypotenuse square root and postprocessing.
 		/// \tparam R rounding mode to use
 		/// \param r mantissa as Q2.30
-		/// \param exp unbiased exponent
+		/// \param exp biased exponent
 		/// \return square root converted to half-precision
 		/// \exception FE_OVERFLOW on overflows
 		/// \exception FE_UNDERFLOW on underflows
@@ -2201,6 +2201,7 @@ namespace half_float
 		friend half log2(half);
 		friend half log1p(half);
 		friend half sqrt(half);
+		friend half rsqrt(half);
 		friend half cbrt(half);
 		friend half hypot(half, half);
 		friend half hypot(half, half, half);
@@ -2937,7 +2938,7 @@ namespace half_float
 	#ifdef HALF_ARITHMETIC_TYPE
 		return half(detail::binary, detail::float2half<half::round_style>(std::exp(detail::half2float<detail::internal_t>(arg.data_))));
 	#else
-		int abs = arg.data_ & 0x7FFF;
+		int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp;
 		if(!abs)
 			return half(detail::binary, 0x3C00);
 		if(abs >= 0x7C00)
@@ -2945,7 +2946,6 @@ namespace half_float
 		if(abs >= 0x4C80)
 			return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());
 		detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
-		int e = (abs>>10) + (abs<=0x3FF), exp;
 		if(e < 14)
 		{
 			exp = 0;
@@ -2956,7 +2956,7 @@ namespace half_float
 			exp = m >> (45-e);
 			m = (m<<(e-14)) & 0x7FFFFFFF;
 		}
-		return half(detail::binary, detail::exp2_post<half::round_style,true>(detail::exp2(m, 26), exp, (arg.data_&0x8000)!=0));
+		return half(detail::binary, detail::exp2_post<half::round_style>(m, exp, (arg.data_&0x8000)!=0, 0, 26));
 	#endif
 	}
 
@@ -2973,25 +2973,15 @@ namespace half_float
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
 		return half(detail::binary, detail::float2half<half::round_style>(std::exp2(detail::half2float<detail::internal_t>(arg.data_))));
 	#else
-		int abs = arg.data_ & 0x7FFF;
+		int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);
 		if(!abs)
 			return half(detail::binary, 0x3C00);
 		if(abs >= 0x7C00)
 			return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
 		if(abs >= 0x4E40)
 			return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());
-		int e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);
-		detail::uint32 m = detail::exp2((static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, 28);
-		exp >>= 25 - e;
-		if(m == 0x80000000)
-		{
-			if(arg.data_&0x8000)
-				exp = -exp;
-			else if(exp > 15)
-				return half(detail::binary, detail::overflow<half::round_style>());
-			return half(detail::binary, detail::fixed2half<half::round_style,31,false,false,false>(m, exp+14));
-		}
-		return half(detail::binary, detail::exp2_post<half::round_style,true>(m, exp, (arg.data_&0x8000)!=0));
+		return half(detail::binary, detail::exp2_post<half::round_style>(
+			(static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, exp>>(25-e), (arg.data_&0x8000)!=0, 0, 28));
 	#endif
 	}
 
@@ -3009,7 +2999,7 @@ namespace half_float
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
 		return half(detail::binary, detail::float2half<half::round_style>(std::expm1(detail::half2float<detail::internal_t>(arg.data_))));
 	#else
-		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000, e = (abs>>10) + (abs<=0x3FF), exp;
 		if(!abs)
 			return arg;
 		if(abs >= 0x7C00)
@@ -3017,7 +3007,6 @@ namespace half_float
 		if(abs >= 0x4A00)
 			return half(detail::binary, (arg.data_&0x8000) ? detail::rounded<half::round_style,true>(0xBBFF, 1, 1) : detail::overflow<half::round_style>());
 		detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
-		int e = (abs>>10) + (abs<=0x3FF), exp;
 		if(e < 14)
 		{
 			exp = 0;
@@ -3213,7 +3202,7 @@ namespace half_float
 	/// \param arg function argument
 	/// \return square root of \a arg
 	/// \exception FE_INVALID for signaling NaN and negative arguments
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	/// \exception FE_INEXACT according to rounding
 	inline half sqrt(half arg)
 	{
 	#ifdef HALF_ARITHMETIC_TYPE
@@ -3228,6 +3217,42 @@ namespace half_float
 	#endif
 	}
 
+	/// Inverse square root.
+	/// This function is exact to rounding for all rounding modes and thus generally more accurate than directly computing 
+	/// 1 / sqrt(\a arg) in half-precision, in addition to also being faster.
+	/// \param arg function argument
+	/// \return reciprocal of square root of \a arg
+	/// \exception FE_INVALID for signaling NaN and negative arguments
+	/// \exception FE_INEXACT according to rounding
+	inline half rsqrt(half arg)
+	{
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::internal_t(1)/std::sqrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, bias = 0x4000;
+		if(!abs || arg.data_ >= 0x7C00)
+			return half(detail::binary,	(abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ?
+										detail::invalid() : !abs ? detail::pole(arg.data_&0x8000) : 0);
+		for(; abs<0x400; abs<<=1,bias-=0x400) ;
+		unsigned int frac = (abs+=bias) & 0x7FF;
+		if(frac == 0x400)
+			return half(detail::binary, 0x7A00-(abs>>1));
+		if((half::round_style == std::round_to_nearest && (frac == 0x3FE || frac == 0x76C)) ||
+		   (half::round_style != std::round_to_nearest && (frac == 0x15A || frac == 0x3FC || frac == 0x401 || frac == 0x402 || frac == 0x67B)))
+			return pow(arg, half(detail::binary, 0xB800));
+		detail::uint32 f = 0x17376 - abs, mx = (abs&0x3FF) | 0x400, my = ((f>>1)&0x3FF) | 0x400, mz = my * my;
+		int expy = (f>>11) - 31, expx = 32 - (abs>>10), i = mz >> 21;
+		for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;
+		i = (my*=mz>>10) >> 31;
+		expy += i;
+		my = (my>>(20+i)) + 1;
+		i = (mz=my*my) >> 21;
+		for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ;
+		i = (my*=(mz>>10)+1) >> 31;
+		return half(detail::binary, detail::fixed2half<half::round_style,30,false,false,true>(my>>i, expy+i+14));
+	#endif
+	}
+
 	/// Cubic root.
 	/// This function is exact to rounding for all rounding modes.
 	///
@@ -3235,7 +3260,7 @@ namespace half_float
 	/// \param arg function argument
 	/// \return cubic root of \a arg
 	/// \exception FE_INVALID for signaling NaN
-	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	/// \exception FE_INEXACT according to rounding
 	inline half cbrt(half arg)
 	{
 	#if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH
@@ -3419,12 +3444,13 @@ namespace half_float
 			return half(detail::binary, detail::invalid());
 		if(x.data_ == 0xBC00)
 			return half(detail::binary, sign|0x3C00);
-		if(y.data_ == 0x3800)
-			return sqrt(x);
-		if(y.data_ == 0x3C00)
-			return half(detail::binary, detail::check_underflow(x.data_));
-		if(y.data_ == 0x4000)
-			return x * x;
+		switch(y.data_)
+		{
+			case 0x3800: return sqrt(x);
+			case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_));
+			case 0x4000: return x * x;
+			case 0xBC00: return half(detail::binary, 0x3C00) / x;
+		}
 		for(; absx<0x400; absx<<=1,--exp) ;
 		detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m = 
 			(((ilog<<27)+((detail::log2(static_cast<detail::uint32>((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign;
@@ -3444,7 +3470,7 @@ namespace half_float
 			f = (m<<exp) & 0x7FFFFFFF;
 			exp = m >> (31-exp);
 		}
-		return half(detail::binary, detail::exp2_post<half::round_style,false>(detail::exp2(f), exp, ((msign&1)^(y.data_>>15))!=0, sign));
+		return half(detail::binary, detail::exp2_post<half::round_style>(f, exp, ((msign&1)^(y.data_>>15))!=0, sign));
 	#endif
 	}