diff --git a/src/typed-geometry/detail/intrinsics.hh b/src/typed-geometry/detail/intrinsics.hh
new file mode 100644
index 0000000..b722afb
--- /dev/null
+++ b/src/typed-geometry/detail/intrinsics.hh
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <clean-core/macros.hh>
+
+// NOTE(review): the include targets were missing from the reviewed copy of this patch; they are
+// restored from the macro (CC_FORCE_INLINE) and the intrinsics used below - please confirm.
+#if defined(_MSC_VER)
+#include <intrin.h>
+#elif defined(__x86_64__) || defined(__i386__)
+// Gated on the target architecture rather than on __has_include("x86intrin.h"): the header can be
+// present on non-x86 targets (e.g. clang typically installs it for every target) where it must not be used.
+#include <immintrin.h>
+#include <x86intrin.h>
+#endif
+
+
+namespace tg::detail
+{
+/// Adds a, b and a carry-in and stores the low 64 bits of the sum in *out.
+/// Returns the carry-out of the addition (0 or 1).
+/// Any non-zero carry counts as a carry-in of 1 on every code path.
+CC_FORCE_INLINE char add_with_carry(char carry, unsigned long long a, unsigned long long b, unsigned long long* out)
+{
+#if defined(__x86_64__) || defined(_M_X64)
+    return _addcarry_u64(carry, a, b, out);
+#else
+#pragma message("[typed-geometry] Using fallback for add_with_carry(...). This may be slow!")
+    // normalize so the fallback mirrors the intrinsic path, where any non-zero carry-in acts as 1
+    unsigned long long const carry_in = carry != 0 ? 1 : 0;
+    unsigned long long const tmp = b + carry_in;
+    unsigned long long const sum = a + tmp;
+    *out = sum;
+    // at most one of the two additions can wrap around, so the result is 0 or 1
+    return char((tmp < carry_in) + (sum < tmp));
+#endif
+}
+
+/// Multiplies two unsigned 64 bit values into a full 128 bit product.
+/// Returns the low 64 bits and stores the high 64 bits in *high.
+CC_FORCE_INLINE unsigned long long mul128(unsigned long long a, unsigned long long b, unsigned long long* high)
+{
+#if defined(_MSC_VER) && defined(_M_X64)
+    return _umul128(a, b, high);
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+    // _umul128 only exists for x64; ARM64 exposes the high half as __umulh
+    *high = __umulh(a, b);
+    return a * b;
+#elif defined(__SIZEOF_INT128__)
+    // preferred over _mulx_u64, which needs BMI2 (-mbmi2) and an x86-64 target
+    __uint128_t const res = __uint128_t(a) * __uint128_t(b);
+    *high = static_cast<unsigned long long>(res >> 64);
+    return static_cast<unsigned long long>(res);
+#else
+#pragma message("[typed-geometry] Using fallback for mul128(...). This may be slow!")
+    using u64 = unsigned long long;
+    using u32 = unsigned int;
+    static_assert(sizeof(u64) == 8, "u64 must be exactly 64 bits");
+    static_assert(sizeof(u32) == 4, "u32 must be exactly 32 bits");
+
+    // split both operands into 32 bit halves
+    u64 a_lo = u64(u32(a));
+    u64 a_hi = a >> 32;
+    u64 b_lo = u64(u32(b));
+    u64 b_hi = b >> 32;
+
+    // four 32 x 32 -> 64 bit partial products
+    u64 p0 = a_lo * b_lo;
+    u64 p1 = a_lo * b_hi;
+    u64 p2 = a_hi * b_lo;
+    u64 p3 = a_hi * b_hi;
+
+    // carry out of bits 32..63 of the low word
+    u32 cy = u32(((p0 >> 32) + u32(p1) + u32(p2)) >> 32);
+
+    *high = p3 + (p1 >> 32) + (p2 >> 32) + cy;
+    return p0 + (p1 << 32) + (p2 << 32);
+#endif
+}
+}
diff --git a/src/typed-geometry/feature/fixed_int.hh b/src/typed-geometry/feature/fixed_int.hh
index 15f8af3..ced83c4 100644
--- a/src/typed-geometry/feature/fixed_int.hh
+++ b/src/typed-geometry/feature/fixed_int.hh
@@ -2,12 +2,6 @@
 
 #include
 
-#ifdef CC_COMPILER_MSVC
-#define TG_MUL_U128 _umul128
-#else
-#define TG_MUL_U128 _mulx_u64
-#endif
-
 #include
 #include
 #include
diff --git a/src/typed-geometry/functions/fixed_int/fixed_int_gen.hh b/src/typed-geometry/functions/fixed_int/fixed_int_gen.hh
index 46a529c..6ade999 100644
--- a/src/typed-geometry/functions/fixed_int/fixed_int_gen.hh
+++ b/src/typed-geometry/functions/fixed_int/fixed_int_gen.hh
@@ -2,13 +2,7 @@
 
 // This file was generated by generate_fixed_uint_multiplications.cc in TGSamples.
 
-#ifdef _MSC_VER -#include -#else -#include -#include -#endif - +#include #include namespace tg::detail @@ -22,7 +16,6 @@ using intrinsic_i128 = __int128; #endif using u64_word = fixed_int<1>::word_t; - template <> inline i128 imul(i128 lhs, i128 rhs) { @@ -31,11 +24,11 @@ inline i128 imul(i128 lhs, i128 rhs) u64_word l01 = 0; u64_word l10 = 0; u64_word h00 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); l01 = u64_word(lhs.d[0]) * u64_word(rhs.d[1]); l10 = u64_word(lhs.d[1]) * u64_word(rhs.d[0]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); res.d[1] = c + h00 + l01 + l10; return res; } @@ -106,14 +99,14 @@ inline i192 imul(i128 lhs, i64 rhs) u64_word l10 = 0; u64_word h00 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs), &h00); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs), &h10); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs), &h00); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs), &h10); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h10; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -147,15 +140,15 @@ inline i192 imul(i192 lhs, i64 rhs) u64_word l20 = 0; u64_word h00 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs), &h00); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs), &h10); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs), 
&h00); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs), &h10); l20 = u64_word(lhs.d[2]) * u64_word(rhs); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h10 + l20; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -186,14 +179,14 @@ inline i192 imul(i64 lhs, i128 rhs) u64_word l01 = 0; u64_word h00 = 0; u64_word h01 = 0; - l00 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[1]), &h01); + l00 = detail::mul128(u64_word(lhs), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs), u64_word(rhs.d[1]), &h01); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); res.d[2] = c + h01; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -229,17 +222,17 @@ inline i192 imul(i128 lhs, i128 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), 
&h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); l11 = u64_word(lhs.d[1]) * u64_word(rhs.d[1]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h01 + h10 + l11; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -278,18 +271,18 @@ inline i192 imul(i192 lhs, i128 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); l11 = u64_word(lhs.d[1]) * u64_word(rhs.d[1]); l20 = u64_word(lhs.d[2]) * u64_word(rhs.d[0]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, 
res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h01 + h10 + l11 + l20; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -323,15 +316,15 @@ inline i192 imul(i64 lhs, i192 rhs) u64_word l02 = 0; u64_word h00 = 0; u64_word h01 = 0; - l00 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[1]), &h01); + l00 = detail::mul128(u64_word(lhs), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs), u64_word(rhs.d[1]), &h01); l02 = u64_word(lhs) * u64_word(rhs.d[2]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); res.d[2] = c + h01 + l02; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -370,18 +363,18 @@ inline i192 imul(i128 lhs, i192 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); l02 = u64_word(lhs.d[0]) * u64_word(rhs.d[2]); l11 = u64_word(lhs.d[1]) * u64_word(rhs.d[1]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += 
detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h01 + l02 + h10 + l11; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -406,19 +399,19 @@ inline i192 imul(i192 lhs, i192 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); l02 = u64_word(lhs.d[0]) * u64_word(rhs.d[2]); l11 = u64_word(lhs.d[1]) * u64_word(rhs.d[1]); l20 = u64_word(lhs.d[2]) * u64_word(rhs.d[0]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h01 + l02 + h10 + l11 + l20; return res; } @@ -446,19 +439,19 @@ inline i256 imul(i192 lhs, i64 rhs) u64_word h00 = 0; u64_word h10 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs), &h00); - l10 = 
TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs), &h10); - l20 = TG_MUL_U128(u64_word(lhs.d[2]), u64_word(rhs), &h20); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs), &h00); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs), &h10); + l20 = detail::mul128(u64_word(lhs.d[2]), u64_word(rhs), &h20); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h20; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -498,20 +491,20 @@ inline i256 imul(i256 lhs, i64 rhs) u64_word h00 = 0; u64_word h10 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs), &h00); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs), &h10); - l20 = TG_MUL_U128(u64_word(lhs.d[2]), u64_word(rhs), &h20); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs), &h00); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs), &h10); + l20 = detail::mul128(u64_word(lhs.d[2]), u64_word(rhs), &h20); l30 = u64_word(lhs.d[3]) * u64_word(rhs); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c 
+= _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h20 + l30; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -550,22 +543,22 @@ inline i256 imul(i128 lhs, i128 rhs) u64_word h01 = 0; u64_word h10 = 0; u64_word h11 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, 
&res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); res.d[3] = c + h11; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -609,25 +602,25 @@ inline i256 imul(i192 lhs, i128 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); - l20 = TG_MUL_U128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l20 = detail::mul128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); l21 = u64_word(lhs.d[2]) * u64_word(rhs.d[1]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += 
_addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h11 + h20 + l21; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -674,26 +667,26 @@ inline i256 imul(i256 lhs, i128 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); - l20 = TG_MUL_U128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l20 = detail::mul128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); l21 = u64_word(lhs.d[2]) * u64_word(rhs.d[1]); l30 = u64_word(lhs.d[3]) * u64_word(rhs.d[0]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, 
&res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h11 + h20 + l21 + l30; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -730,19 +723,19 @@ inline i256 imul(i64 lhs, i192 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h02 = 0; - l00 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[1]), &h01); - l02 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[2]), &h02); + l00 = detail::mul128(u64_word(lhs), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs), u64_word(rhs.d[1]), &h01); + l02 = detail::mul128(u64_word(lhs), u64_word(rhs.d[2]), &h02); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); 
res.d[3] = c + h02; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -786,25 +779,25 @@ inline i256 imul(i128 lhs, i192 rhs) u64_word h02 = 0; u64_word h10 = 0; u64_word h11 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l02 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l02 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); l12 = u64_word(lhs.d[1]) * u64_word(rhs.d[2]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, 
res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); res.d[3] = c + h02 + h11 + l12; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -853,28 +846,28 @@ inline i256 imul(i192 lhs, i192 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l02 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); - l20 = TG_MUL_U128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l02 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l20 = detail::mul128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); l12 = u64_word(lhs.d[1]) * u64_word(rhs.d[2]); l21 = u64_word(lhs.d[2]) * u64_word(rhs.d[1]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, 
res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h02 + h11 + l12 + h20 + l21; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -926,29 +919,29 @@ inline i256 imul(i256 lhs, i192 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l02 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); - l20 = TG_MUL_U128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l02 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l20 = detail::mul128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); l12 = u64_word(lhs.d[1]) * u64_word(rhs.d[2]); l21 = u64_word(lhs.d[2]) * u64_word(rhs.d[1]); l30 = u64_word(lhs.d[3]) * u64_word(rhs.d[0]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, 
res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h02 + h11 + l12 + h20 + l21 + l30; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -988,20 +981,20 @@ inline i256 imul(i64 lhs, i256 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h02 = 0; - l00 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[1]), &h01); - l02 = TG_MUL_U128(u64_word(lhs), u64_word(rhs.d[2]), &h02); + l00 = detail::mul128(u64_word(lhs), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs), u64_word(rhs.d[1]), &h01); + l02 = detail::mul128(u64_word(lhs), u64_word(rhs.d[2]), &h02); l03 = u64_word(lhs) * u64_word(rhs.d[3]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += 
detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); res.d[3] = c + h02 + l03; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -1048,26 +1041,26 @@ inline i256 imul(i128 lhs, i256 rhs) u64_word h02 = 0; u64_word h10 = 0; u64_word h11 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l02 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l02 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); l03 = u64_word(lhs.d[0]) * u64_word(rhs.d[3]); l12 = u64_word(lhs.d[1]) * u64_word(rhs.d[2]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, 
res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); res.d[3] = c + h02 + l03 + h11 + l12; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -1119,29 +1112,29 @@ inline i256 imul(i192 lhs, i256 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l02 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); - l20 = TG_MUL_U128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l02 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l20 = detail::mul128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); l03 = u64_word(lhs.d[0]) * u64_word(rhs.d[3]); l12 = u64_word(lhs.d[1]) * u64_word(rhs.d[2]); l21 = u64_word(lhs.d[2]) * u64_word(rhs.d[1]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, 
res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h02 + l03 + h11 + l12 + h20 + l21; { // conditional inversion res.d[0] = ((u64_word(res.d[0]) ^ s_res) - s_res); @@ -1175,30 +1168,30 @@ inline i256 imul(i256 lhs, i256 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); - l01 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); - l02 = TG_MUL_U128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); - l10 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); - l11 = TG_MUL_U128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); - l20 = TG_MUL_U128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); + l00 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[0]), &h00); + l01 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[1]), &h01); + l02 = detail::mul128(u64_word(lhs.d[0]), u64_word(rhs.d[2]), &h02); + l10 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[0]), &h10); + l11 = detail::mul128(u64_word(lhs.d[1]), u64_word(rhs.d[1]), &h11); + l20 = 
detail::mul128(u64_word(lhs.d[2]), u64_word(rhs.d[0]), &h20); l03 = u64_word(lhs.d[0]) * u64_word(rhs.d[3]); l12 = u64_word(lhs.d[1]) * u64_word(rhs.d[2]); l21 = u64_word(lhs.d[2]) * u64_word(rhs.d[1]); l30 = u64_word(lhs.d[3]) * u64_word(rhs.d[0]); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h02 + l03 + h11 + l12 + h20 + l21 + l30; return res; } diff --git a/src/typed-geometry/functions/fixed_int/fixed_uint_gen.hh b/src/typed-geometry/functions/fixed_int/fixed_uint_gen.hh index fad16c9..786f44c 100644 --- a/src/typed-geometry/functions/fixed_int/fixed_uint_gen.hh +++ b/src/typed-geometry/functions/fixed_int/fixed_uint_gen.hh @@ -2,27 +2,21 @@ // This file was generated by generate_fixed_uint_multiplications.cc in TGSamples. 
-#ifdef _MSC_VER -#include -#else -#include -#endif - +#include #include namespace tg::detail { using u64_word = fixed_uint<1>::word_t; - template <> inline u128 mul(u64 lhs, u64 rhs) { u128 res; u64_word l00 = 0; u64_word h00 = 0; - l00 = TG_MUL_U128(lhs, rhs, &h00); + l00 = detail::mul128(lhs, rhs, &h00); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); res.d[1] = c + h00; return res; } @@ -34,10 +28,10 @@ inline u128 mul(u128 lhs, u64 rhs) u64_word l00 = 0; u64_word l10 = 0; u64_word h00 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs, &h00); + l00 = detail::mul128(lhs.d[0], rhs, &h00); l10 = lhs.d[1] * rhs; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); res.d[1] = c + h00 + l10; return res; } @@ -49,10 +43,10 @@ inline u128 mul(u64 lhs, u128 rhs) u64_word l00 = 0; u64_word l01 = 0; u64_word h00 = 0; - l00 = TG_MUL_U128(lhs, rhs.d[0], &h00); + l00 = detail::mul128(lhs, rhs.d[0], &h00); l01 = lhs * rhs.d[1]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); res.d[1] = c + h00 + l01; return res; } @@ -65,11 +59,11 @@ inline u128 mul(u128 lhs, u128 rhs) u64_word l01 = 0; u64_word l10 = 0; u64_word h00 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); l01 = lhs.d[0] * rhs.d[1]; l10 = lhs.d[1] * rhs.d[0]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); res.d[1] = c + h00 + l01 + l10; return res; } @@ -82,14 +76,14 @@ inline u192 mul(u128 lhs, u64 rhs) u64_word l10 = 0; u64_word h00 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs, &h00); - l10 = TG_MUL_U128(lhs.d[1], rhs, &h10); + l00 = detail::mul128(lhs.d[0], rhs, &h00); + l10 = detail::mul128(lhs.d[1], rhs, &h10); unsigned char c = 0; - 
c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h10; return res; } @@ -103,15 +97,15 @@ inline u192 mul(u192 lhs, u64 rhs) u64_word l20 = 0; u64_word h00 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs, &h00); - l10 = TG_MUL_U128(lhs.d[1], rhs, &h10); + l00 = detail::mul128(lhs.d[0], rhs, &h00); + l10 = detail::mul128(lhs.d[1], rhs, &h10); l20 = lhs.d[2] * rhs; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h10 + l20; return res; } @@ -124,14 +118,14 @@ inline u192 mul(u64 lhs, u128 rhs) u64_word l01 = 0; u64_word h00 = 0; u64_word h01 = 0; - l00 = TG_MUL_U128(lhs, rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs, rhs.d[1], &h01); + l00 = detail::mul128(lhs, rhs.d[0], &h00); + l01 = detail::mul128(lhs, rhs.d[1], &h01); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += 
detail::add_with_carry(0, res.d[1], l01, &res.d[1]); res.d[2] = c + h01; return res; } @@ -147,17 +141,17 @@ inline u192 mul(u128 lhs, u128 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); l11 = lhs.d[1] * rhs.d[1]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h01 + h10 + l11; return res; } @@ -174,18 +168,18 @@ inline u192 mul(u192 lhs, u128 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); l11 = lhs.d[1] * rhs.d[1]; l20 = lhs.d[2] * rhs.d[0]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + 
c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h01 + h10 + l11 + l20; return res; } @@ -199,15 +193,15 @@ inline u192 mul(u64 lhs, u192 rhs) u64_word l02 = 0; u64_word h00 = 0; u64_word h01 = 0; - l00 = TG_MUL_U128(lhs, rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs, rhs.d[1], &h01); + l00 = detail::mul128(lhs, rhs.d[0], &h00); + l01 = detail::mul128(lhs, rhs.d[1], &h01); l02 = lhs * rhs.d[2]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); res.d[2] = c + h01 + l02; return res; } @@ -224,18 +218,18 @@ inline u192 mul(u128 lhs, u192 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); l02 = lhs.d[0] * rhs.d[2]; l11 = lhs.d[1] * rhs.d[1]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, 
res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h01 + l02 + h10 + l11; return res; } @@ -253,19 +247,19 @@ inline u192 mul(u192 lhs, u192 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h10 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); l02 = lhs.d[0] * rhs.d[2]; l11 = lhs.d[1] * rhs.d[1]; l20 = lhs.d[2] * rhs.d[0]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); res.d[2] = c + h01 + l02 + h10 + l11 + l20; return res; } @@ -280,19 +274,19 @@ inline u256 mul(u192 lhs, u64 rhs) u64_word h00 = 0; u64_word h10 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs, &h00); - l10 = TG_MUL_U128(lhs.d[1], rhs, &h10); - l20 = TG_MUL_U128(lhs.d[2], rhs, &h20); + l00 = detail::mul128(lhs.d[0], rhs, &h00); + l10 = detail::mul128(lhs.d[1], rhs, &h10); + l20 = detail::mul128(lhs.d[2], rhs, &h20); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += 
_addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h20; return res; } @@ -308,20 +302,20 @@ inline u256 mul(u256 lhs, u64 rhs) u64_word h00 = 0; u64_word h10 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs, &h00); - l10 = TG_MUL_U128(lhs.d[1], rhs, &h10); - l20 = TG_MUL_U128(lhs.d[2], rhs, &h20); + l00 = detail::mul128(lhs.d[0], rhs, &h00); + l10 = detail::mul128(lhs.d[1], rhs, &h10); + l20 = detail::mul128(lhs.d[2], rhs, &h20); l30 = lhs.d[3] * rhs; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h20 + l30; return res; } @@ -338,22 +332,22 @@ inline u256 mul(u128 lhs, u128 rhs) u64_word h01 = 0; u64_word h10 = 0; u64_word h11 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = 
TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], &h11); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); res.d[3] = c + h11; return res; } @@ -373,25 +367,25 @@ inline u256 mul(u192 lhs, u128 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); - l20 = TG_MUL_U128(lhs.d[2], rhs.d[0], &h20); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], &h11); + l20 = detail::mul128(lhs.d[2], rhs.d[0], &h20); l21 = lhs.d[2] * rhs.d[1]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c 
+= _addcarry_u64(0, res.d[1], c, &res.d[1]); - c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); - c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); + c = 0; + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); + c = 0; + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h11 + h20 + l21; return res; } @@ -412,26 +406,26 @@ inline u256 mul(u256 lhs, u128 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); - l20 = TG_MUL_U128(lhs.d[2], rhs.d[0], &h20); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], &h11); + l20 = detail::mul128(lhs.d[2], rhs.d[0], &h20); l21 = lhs.d[2] * rhs.d[1]; l30 = lhs.d[3] * rhs.d[0]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); - c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, 
&res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); - c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); + c = 0; + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); + c = 0; + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h11 + h20 + l21 + l30; return res; } @@ -446,19 +440,19 @@ inline u256 mul(u64 lhs, u192 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h02 = 0; - l00 = TG_MUL_U128(lhs, rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs, rhs.d[1], &h01); - l02 = TG_MUL_U128(lhs, rhs.d[2], &h02); + l00 = detail::mul128(lhs, rhs.d[0], &h00); + l01 = detail::mul128(lhs, rhs.d[1], &h01); + l02 = detail::mul128(lhs, rhs.d[2], &h02); unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c 
+= _addcarry_u64(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); res.d[3] = c + h02; return res; } @@ -478,25 +472,25 @@ inline u256 mul(u128 lhs, u192 rhs) u64_word h02 = 0; u64_word h10 = 0; u64_word h11 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l02 = TG_MUL_U128(lhs.d[0], rhs.d[2], &h02); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l02 = detail::mul128(lhs.d[0], rhs.d[2], &h02); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], &h11); l12 = lhs.d[1] * rhs.d[2]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); - c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); - c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); + c = 0; + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); + c = 0; + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); res.d[3] = c + 
h02 + h11 + l12; return res; } @@ -519,28 +513,28 @@ inline u256 mul(u192 lhs, u192 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l02 = TG_MUL_U128(lhs.d[0], rhs.d[2], &h02); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); - l20 = TG_MUL_U128(lhs.d[2], rhs.d[0], &h20); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l02 = detail::mul128(lhs.d[0], rhs.d[2], &h02); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], &h11); + l20 = detail::mul128(lhs.d[2], rhs.d[0], &h20); l12 = lhs.d[1] * rhs.d[2]; l21 = lhs.d[2] * rhs.d[1]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); - c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); - c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); + c = 0; + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); + c = 0; + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += 
detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h02 + h11 + l12 + h20 + l21; return res; } @@ -564,29 +558,29 @@ inline u256 mul(u256 lhs, u192 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l02 = TG_MUL_U128(lhs.d[0], rhs.d[2], &h02); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); - l20 = TG_MUL_U128(lhs.d[2], rhs.d[0], &h20); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l02 = detail::mul128(lhs.d[0], rhs.d[2], &h02); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], &h11); + l20 = detail::mul128(lhs.d[2], rhs.d[0], &h20); l12 = lhs.d[1] * rhs.d[2]; l21 = lhs.d[2] * rhs.d[1]; l30 = lhs.d[3] * rhs.d[0]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); - c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); - c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); + c = 0; + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); + c = 0; + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += 
detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h02 + h11 + l12 + h20 + l21 + l30; return res; } @@ -602,20 +596,20 @@ inline u256 mul(u64 lhs, u256 rhs) u64_word h00 = 0; u64_word h01 = 0; u64_word h02 = 0; - l00 = TG_MUL_U128(lhs, rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs, rhs.d[1], &h01); - l02 = TG_MUL_U128(lhs, rhs.d[2], &h02); + l00 = detail::mul128(lhs, rhs.d[0], &h00); + l01 = detail::mul128(lhs, rhs.d[1], &h01); + l02 = detail::mul128(lhs, rhs.d[2], &h02); l03 = lhs * rhs.d[3]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); res.d[3] = c + h02 + l03; return res; } @@ -636,26 +630,26 @@ inline u256 mul(u128 lhs, u256 rhs) u64_word h02 = 0; u64_word h10 = 0; u64_word h11 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l02 = TG_MUL_U128(lhs.d[0], rhs.d[2], &h02); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l02 = detail::mul128(lhs.d[0], rhs.d[2], &h02); + l10 = 
detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], &h11); l03 = lhs.d[0] * rhs.d[3]; l12 = lhs.d[1] * rhs.d[2]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); - c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); - c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); + c = 0; + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); + c = 0; + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); res.d[3] = c + h02 + l03 + h11 + l12; return res; } @@ -679,29 +673,29 @@ inline u256 mul(u192 lhs, u256 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l02 = TG_MUL_U128(lhs.d[0], rhs.d[2], &h02); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); - l20 = TG_MUL_U128(lhs.d[2], rhs.d[0], &h20); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l02 = detail::mul128(lhs.d[0], rhs.d[2], &h02); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], 
&h11); + l20 = detail::mul128(lhs.d[2], rhs.d[0], &h20); l03 = lhs.d[0] * rhs.d[3]; l12 = lhs.d[1] * rhs.d[2]; l21 = lhs.d[2] * rhs.d[1]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); - c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); - c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); + c = 0; + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); + c = 0; + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h02 + l03 + h11 + l12 + h20 + l21; return res; } @@ -726,30 +720,30 @@ inline u256 mul(u256 lhs, u256 rhs) u64_word h10 = 0; u64_word h11 = 0; u64_word h20 = 0; - l00 = TG_MUL_U128(lhs.d[0], rhs.d[0], &h00); - l01 = TG_MUL_U128(lhs.d[0], rhs.d[1], &h01); - l02 = TG_MUL_U128(lhs.d[0], rhs.d[2], &h02); - l10 = TG_MUL_U128(lhs.d[1], rhs.d[0], &h10); - l11 = TG_MUL_U128(lhs.d[1], rhs.d[1], &h11); - l20 = TG_MUL_U128(lhs.d[2], rhs.d[0], &h20); + l00 = detail::mul128(lhs.d[0], rhs.d[0], &h00); + l01 = detail::mul128(lhs.d[0], rhs.d[1], &h01); + l02 = detail::mul128(lhs.d[0], 
rhs.d[2], &h02); + l10 = detail::mul128(lhs.d[1], rhs.d[0], &h10); + l11 = detail::mul128(lhs.d[1], rhs.d[1], &h11); + l20 = detail::mul128(lhs.d[2], rhs.d[0], &h20); l03 = lhs.d[0] * rhs.d[3]; l12 = lhs.d[1] * rhs.d[2]; l21 = lhs.d[2] * rhs.d[1]; l30 = lhs.d[3] * rhs.d[0]; unsigned char c = 0; - c += _addcarry_u64(0, res.d[0], l00, &res.d[0]); - c += _addcarry_u64(0, res.d[1], c, &res.d[1]); - c = 0; - c += _addcarry_u64(0, res.d[1], h00, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l01, &res.d[1]); - c += _addcarry_u64(0, res.d[1], l10, &res.d[1]); - c += _addcarry_u64(0, res.d[2], c, &res.d[2]); - c = 0; - c += _addcarry_u64(0, res.d[2], h01, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l02, &res.d[2]); - c += _addcarry_u64(0, res.d[2], h10, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l11, &res.d[2]); - c += _addcarry_u64(0, res.d[2], l20, &res.d[2]); + c += detail::add_with_carry(0, res.d[0], l00, &res.d[0]); + c += detail::add_with_carry(0, res.d[1], c, &res.d[1]); + c = 0; + c += detail::add_with_carry(0, res.d[1], h00, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l01, &res.d[1]); + c += detail::add_with_carry(0, res.d[1], l10, &res.d[1]); + c += detail::add_with_carry(0, res.d[2], c, &res.d[2]); + c = 0; + c += detail::add_with_carry(0, res.d[2], h01, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l02, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], h10, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l11, &res.d[2]); + c += detail::add_with_carry(0, res.d[2], l20, &res.d[2]); res.d[3] = c + h02 + l03 + h11 + l12 + h20 + l21 + l30; return res; }