Merge improvements to isfinite

Thomas Grützmacher · web-flow · commit e8de52780858 · 2020-03-02T11:58:31.000+01:00
Improves and simplifies the implementation of the hand-written `isfinite` function. It now produces the same PTX code as the built-in CUDA `isfinite` function (at least in a small test case). Related PR: #465
diff --git a/common/base/math.hpp.inc b/common/base/math.hpp.inc
@@ -30,6 +30,16 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
+// We need this struct, because otherwise we would call a __host__ function in a
+// __device__ function (even though it is constexpr)
+template <typename T>
+struct device_numeric_limits {
+    static constexpr auto inf = std::numeric_limits<T>::infinity();
+    static constexpr auto max = std::numeric_limits<T>::max();
+    static constexpr auto min = std::numeric_limits<T>::min();
+};
+
+
 namespace detail {
 
 
@@ -50,6 +60,38 @@ struct truncate_type_impl<thrust::complex<T>> {
 };
 
 
+/**
+ * Checks if a given value is finite, meaning it is neither +/- infinity
+ * nor NaN.
+ *
+ * @internal  Should only be used if the provided one (from CUDA or HIP)
+ *            cannot be used.
+ *            Designed to mirror the math function of CUDA (the PTX code
+ *            was identical in the test case).
+ *
+ * @tparam T  type of the value to check
+ *
+ * @param value  value to check
+ *
+ * @return  `true` if the given value is finite, meaning it is neither
+ *          +/- infinity nor NaN.
+ */
+template <typename T>
+GKO_INLINE __device__ xstd::enable_if_t<!is_complex_s<T>::value, bool>
+custom_isfinite(T value)
+{
+    constexpr T infinity{device_numeric_limits<T>::inf};
+    return abs(value) < infinity;
+}
+
+template <typename T>
+GKO_INLINE __device__ xstd::enable_if_t<is_complex_s<T>::value, bool>
+custom_isfinite(T value)
+{
+    return custom_isfinite(value.real()) && custom_isfinite(value.imag());
+}
+
+
 }  // namespace detail
 
 
@@ -66,103 +108,19 @@ struct truncate_type_impl<thrust::complex<T>> {
       (defined(__clang__) || defined(__ICC) || defined(__ICL))))
 
 
-namespace detail {
-
-
-/**
- * This structure can be used to get the exponent mask of a given floating
- * point type. Uses specialization to implement different types.
- */
-template <typename T>
-struct mask_creator {};
-
-template <>
-struct mask_creator<float> {
-    using int_type = int32;
-    static constexpr int_type number_exponent_bits = 8;
-    static constexpr int_type number_significand_bits = 23;
-    // integer representation of a floating point number, where all exponent
-    // bits are set
-    static constexpr int_type exponent_mask =
-        ((int_type{1} << number_exponent_bits) - 1) << number_significand_bits;
-    static __device__ int_type reinterpret_int(const float &value)
-    {
-        return __float_as_int(value);
-    }
-};
-
-template <>
-struct mask_creator<double> {
-    using int_type = int64;
-    static constexpr int_type number_exponent_bits = 11;
-    static constexpr int_type number_significand_bits = 52;
-    // integer representation of a floating point number, where all exponent
-    // bits are set
-    static constexpr int_type exponent_mask =
-        ((int_type{1} << number_exponent_bits) - 1) << number_significand_bits;
-    static __device__ int_type reinterpret_int(const double &value)
-    {
-        return __double_as_longlong(value);
-    }
-};
-
-
-}  // namespace detail
-
-
-/**
- * Checks if a given value is finite, meaning it is neither +/- infinity
- * nor NaN.
- *
- * @internal  It checks if all exponent bits are set. If all are set, the
- *            number either represents NaN or +/- infinity, meaning it is a
- *            non-finite number.
- *
- * @param value  value to check
- *
- * returns `true` if the given value is finite, meaning it is neither
- *         +/- infinity nor NaN.
- */
-#define GKO_DEFINE_ISFINITE_FOR_TYPE(_type)                               \
-    GKO_INLINE __device__ bool isfinite(const _type &value)               \
-    {                                                                     \
-        constexpr auto mask = detail::mask_creator<_type>::exponent_mask; \
-        const auto re_int =                                               \
-            detail::mask_creator<_type>::reinterpret_int(value);          \
-        return (re_int & mask) != mask;                                   \
+#define GKO_DEFINE_ISFINITE_FOR_TYPE(_type)                 \
+    GKO_INLINE __device__ bool isfinite(const _type &value) \
+    {                                                       \
+        return detail::custom_isfinite(value);              \
     }
 
 GKO_DEFINE_ISFINITE_FOR_TYPE(float)
 GKO_DEFINE_ISFINITE_FOR_TYPE(double)
+GKO_DEFINE_ISFINITE_FOR_TYPE(thrust::complex<float>)
+GKO_DEFINE_ISFINITE_FOR_TYPE(thrust::complex<double>)
 #undef GKO_DEFINE_ISFINITE_FOR_TYPE
 
 
-/**
- * Checks if all components of a complex value are finite, meaning they are
- * neither +/- infinity nor NaN.
- *
- * @internal required for the clang compiler. This function will be used rather
- *           than the `isfinite` function in the public `math.hpp` because
- *           there is no template parameter, so it is prefered during lookup.
- *
- * @tparam T  complex type of the value to check
- *
- * @param value  complex value to check
- *
- * returns `true` if both components of the given value are finite, meaning
- *         they are neither +/- infinity nor NaN.
- */
-#define GKO_DEFINE_ISFINITE_FOR_COMPLEX_TYPE(_type)              \
-    GKO_INLINE __device__ bool isfinite(const _type &value)      \
-    {                                                            \
-        return isfinite(value.real()) && isfinite(value.imag()); \
-    }
-
-GKO_DEFINE_ISFINITE_FOR_COMPLEX_TYPE(thrust::complex<float>)
-GKO_DEFINE_ISFINITE_FOR_COMPLEX_TYPE(thrust::complex<double>)
-#undef GKO_DEFINE_ISFINITE_FOR_COMPLEX_TYPE
-
-
 // For all other compiler in combination with CUDA or HIP, just use the provided
 // `isfinite` function
 #elif defined(__CUDA_ARCH__) || __HIP_DEVICE_COMPILE__
@@ -173,13 +131,3 @@ using ::isfinite;
 
 
 #endif  // defined(__CUDA_ARCH__) || __HIP_DEVICE_COMPILE__
-
-
-// We need this struct, because otherwise we would call a __host__ function in a
-// __device__ function (even though it is constexpr)
-template <typename T>
-struct device_numeric_limits {
-    static constexpr auto inf = std::numeric_limits<T>::infinity();
-    static constexpr auto max = std::numeric_limits<T>::max();
-    static constexpr auto min = std::numeric_limits<T>::min();
-};
diff --git a/common/factorization/par_ilu_kernels.hpp.inc b/common/factorization/par_ilu_kernels.hpp.inc
@@ -317,12 +317,12 @@ __global__ __launch_bounds__(default_block_size) void compute_l_u_factors(
         sum += last_operation;  // undo the last operation
         if (row > col) {
             auto to_write = sum / u_values[u_row_ptrs[col + 1] - 1];
-            if (::gko::isfinite(to_write)) {
+            if (gko::isfinite(to_write)) {
                 l_values[l_idx - 1] = to_write;
             }
         } else {
             auto to_write = sum;
-            if (::gko::isfinite(to_write)) {
+            if (gko::isfinite(to_write)) {
                 u_values[u_idx - 1] = to_write;
             }
         }
diff --git a/cuda/test/base/math.cu b/cuda/test/base/math.cu
@@ -50,47 +50,68 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 namespace {
+namespace kernel {
 
 
-template <typename T>
-__global__ void test_real_isfinite(bool *result)
+template <typename T, typename FuncType>
+__device__ bool test_real_isfinite_function(FuncType isfin)
 {
-    constexpr T inf = INFINITY;
+    constexpr T inf = gko::device_numeric_limits<T>::inf;
+    constexpr T quiet_nan = NAN;
     bool test_true{};
     bool test_false{};
 
-    test_true =
-        gko::isfinite(T{0}) && gko::isfinite(-T{0}) && gko::isfinite(T{1});
-    test_false = gko::isfinite(inf) || gko::isfinite(-inf) ||
-                 gko::isfinite(NAN) || gko::isfinite(inf - inf) ||
-                 gko::isfinite(inf / inf) || gko::isfinite(inf * T{2}) ||
-                 gko::isfinite(T{1} / T{0}) || gko::isfinite(T{0} / T{0});
-    *result = test_true && !test_false;
+    test_true = isfin(T{0}) && isfin(-T{0}) && isfin(T{1});
+    test_false = isfin(inf) || isfin(-inf) || isfin(quiet_nan) ||
+                 isfin(inf - inf) || isfin(inf / inf) || isfin(inf * T{2}) ||
+                 isfin(T{1} / T{0}) || isfin(T{0} / T{0});
+    return test_true && !test_false;
 }
 
 
-template <typename ComplexType>
-__global__ void test_complex_isfinite(bool *result)
+template <typename ComplexType, typename FuncType>
+__device__ bool test_complex_isfinite_function(FuncType isfin)
 {
     static_assert(gko::is_complex_s<ComplexType>::value,
                   "Template type must be a complex type.");
     using T = gko::remove_complex<ComplexType>;
     using c_type = gko::kernels::cuda::cuda_type<ComplexType>;
-    constexpr T inf = INFINITY;
+    constexpr T inf = gko::device_numeric_limits<T>::inf;
     constexpr T quiet_nan = NAN;
     bool test_true{};
     bool test_false{};
 
-    test_true = gko::isfinite(c_type{T{0}, T{0}}) &&
-                gko::isfinite(c_type{-T{0}, -T{0}}) &&
-                gko::isfinite(c_type{T{1}, T{0}}) &&
-                gko::isfinite(c_type{T{0}, T{1}});
-    test_false =
-        gko::isfinite(c_type{inf, T{0}}) || gko::isfinite(c_type{-inf, T{0}}) ||
-        gko::isfinite(c_type{quiet_nan, T{0}}) ||
-        gko::isfinite(c_type{T{0}, inf}) || gko::isfinite(c_type{T{0}, -inf}) ||
-        gko::isfinite(c_type{T{0}, quiet_nan});
-    *result = test_true && !test_false;
+    test_true = isfin(c_type{T{0}, T{0}}) && isfin(c_type{-T{0}, -T{0}}) &&
+                isfin(c_type{T{1}, T{0}}) && isfin(c_type{T{0}, T{1}});
+    test_false = isfin(c_type{inf, T{0}}) || isfin(c_type{-inf, T{0}}) ||
+                 isfin(c_type{quiet_nan, T{0}}) || isfin(c_type{T{0}, inf}) ||
+                 isfin(c_type{T{0}, -inf}) || isfin(c_type{T{0}, quiet_nan});
+    return test_true && !test_false;
+}
+
+
+}  // namespace kernel
+
+
+template <typename T>
+__global__ void test_real_isfinite(bool *result)
+{
+    bool gko_isfinite = kernel::test_real_isfinite_function<T>(
+        [](T val) { return gko::isfinite(val); });
+    bool custom_isfinite = kernel::test_real_isfinite_function<T>(
+        [](T val) { return gko::detail::custom_isfinite(val); });
+    *result = gko_isfinite && custom_isfinite;
+}
+
+
+template <typename ComplexType>
+__global__ void test_complex_isfinite(bool *result)
+{
+    bool gko_isfinite = kernel::test_complex_isfinite_function<ComplexType>(
+        [](ComplexType val) { return gko::isfinite(val); });
+    bool custom_isfinite = kernel::test_complex_isfinite_function<ComplexType>(
+        [](ComplexType val) { return gko::detail::custom_isfinite(val); });
+    *result = gko_isfinite && custom_isfinite;
 }
 
 
diff --git a/hip/test/base/math.hip.cpp b/hip/test/base/math.hip.cpp
@@ -51,47 +51,68 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 namespace {
+namespace kernel {
 
 
-template <typename T>
-__global__ void test_real_isfinite(bool *result)
+template <typename T, typename FuncType>
+__device__ bool test_real_isfinite_function(FuncType isfin)
 {
-    constexpr T inf = INFINITY;
+    constexpr T inf = gko::device_numeric_limits<T>::inf;
+    constexpr T quiet_nan = NAN;
     bool test_true{};
     bool test_false{};
 
-    test_true =
-        gko::isfinite(T{0}) && gko::isfinite(-T{0}) && gko::isfinite(T{1});
-    test_false = gko::isfinite(inf) || gko::isfinite(-inf) ||
-                 gko::isfinite(NAN) || gko::isfinite(inf - inf) ||
-                 gko::isfinite(inf / inf) || gko::isfinite(inf * T{2}) ||
-                 gko::isfinite(T{1} / T{0}) || gko::isfinite(T{0} / T{0});
-    *result = test_true && !test_false;
+    test_true = isfin(T{0}) && isfin(-T{0}) && isfin(T{1});
+    test_false = isfin(inf) || isfin(-inf) || isfin(quiet_nan) ||
+                 isfin(inf - inf) || isfin(inf / inf) || isfin(inf * T{2}) ||
+                 isfin(T{1} / T{0}) || isfin(T{0} / T{0});
+    return test_true && !test_false;
 }
 
 
-template <typename ComplexType>
-__global__ void test_complex_isfinite(bool *result)
+template <typename ComplexType, typename FuncType>
+__device__ bool test_complex_isfinite_function(FuncType isfin)
 {
     static_assert(gko::is_complex_s<ComplexType>::value,
                   "Template type must be a complex type.");
     using T = gko::remove_complex<ComplexType>;
     using c_type = gko::kernels::hip::hip_type<ComplexType>;
-    constexpr T inf = INFINITY;
+    constexpr T inf = gko::device_numeric_limits<T>::inf;
     constexpr T quiet_nan = NAN;
     bool test_true{};
     bool test_false{};
 
-    test_true = gko::isfinite(c_type{T{0}, T{0}}) &&
-                gko::isfinite(c_type{-T{0}, -T{0}}) &&
-                gko::isfinite(c_type{T{1}, T{0}}) &&
-                gko::isfinite(c_type{T{0}, T{1}});
-    test_false =
-        gko::isfinite(c_type{inf, T{0}}) || gko::isfinite(c_type{-inf, T{0}}) ||
-        gko::isfinite(c_type{quiet_nan, T{0}}) ||
-        gko::isfinite(c_type{T{0}, inf}) || gko::isfinite(c_type{T{0}, -inf}) ||
-        gko::isfinite(c_type{T{0}, quiet_nan});
-    *result = test_true && !test_false;
+    test_true = isfin(c_type{T{0}, T{0}}) && isfin(c_type{-T{0}, -T{0}}) &&
+                isfin(c_type{T{1}, T{0}}) && isfin(c_type{T{0}, T{1}});
+    test_false = isfin(c_type{inf, T{0}}) || isfin(c_type{-inf, T{0}}) ||
+                 isfin(c_type{quiet_nan, T{0}}) || isfin(c_type{T{0}, inf}) ||
+                 isfin(c_type{T{0}, -inf}) || isfin(c_type{T{0}, quiet_nan});
+    return test_true && !test_false;
+}
+
+
+}  // namespace kernel
+
+
+template <typename T>
+__global__ void test_real_isfinite(bool *result)
+{
+    bool gko_isfinite = kernel::test_real_isfinite_function<T>(
+        [](T val) { return gko::isfinite(val); });
+    bool custom_isfinite = kernel::test_real_isfinite_function<T>(
+        [](T val) { return gko::detail::custom_isfinite(val); });
+    *result = gko_isfinite && custom_isfinite;
+}
+
+
+template <typename ComplexType>
+__global__ void test_complex_isfinite(bool *result)
+{
+    bool gko_isfinite = kernel::test_complex_isfinite_function<ComplexType>(
+        [](ComplexType val) { return gko::isfinite(val); });
+    bool custom_isfinite = kernel::test_complex_isfinite_function<ComplexType>(
+        [](ComplexType val) { return gko::detail::custom_isfinite(val); });
+    *result = gko_isfinite && custom_isfinite;
 }
 
 
diff --git a/include/ginkgo/core/base/math.hpp b/include/ginkgo/core/base/math.hpp
@@ -730,7 +730,8 @@ template <typename T>
 GKO_INLINE GKO_ATTRIBUTES xstd::enable_if_t<!is_complex_s<T>::value, bool>
 isfinite(const T &value)
 {
-    return std::isfinite(value);
+    using std::isfinite;
+    return isfinite(value);
 }
 
 #endif  // defined(__CUDA_ARCH__)

Original file line number	Diff line number	Diff line change
`@@ -317,12 +317,12 @@ __global__ __launch_bounds__(default_block_size) void compute_l_u_factors(`
`317`	`317`	`sum += last_operation; // undo the last operation`
`318`	`318`	`if (row > col) {`
`319`	`319`	`auto to_write = sum / u_values[u_row_ptrs[col + 1] - 1];`
`320`		`- if (::gko::isfinite(to_write)) {`
	`320`	`+ if (gko::isfinite(to_write)) {`
`321`	`321`	`l_values[l_idx - 1] = to_write;`
`322`	`322`	`}`
`323`	`323`	`} else {`
`324`	`324`	`auto to_write = sum;`
`325`		`- if (::gko::isfinite(to_write)) {`
	`325`	`+ if (gko::isfinite(to_write)) {`
`326`	`326`	`u_values[u_idx - 1] = to_write;`
`327`	`327`	`}`
`328`	`328`	`}`
Original file line number	Diff line number	Diff line change
`@@ -730,7 +730,8 @@ template <typename T>`
`730`	`730`	`GKO_INLINE GKO_ATTRIBUTES xstd::enable_if_t<!is_complex_s<T>::value, bool>`
`731`	`731`	`isfinite(const T &value)`
`732`	`732`	`{`
`733`		`- return std::isfinite(value);`
	`733`	`+ using std::isfinite;`
	`734`	`+ return isfinite(value);`
`734`	`735`	`}`
`735`	`736`
`736`	`737`	`#endif // defined(__CUDA_ARCH__)`