IntelPython
diff --git a/‎dpctl/tensor/__init__.py
Lines changed: 8 additions & 0 deletions b/‎dpctl/tensor/__init__.py
Lines changed: 8 additions & 0 deletions
diff --git a/‎dpctl/tensor/_elementwise_funcs.py
Lines changed: 84 additions & 4 deletions b/‎dpctl/tensor/_elementwise_funcs.py
Lines changed: 84 additions & 4 deletions
diff --git a/‎dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp
Lines changed: 236 additions & 0 deletions b/‎dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp
Lines changed: 236 additions & 0 deletions
@@ -116,11 +116,15 @@
     logical_or,
     logical_xor,
     multiply,
+    negative,
     not_equal,
+    positive,
+    pow,
     proj,
     real,
     sin,
     sqrt,
+    square,
     subtract,
 )
 from ._reduction import sum
@@ -220,12 +224,16 @@
     "logical_or",
     "logical_xor",
     "log1p",
+    "negative",
+    "positive",
     "proj",
     "real",
     "sin",
     "sqrt",
+    "square",
     "divide",
     "multiply",
+    "pow",
     "subtract",
     "equal",
     "not_equal",
 
@@ -715,7 +715,27 @@
 )
 
 # U25: ==== NEGATIVE    (x)
-# FIXME: implement U25
+_negative_docstring_ = """
+negative(x, out=None, order='K')
+
+Computes the numerical negative for each element `x_i` of input array `x`.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out (usm_ndarray):
+        Output array to populate. Array must have the correct
+        shape and the expected data type.
+    order ("C","F","A","K", optional): memory layout of the new
+        output array, if parameter `out` is `None`.
+        Default: "K".
+Return:
+    usm_ndarray:
+        An array containing the negative of `x`.
+"""
+
+negative = UnaryElementwiseFunc(
+    "negative", ti._negative_result_type, ti._negative, _negative_docstring_
+)
 
 # B20: ==== NOT_EQUAL   (x1, x2)
 _not_equal_docstring_ = """
@@ -747,10 +767,48 @@
 )
 
 # U26: ==== POSITIVE    (x)
-# FIXME: implement U26
+_positive_docstring_ = """
+positive(x, out=None, order='K')
+
+Computes the numerical positive for each element `x_i` of input array `x`.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out (usm_ndarray):
+        Output array to populate. Array must have the correct
+        shape and the expected data type.
+    order ("C","F","A","K", optional): memory layout of the new
+        output array, if parameter `out` is `None`.
+        Default: "K".
+Return:
+    usm_ndarray:
+        An array containing the values of `x`.
+"""
+
+positive = UnaryElementwiseFunc(
+    "positive", ti._positive_result_type, ti._positive, _positive_docstring_
+)
 
 # B21: ==== POW         (x1, x2)
-# FIXME: implement B21
+_pow_docstring_ = """
+pow(x1, x2, out=None, order='K')
+
+Calculates `x1_i` raised to `x2_i` for each element `x1_i` of the input array
+`x1` with the respective element `x2_i` of the input array `x2`.
+
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have a numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have a numeric data type.
+Returns:
+    usm_ndarray:
+        an array containing the element-wise result. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+pow = BinaryElementwiseFunc(
+    "pow", ti._pow_result_type, ti._pow, _pow_docstring_
+)
 
 # U??: ==== PROJ        (x)
 _proj_docstring = """
@@ -838,7 +896,29 @@
 # FIXME: implement U31
 
 # U32: ==== SQUARE      (x)
-# FIXME: implement U32
+_square_docstring_ = """
+square(x, out=None, order='K')
+
+Computes `x_i**2` (or `x_i*x_i`) for each element `x_i` of input array `x`.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the newly output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the square `x`.
+        The data type of the returned array is determined by
+        the Type Promotion Rules.
+"""
+
+square = UnaryElementwiseFunc(
+    "square", ti._square_result_type, ti._square, _square_docstring_
+)
 
 # U33: ==== SQRT        (x)
 _sqrt_docstring_ = """
 
@@ -0,0 +1,236 @@
+//=== negative.hpp -   Unary function POSITIVE           ------  *-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of POSITIVE(x)
+/// function that returns x.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+#include <pybind11/pybind11.h>
+
+#include <iostream>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace negative
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+using dpctl::tensor::type_utils::vec_cast;
+
+template <typename argT, typename resT> struct NegativeFunctor
+{
+
+    using is_constant = typename std::false_type;
+    // constexpr resT constant_value = resT{};
+    using supports_vec = typename std::false_type;
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &x)
+    {
+        return -x;
+    }
+};
+
+template <typename argT,
+          typename resT = argT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using NegativeContigFunctor = elementwise_common::
+    UnaryContigFunctor<argT, resT, NegativeFunctor<argT, resT>, vec_sz, n_vecs>;
+
+template <typename T> struct NegativeOutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::TypeMapResultEntry<T, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::uint16_t>,
+        td_ns::TypeMapResultEntry<T, std::uint32_t>,
+        td_ns::TypeMapResultEntry<T, std::uint64_t>,
+        td_ns::TypeMapResultEntry<T, std::int8_t>,
+        td_ns::TypeMapResultEntry<T, std::int16_t>,
+        td_ns::TypeMapResultEntry<T, std::int32_t>,
+        td_ns::TypeMapResultEntry<T, std::int64_t>,
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
+class negative_contig_kernel;
+
+typedef sycl::event (*negative_contig_impl_fn_ptr_t)(
+    sycl::queue,
+    size_t,
+    const char *,
+    char *,
+    const std::vector<sycl::event> &);
+
+template <typename argTy>
+sycl::event negative_contig_impl(sycl::queue exec_q,
+                                 size_t nelems,
+                                 const char *arg_p,
+                                 char *res_p,
+                                 const std::vector<sycl::event> &depends = {})
+{
+    sycl::event negative_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+
+        size_t lws = 64;
+        constexpr unsigned int vec_sz = 4;
+        constexpr unsigned int n_vecs = 2;
+        const size_t n_groups =
+            ((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
+        const auto gws_range = sycl::range<1>(n_groups * lws);
+        const auto lws_range = sycl::range<1>(lws);
+
+        using resTy = typename NegativeOutputType<argTy>::value_type;
+        const argTy *arg_tp = reinterpret_cast<const argTy *>(arg_p);
+        resTy *res_tp = reinterpret_cast<resTy *>(res_p);
+
+        cgh.parallel_for<negative_contig_kernel<argTy, resTy, vec_sz, n_vecs>>(
+            sycl::nd_range<1>(gws_range, lws_range),
+            NegativeContigFunctor<argTy, resTy, vec_sz, n_vecs>(arg_tp, res_tp,
+                                                                nelems));
+    });
+    return negative_ev;
+}
+
+template <typename fnT, typename T> struct NegativeContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename NegativeOutputType<T>::value_type,
+                                     void>) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = negative_contig_impl<T>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T> struct NegativeTypeMapFactory
+{
+    /*! @brief get typeid for output type of std::negative(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename NegativeOutputType<T>::value_type;
+        ;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename argTy, typename resTy, typename IndexerT>
+using NegativeStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, NegativeFunctor<argTy, resTy>>;
+
+template <typename T1, typename T2, typename T3> class negative_strided_kernel;
+
+typedef sycl::event (*negative_strided_impl_fn_ptr_t)(
+    sycl::queue,
+    size_t,
+    int,
+    const py::ssize_t *,
+    const char *,
+    py::ssize_t,
+    char *,
+    py::ssize_t,
+    const std::vector<sycl::event> &,
+    const std::vector<sycl::event> &);
+
+template <typename argTy>
+sycl::event
+negative_strided_impl(sycl::queue exec_q,
+                      size_t nelems,
+                      int nd,
+                      const py::ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      py::ssize_t arg_offset,
+                      char *res_p,
+                      py::ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    sycl::event negative_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+        cgh.depends_on(additional_depends);
+
+        using resTy = typename NegativeOutputType<argTy>::value_type;
+        using IndexerT =
+            typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer;
+
+        IndexerT indexer{nd, arg_offset, res_offset, shape_and_strides};
+
+        const argTy *arg_tp = reinterpret_cast<const argTy *>(arg_p);
+        resTy *res_tp = reinterpret_cast<resTy *>(res_p);
+
+        cgh.parallel_for<negative_strided_kernel<argTy, resTy, IndexerT>>(
+            {nelems}, NegativeStridedFunctor<argTy, resTy, IndexerT>(
+                          arg_tp, res_tp, indexer));
+    });
+    return negative_ev;
+}
+
+template <typename fnT, typename T> struct NegativeStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename NegativeOutputType<T>::value_type,
+                                     void>) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = negative_strided_impl<T>;
+            return fn;
+        }
+    }
+};
+
+} // namespace negative
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl