Skip to content

Commit 7510d7e

Browse files
authored
Merge pull request #1268 from IntelPython/more-elementwise-functions
Implements negative, positive, pow, and square
2 parents 7bfc5a0 + 4748233 commit 7510d7e

File tree

11 files changed

+1722
-12
lines changed

11 files changed

+1722
-12
lines changed

dpctl/tensor/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,15 @@
116116
logical_or,
117117
logical_xor,
118118
multiply,
119+
negative,
119120
not_equal,
121+
positive,
122+
pow,
120123
proj,
121124
real,
122125
sin,
123126
sqrt,
127+
square,
124128
subtract,
125129
)
126130
from ._reduction import sum
@@ -220,12 +224,16 @@
220224
"logical_or",
221225
"logical_xor",
222226
"log1p",
227+
"negative",
228+
"positive",
223229
"proj",
224230
"real",
225231
"sin",
226232
"sqrt",
233+
"square",
227234
"divide",
228235
"multiply",
236+
"pow",
229237
"subtract",
230238
"equal",
231239
"not_equal",

dpctl/tensor/_elementwise_funcs.py

+84-4
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,27 @@
715715
)
716716

717717
# U25: ==== NEGATIVE (x)
718-
# FIXME: implement U25
718+
_negative_docstring_ = """
719+
negative(x, out=None, order='K')
720+
721+
Computes the numerical negative for each element `x_i` of input array `x`.
722+
Args:
723+
x (usm_ndarray):
724+
Input array, expected to have numeric data type.
725+
out (usm_ndarray):
726+
Output array to populate. Array must have the correct
727+
shape and the expected data type.
728+
order ("C","F","A","K", optional): memory layout of the new
729+
output array, if parameter `out` is `None`.
730+
Default: "K".
731+
Return:
732+
usm_ndarray:
733+
An array containing the negative of `x`.
734+
"""
735+
736+
negative = UnaryElementwiseFunc(
737+
"negative", ti._negative_result_type, ti._negative, _negative_docstring_
738+
)
719739

720740
# B20: ==== NOT_EQUAL (x1, x2)
721741
_not_equal_docstring_ = """
@@ -747,10 +767,48 @@
747767
)
748768

749769
# U26: ==== POSITIVE (x)
750-
# FIXME: implement U26
770+
_positive_docstring_ = """
771+
positive(x, out=None, order='K')
772+
773+
Computes the numerical positive for each element `x_i` of input array `x`.
774+
Args:
775+
x (usm_ndarray):
776+
Input array, expected to have numeric data type.
777+
out (usm_ndarray):
778+
Output array to populate. Array must have the correct
779+
shape and the expected data type.
780+
order ("C","F","A","K", optional): memory layout of the new
781+
output array, if parameter `out` is `None`.
782+
Default: "K".
783+
Return:
784+
usm_ndarray:
785+
An array containing the values of `x`.
786+
"""
787+
788+
positive = UnaryElementwiseFunc(
789+
"positive", ti._positive_result_type, ti._positive, _positive_docstring_
790+
)
751791

752792
# B21: ==== POW (x1, x2)
753-
# FIXME: implement B21
793+
_pow_docstring_ = """
794+
pow(x1, x2, out=None, order='K')
795+
796+
Calculates `x1_i` raised to `x2_i` for each element `x1_i` of the input array
797+
`x1` with the respective element `x2_i` of the input array `x2`.
798+
799+
Args:
800+
x1 (usm_ndarray):
801+
First input array, expected to have a numeric data type.
802+
x2 (usm_ndarray):
803+
Second input array, also expected to have a numeric data type.
804+
Returns:
805+
usm_ndarray:
806+
an array containing the element-wise result. The data type of
807+
the returned array is determined by the Type Promotion Rules.
808+
"""
809+
pow = BinaryElementwiseFunc(
810+
"pow", ti._pow_result_type, ti._pow, _pow_docstring_
811+
)
754812

755813
# U??: ==== PROJ (x)
756814
_proj_docstring = """
@@ -838,7 +896,29 @@
838896
# FIXME: implement U31
839897

840898
# U32: ==== SQUARE (x)
841-
# FIXME: implement U32
899+
_square_docstring_ = """
900+
square(x, out=None, order='K')
901+
902+
Computes `x_i**2` (or `x_i*x_i`) for each element `x_i` of input array `x`.
903+
Args:
904+
x (usm_ndarray):
905+
Input array, expected to have numeric data type.
906+
out ({None, usm_ndarray}, optional):
907+
Output array to populate.
908+
Array have the correct shape and the expected data type.
909+
order ("C","F","A","K", optional):
910+
Memory layout of the newly output array, if parameter `out` is `None`.
911+
Default: "K".
912+
Returns:
913+
usm_ndarray:
914+
An array containing the square `x`.
915+
The data type of the returned array is determined by
916+
the Type Promotion Rules.
917+
"""
918+
919+
square = UnaryElementwiseFunc(
920+
"square", ti._square_result_type, ti._square, _square_docstring_
921+
)
842922

843923
# U33: ==== SQRT (x)
844924
_sqrt_docstring_ = """
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
//=== negative.hpp - Unary function POSITIVE ------ *-C++-*--/===//
2+
//
3+
// Data Parallel Control (dpctl)
4+
//
5+
// Copyright 2020-2023 Intel Corporation
6+
//
7+
// Licensed under the Apache License, Version 2.0 (the "License");
8+
// you may not use this file except in compliance with the License.
9+
// You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing, software
14+
// distributed under the License is distributed on an "AS IS" BASIS,
15+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
// See the License for the specific language governing permissions and
17+
// limitations under the License.
18+
//
19+
//===---------------------------------------------------------------------===//
20+
///
21+
/// \file
22+
/// This file defines kernels for elementwise evaluation of POSITIVE(x)
23+
/// function that returns x.
24+
//===---------------------------------------------------------------------===//
25+
26+
#pragma once
27+
#include <CL/sycl.hpp>
28+
#include <cmath>
29+
#include <cstddef>
30+
#include <cstdint>
31+
#include <type_traits>
32+
33+
#include "kernels/elementwise_functions/common.hpp"
34+
35+
#include "utils/offset_utils.hpp"
36+
#include "utils/type_dispatch.hpp"
37+
#include "utils/type_utils.hpp"
38+
#include <pybind11/pybind11.h>
39+
40+
#include <iostream>
41+
42+
namespace dpctl
43+
{
44+
namespace tensor
45+
{
46+
namespace kernels
47+
{
48+
namespace negative
49+
{
50+
51+
namespace py = pybind11;
52+
namespace td_ns = dpctl::tensor::type_dispatch;
53+
54+
using dpctl::tensor::type_utils::is_complex;
55+
using dpctl::tensor::type_utils::vec_cast;
56+
57+
template <typename argT, typename resT> struct NegativeFunctor
58+
{
59+
60+
using is_constant = typename std::false_type;
61+
// constexpr resT constant_value = resT{};
62+
using supports_vec = typename std::false_type;
63+
using supports_sg_loadstore = typename std::negation<
64+
std::disjunction<is_complex<resT>, is_complex<argT>>>;
65+
66+
resT operator()(const argT &x)
67+
{
68+
return -x;
69+
}
70+
};
71+
72+
template <typename argT,
73+
typename resT = argT,
74+
unsigned int vec_sz = 4,
75+
unsigned int n_vecs = 2>
76+
using NegativeContigFunctor = elementwise_common::
77+
UnaryContigFunctor<argT, resT, NegativeFunctor<argT, resT>, vec_sz, n_vecs>;
78+
79+
template <typename T> struct NegativeOutputType
80+
{
81+
using value_type = typename std::disjunction< // disjunction is C++17
82+
// feature, supported by DPC++
83+
td_ns::TypeMapResultEntry<T, std::uint8_t>,
84+
td_ns::TypeMapResultEntry<T, std::uint16_t>,
85+
td_ns::TypeMapResultEntry<T, std::uint32_t>,
86+
td_ns::TypeMapResultEntry<T, std::uint64_t>,
87+
td_ns::TypeMapResultEntry<T, std::int8_t>,
88+
td_ns::TypeMapResultEntry<T, std::int16_t>,
89+
td_ns::TypeMapResultEntry<T, std::int32_t>,
90+
td_ns::TypeMapResultEntry<T, std::int64_t>,
91+
td_ns::TypeMapResultEntry<T, sycl::half>,
92+
td_ns::TypeMapResultEntry<T, float>,
93+
td_ns::TypeMapResultEntry<T, double>,
94+
td_ns::TypeMapResultEntry<T, std::complex<float>>,
95+
td_ns::TypeMapResultEntry<T, std::complex<double>>,
96+
td_ns::DefaultResultEntry<void>>::result_type;
97+
};
98+
99+
template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
100+
class negative_contig_kernel;
101+
102+
typedef sycl::event (*negative_contig_impl_fn_ptr_t)(
103+
sycl::queue,
104+
size_t,
105+
const char *,
106+
char *,
107+
const std::vector<sycl::event> &);
108+
109+
template <typename argTy>
110+
sycl::event negative_contig_impl(sycl::queue exec_q,
111+
size_t nelems,
112+
const char *arg_p,
113+
char *res_p,
114+
const std::vector<sycl::event> &depends = {})
115+
{
116+
sycl::event negative_ev = exec_q.submit([&](sycl::handler &cgh) {
117+
cgh.depends_on(depends);
118+
119+
size_t lws = 64;
120+
constexpr unsigned int vec_sz = 4;
121+
constexpr unsigned int n_vecs = 2;
122+
const size_t n_groups =
123+
((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
124+
const auto gws_range = sycl::range<1>(n_groups * lws);
125+
const auto lws_range = sycl::range<1>(lws);
126+
127+
using resTy = typename NegativeOutputType<argTy>::value_type;
128+
const argTy *arg_tp = reinterpret_cast<const argTy *>(arg_p);
129+
resTy *res_tp = reinterpret_cast<resTy *>(res_p);
130+
131+
cgh.parallel_for<negative_contig_kernel<argTy, resTy, vec_sz, n_vecs>>(
132+
sycl::nd_range<1>(gws_range, lws_range),
133+
NegativeContigFunctor<argTy, resTy, vec_sz, n_vecs>(arg_tp, res_tp,
134+
nelems));
135+
});
136+
return negative_ev;
137+
}
138+
139+
template <typename fnT, typename T> struct NegativeContigFactory
140+
{
141+
fnT get()
142+
{
143+
if constexpr (std::is_same_v<typename NegativeOutputType<T>::value_type,
144+
void>) {
145+
fnT fn = nullptr;
146+
return fn;
147+
}
148+
else {
149+
fnT fn = negative_contig_impl<T>;
150+
return fn;
151+
}
152+
}
153+
};
154+
155+
template <typename fnT, typename T> struct NegativeTypeMapFactory
156+
{
157+
/*! @brief get typeid for output type of std::negative(T x) */
158+
std::enable_if_t<std::is_same<fnT, int>::value, int> get()
159+
{
160+
using rT = typename NegativeOutputType<T>::value_type;
161+
;
162+
return td_ns::GetTypeid<rT>{}.get();
163+
}
164+
};
165+
166+
template <typename argTy, typename resTy, typename IndexerT>
167+
using NegativeStridedFunctor = elementwise_common::
168+
UnaryStridedFunctor<argTy, resTy, IndexerT, NegativeFunctor<argTy, resTy>>;
169+
170+
template <typename T1, typename T2, typename T3> class negative_strided_kernel;
171+
172+
typedef sycl::event (*negative_strided_impl_fn_ptr_t)(
173+
sycl::queue,
174+
size_t,
175+
int,
176+
const py::ssize_t *,
177+
const char *,
178+
py::ssize_t,
179+
char *,
180+
py::ssize_t,
181+
const std::vector<sycl::event> &,
182+
const std::vector<sycl::event> &);
183+
184+
template <typename argTy>
185+
sycl::event
186+
negative_strided_impl(sycl::queue exec_q,
187+
size_t nelems,
188+
int nd,
189+
const py::ssize_t *shape_and_strides,
190+
const char *arg_p,
191+
py::ssize_t arg_offset,
192+
char *res_p,
193+
py::ssize_t res_offset,
194+
const std::vector<sycl::event> &depends,
195+
const std::vector<sycl::event> &additional_depends)
196+
{
197+
sycl::event negative_ev = exec_q.submit([&](sycl::handler &cgh) {
198+
cgh.depends_on(depends);
199+
cgh.depends_on(additional_depends);
200+
201+
using resTy = typename NegativeOutputType<argTy>::value_type;
202+
using IndexerT =
203+
typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer;
204+
205+
IndexerT indexer{nd, arg_offset, res_offset, shape_and_strides};
206+
207+
const argTy *arg_tp = reinterpret_cast<const argTy *>(arg_p);
208+
resTy *res_tp = reinterpret_cast<resTy *>(res_p);
209+
210+
cgh.parallel_for<negative_strided_kernel<argTy, resTy, IndexerT>>(
211+
{nelems}, NegativeStridedFunctor<argTy, resTy, IndexerT>(
212+
arg_tp, res_tp, indexer));
213+
});
214+
return negative_ev;
215+
}
216+
217+
template <typename fnT, typename T> struct NegativeStridedFactory
218+
{
219+
fnT get()
220+
{
221+
if constexpr (std::is_same_v<typename NegativeOutputType<T>::value_type,
222+
void>) {
223+
fnT fn = nullptr;
224+
return fn;
225+
}
226+
else {
227+
fnT fn = negative_strided_impl<T>;
228+
return fn;
229+
}
230+
}
231+
};
232+
233+
} // namespace negative
234+
} // namespace kernels
235+
} // namespace tensor
236+
} // namespace dpctl

0 commit comments

Comments
 (0)