Skip to content

Commit b221169

Browse files
Merge remote-tracking branch 'origin/master' into elementwise-func-fixes
2 parents da8dfed + 8a019d1 commit b221169

30 files changed

+1436
-111
lines changed

.github/workflows/generate-coverage.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ jobs:
7979
- name: Install dpctl dependencies
8080
shell: bash -l {0}
8181
run: |
82-
pip install numpy cython setuptools pytest pytest-cov scikit-build cmake coverage[toml]
82+
pip install numpy cython"<3" setuptools pytest pytest-cov scikit-build cmake coverage[toml]
8383
8484
- name: Build dpctl with coverage
8585
shell: bash -l {0}

.github/workflows/generate-docs.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
- name: Install Intel OneAPI
2727
if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
2828
run: |
29-
sudo apt-get install intel-oneapi-dpcpp-cpp-compiler
29+
sudo apt-get install intel-oneapi-compiler-dpcpp-cpp
3030
- name: Install Lua
3131
if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
3232
run: |
@@ -49,7 +49,7 @@ jobs:
4949
if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
5050
shell: bash -l {0}
5151
run: |
52-
pip install numpy cython setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
52+
pip install numpy cython"<3" setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
5353
- name: Checkout repo
5454
uses: actions/checkout@v3
5555
with:

.github/workflows/os-llvm-sycl-build.yml

+32-24
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ jobs:
1111

1212
env:
1313
DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download
14-
DRIVER_PATH: 2023-WW13
15-
OCLCPUEXP_FN: oclcpuexp-2023.15.3.0.20_rel.tar.gz
16-
FPGAEMU_FN: fpgaemu-2023.15.3.0.20_rel.tar.gz
14+
DRIVER_PATH: 2023-WW27
15+
OCLCPUEXP_FN: oclcpuexp-2023.16.6.0.28_rel.tar.gz
16+
FPGAEMU_FN: fpgaemu-2023.16.6.0.28_rel.tar.gz
1717
TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/
1818
TBB_INSTALL_DIR: oneapi-tbb-2021.9.0
1919
TBB_FN: oneapi-tbb-2021.9.0-lin.tgz
@@ -37,34 +37,42 @@ jobs:
3737
${{ runner.os }}-
3838
3939
- name: Download and install nightly and components
40+
env:
41+
USE_LATEST_SYCLOS: 0
4042
shell: bash -l {0}
4143
run: |
4244
cd /home/runner/work
4345
mkdir -p sycl_bundle
4446
cd sycl_bundle
45-
# get list of shas and tags from remote, filter sycl-nightly tags and reverse order
46-
export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
47-
grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
48-
# initialize
49-
unset DEPLOY_NIGHTLY_TAG
50-
unset DEPLOY_NIGHTLY_TAG_SHA
51-
52-
# go through tags and find the most recent one where nightly build binary is available
53-
while IFS= read -r NEXT_LLVM_TAG; do
54-
export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
55-
export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
56-
$(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
57-
if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
58-
then
59-
export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
60-
export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
61-
break
62-
fi
63-
done <<< "${LLVM_TAGS}"
47+
if [[ "${USE_LATEST_SYCLOS:-0}" -eq "1" ]]; then
48+
# get list of shas and tags from remote, filter sycl-nightly tags and reverse order
49+
export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
50+
grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
51+
# initialize
52+
unset DEPLOY_NIGHTLY_TAG
53+
unset DEPLOY_NIGHTLY_TAG_SHA
54+
55+
# go through tags and find the most recent one where nightly build binary is available
56+
while IFS= read -r NEXT_LLVM_TAG; do
57+
export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
58+
export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
59+
$(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
60+
if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
61+
then
62+
export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
63+
export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
64+
break
65+
fi
66+
done <<< "${LLVM_TAGS}"
67+
else
68+
# Use the latest tag known to work instead
69+
export DEPLOY_NIGHTLY_TAG="sycl-nightly%2F20230606"
70+
export DEPLOY_LLVM_TAG_SHA=f44d0133d4b0077298f034697a1f3818ff1d6134
71+
fi
6472
6573
[[ -n "${DEPLOY_NIGHTLY_TAG}" ]] || exit 1
6674
[[ -n "${DEPLOY_LLVM_TAG_SHA}" ]] || exit 1
67-
echo "Using ${m} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"
75+
echo "Using ${DEPLOY_NIGHTLY_TAG} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"
6876
6977
if [[ -f bundle_id.txt && ( "$(cat bundle_id.txt)" == "${DEPLOY_LLVM_TAG_SHA}" ) ]]; then
7078
echo "Using cached download of ${DEPLOY_LLVM_TAG_SHA}"
@@ -100,7 +108,7 @@ jobs:
100108
- name: Install dpctl dependencies
101109
shell: bash -l {0}
102110
run: |
103-
pip install numpy cython setuptools pytest scikit-build cmake
111+
pip install numpy cython"<3" setuptools pytest scikit-build cmake
104112
105113
- name: Checkout repo
106114
uses: actions/checkout@v3

conda-recipe/meta.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ requirements:
2020
- cmake >=3.21
2121
- ninja
2222
- git
23-
- cython
23+
- cython <3
2424
- python
2525
- scikit-build
2626
- numpy

dpctl/tensor/__init__.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,14 @@
9494
from ._elementwise_funcs import (
9595
abs,
9696
add,
97+
ceil,
9798
conj,
9899
cos,
99100
divide,
100101
equal,
101102
exp,
102103
expm1,
104+
floor,
103105
floor_divide,
104106
greater,
105107
greater_equal,
@@ -128,6 +130,7 @@
128130
sqrt,
129131
square,
130132
subtract,
133+
trunc,
131134
)
132135
from ._reduction import sum
133136

@@ -208,16 +211,21 @@
208211
"inf",
209212
"abs",
210213
"add",
214+
"ceil",
211215
"conj",
212216
"cos",
217+
"divide",
218+
"equal",
213219
"exp",
214220
"expm1",
221+
"floor",
222+
"floor_divide",
215223
"greater",
216224
"greater_equal",
217225
"imag",
226+
"isfinite",
218227
"isinf",
219228
"isnan",
220-
"isfinite",
221229
"less",
222230
"less_equal",
223231
"log",
@@ -228,19 +236,17 @@
228236
"log1p",
229237
"log2",
230238
"log10",
239+
"multiply",
231240
"negative",
241+
"not_equal",
232242
"positive",
243+
"pow",
233244
"proj",
234245
"real",
235246
"sin",
236247
"sqrt",
237248
"square",
238-
"divide",
239-
"multiply",
240-
"pow",
241249
"subtract",
242-
"equal",
243-
"not_equal",
244250
"sum",
245-
"floor_divide",
251+
"trunc",
246252
]

dpctl/tensor/_copy_utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,11 @@ def _copy_same_shape(dst, src):
213213
"""Assumes src and dst have the same shape."""
214214
# check that memory regions do not overlap
215215
if ti._array_overlap(dst, src):
216+
if src._pointer == dst._pointer and (
217+
src is dst
218+
or (src.strides == dst.strides and src.dtype == dst.dtype)
219+
):
220+
return
216221
_copy_overlapping(src=src, dst=dst)
217222
return
218223

dpctl/tensor/_elementwise_common.py

+40-22
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,20 @@ def __call__(self, x, out=None, order="K"):
5252
if not isinstance(x, dpt.usm_ndarray):
5353
raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")
5454

55+
if order not in ["C", "F", "K", "A"]:
56+
order = "K"
57+
buf_dt, res_dt = _find_buf_dtype(
58+
x.dtype, self.result_type_resolver_fn_, x.sycl_device
59+
)
60+
if res_dt is None:
61+
raise TypeError(
62+
f"function '{self.name_}' does not support input type "
63+
f"({x.dtype}), "
64+
"and the input could not be safely coerced to any "
65+
"supported types according to the casting rule ''safe''."
66+
)
67+
68+
orig_out = out
5569
if out is not None:
5670
if not isinstance(out, dpt.usm_ndarray):
5771
raise TypeError(
@@ -64,8 +78,21 @@ def __call__(self, x, out=None, order="K"):
6478
f"Expected output shape is {x.shape}, got {out.shape}"
6579
)
6680

67-
if ti._array_overlap(x, out):
68-
raise TypeError("Input and output arrays have memory overlap")
81+
if res_dt != out.dtype:
82+
raise TypeError(
83+
f"Output array of type {res_dt} is needed,"
84+
f" got {out.dtype}"
85+
)
86+
87+
if (
88+
buf_dt is None
89+
and ti._array_overlap(x, out)
90+
and not ti._same_logical_tensors(x, out)
91+
):
92+
# Allocate a temporary buffer to avoid memory overlapping.
93+
# Note if `buf_dt` is not None, a temporary copy of `x` will be
94+
# created, so the array overlap check isn't needed.
95+
out = dpt.empty_like(out)
6996

7097
if (
7198
dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue))
@@ -75,13 +102,6 @@ def __call__(self, x, out=None, order="K"):
75102
"Input and output allocation queues are not compatible"
76103
)
77104

78-
if order not in ["C", "F", "K", "A"]:
79-
order = "K"
80-
buf_dt, res_dt = _find_buf_dtype(
81-
x.dtype, self.result_type_resolver_fn_, x.sycl_device
82-
)
83-
if res_dt is None:
84-
raise RuntimeError
85105
exec_q = x.sycl_queue
86106
if buf_dt is None:
87107
if out is None:
@@ -91,17 +111,20 @@ def __call__(self, x, out=None, order="K"):
91111
if order == "A":
92112
order = "F" if x.flags.f_contiguous else "C"
93113
out = dpt.empty_like(x, dtype=res_dt, order=order)
94-
else:
95-
if res_dt != out.dtype:
96-
raise TypeError(
97-
f"Output array of type {res_dt} is needed,"
98-
f" got {out.dtype}"
99-
)
100114

101-
ht, _ = self.unary_fn_(x, out, sycl_queue=exec_q)
102-
ht.wait()
115+
ht_unary_ev, unary_ev = self.unary_fn_(x, out, sycl_queue=exec_q)
116+
117+
if not (orig_out is None or orig_out is out):
118+
# Copy the out data from temporary buffer to original memory
119+
ht_copy_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray(
120+
src=out, dst=orig_out, sycl_queue=exec_q, depends=[unary_ev]
121+
)
122+
ht_copy_ev.wait()
123+
out = orig_out
103124

125+
ht_unary_ev.wait()
104126
return out
127+
105128
if order == "K":
106129
buf = _empty_like_orderK(x, buf_dt)
107130
else:
@@ -117,11 +140,6 @@ def __call__(self, x, out=None, order="K"):
117140
out = _empty_like_orderK(buf, res_dt)
118141
else:
119142
out = dpt.empty_like(buf, dtype=res_dt, order=order)
120-
else:
121-
if buf_dt != out.dtype:
122-
raise TypeError(
123-
f"Output array of type {buf_dt} is needed, got {out.dtype}"
124-
)
125143

126144
ht, _ = self.unary_fn_(buf, out, sycl_queue=exec_q, depends=[copy_ev])
127145
ht_copy_ev.wait()

dpctl/tensor/_elementwise_funcs.py

+74-3
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,30 @@
114114
# FIXME: implement B07
115115

116116
# U09: ==== CEIL (x)
117-
# FIXME: implement U09
117+
_ceil_docstring = """
118+
ceil(x, out=None, order='K')
119+
120+
Returns the ceiling for each element `x_i` for input array `x`.
121+
The ceil of the scalar `x` is the smallest integer `i`, such that `i >= x`.
122+
123+
Args:
124+
x (usm_ndarray):
125+
Input array, expected to have numeric data type.
126+
out ({None, usm_ndarray}, optional):
127+
Output array to populate.
128+
Array must have the correct shape and the expected data type.
129+
order ("C","F","A","K", optional):
130+
Memory layout of the newly allocated output array, if parameter `out` is `None`.
131+
Default: "K".
132+
Returns:
133+
usm_ndarray:
134+
An array containing the element-wise ceiling of input array.
135+
The returned array has the same data type as `x`.
136+
"""
137+
138+
ceil = UnaryElementwiseFunc(
139+
"ceil", ti._ceil_result_type, ti._ceil, _ceil_docstring
140+
)
118141

119142
# U10: ==== CONJ (x)
120143
_conj_docstring = """
@@ -271,7 +294,30 @@
271294
)
272295

273296
# U15: ==== FLOOR (x)
274-
# FIXME: implement U15
297+
_floor_docstring = """
298+
floor(x, out=None, order='K')
299+
300+
Returns the floor for each element `x_i` for input array `x`.
301+
The floor of the scalar `x` is the largest integer `i`, such that `i <= x`.
302+
303+
Args:
304+
x (usm_ndarray):
305+
Input array, expected to have numeric data type.
306+
out ({None, usm_ndarray}, optional):
307+
Output array to populate.
308+
Array must have the correct shape and the expected data type.
309+
order ("C","F","A","K", optional):
310+
Memory layout of the newly allocated output array, if parameter `out` is `None`.
311+
Default: "K".
312+
Returns:
313+
usm_ndarray:
314+
An array containing the element-wise floor of input array.
315+
The returned array has the same data type as `x`.
316+
"""
317+
318+
floor = UnaryElementwiseFunc(
319+
"floor", ti._floor_result_type, ti._floor, _floor_docstring
320+
)
275321

276322
# B10: ==== FLOOR_DIVIDE (x1, x2)
277323
_floor_divide_docstring_ = """
@@ -1031,4 +1077,29 @@
10311077
# FIXME: implement U35
10321078

10331079
# U36: ==== TRUNC (x)
1034-
# FIXME: implement U36
1080+
_trunc_docstring = """
1081+
trunc(x, out=None, order='K')
1082+
1083+
Returns the truncated value for each element `x_i` for input array `x`.
1084+
The truncated value of the scalar `x` is the nearest integer i which is
1085+
closer to zero than `x` is. In short, the fractional part of the
1086+
signed number `x` is discarded.
1087+
1088+
Args:
1089+
x (usm_ndarray):
1090+
Input array, expected to have numeric data type.
1091+
out ({None, usm_ndarray}, optional):
1092+
Output array to populate.
1093+
Array must have the correct shape and the expected data type.
1094+
order ("C","F","A","K", optional):
1095+
Memory layout of the newly allocated output array, if parameter `out` is `None`.
1096+
Default: "K".
1097+
Returns:
1098+
usm_ndarray:
1099+
An array containing the element-wise truncated value of input array.
1100+
The returned array has the same data type as `x`.
1101+
"""
1102+
1103+
trunc = UnaryElementwiseFunc(
1104+
"trunc", ti._trunc_result_type, ti._trunc, _trunc_docstring
1105+
)

0 commit comments

Comments
 (0)