Skip to content

Commit 8e6a55f

Browse files
Merge remote-tracking branch 'origin/master' into fix/wheel-metadata
2 parents 18d45de + d3ce80e commit 8e6a55f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+5604
-134
lines changed

.github/workflows/generate-coverage.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ jobs:
7979
- name: Install dpctl dependencies
8080
shell: bash -l {0}
8181
run: |
82-
pip install numpy cython setuptools pytest pytest-cov scikit-build cmake coverage[toml]
82+
pip install numpy cython"<3" setuptools pytest pytest-cov scikit-build cmake coverage[toml]
8383
8484
- name: Build dpctl with coverage
8585
shell: bash -l {0}

.github/workflows/generate-docs.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
- name: Install Intel OneAPI
2727
if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
2828
run: |
29-
sudo apt-get install intel-oneapi-dpcpp-cpp-compiler
29+
sudo apt-get install intel-oneapi-compiler-dpcpp-cpp
3030
- name: Install Lua
3131
if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
3232
run: |
@@ -49,7 +49,7 @@ jobs:
4949
if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
5050
shell: bash -l {0}
5151
run: |
52-
pip install numpy cython setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
52+
pip install numpy cython"<3" setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
5353
- name: Checkout repo
5454
uses: actions/checkout@v3
5555
with:

.github/workflows/os-llvm-sycl-build.yml

+32-24
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ jobs:
1111

1212
env:
1313
DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download
14-
DRIVER_PATH: 2023-WW13
15-
OCLCPUEXP_FN: oclcpuexp-2023.15.3.0.20_rel.tar.gz
16-
FPGAEMU_FN: fpgaemu-2023.15.3.0.20_rel.tar.gz
14+
DRIVER_PATH: 2023-WW27
15+
OCLCPUEXP_FN: oclcpuexp-2023.16.6.0.28_rel.tar.gz
16+
FPGAEMU_FN: fpgaemu-2023.16.6.0.28_rel.tar.gz
1717
TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/
1818
TBB_INSTALL_DIR: oneapi-tbb-2021.9.0
1919
TBB_FN: oneapi-tbb-2021.9.0-lin.tgz
@@ -37,34 +37,42 @@ jobs:
3737
${{ runner.os }}-
3838
3939
- name: Download and install nightly and components
40+
env:
41+
USE_LATEST_SYCLOS: 0
4042
shell: bash -l {0}
4143
run: |
4244
cd /home/runner/work
4345
mkdir -p sycl_bundle
4446
cd sycl_bundle
45-
# get list of shas and tags from remote, filter sycl-nightly tags and reverse order
46-
export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
47-
grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
48-
# initialize
49-
unset DEPLOY_NIGHTLY_TAG
50-
unset DEPLOY_NIGHTLY_TAG_SHA
51-
52-
# go through tags and find the most recent one where a nightly build binary is available
53-
while IFS= read -r NEXT_LLVM_TAG; do
54-
export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
55-
export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
56-
$(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
57-
if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
58-
then
59-
export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
60-
export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
61-
break
62-
fi
63-
done <<< "${LLVM_TAGS}"
47+
if [[ "${USE_LATEST_SYCLOS:-0}" -eq "1" ]]; then
48+
# get list of shas and tags from remote, filter sycl-nightly tags and reverse order
49+
export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
50+
grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
51+
# initialize
52+
unset DEPLOY_NIGHTLY_TAG
53+
unset DEPLOY_NIGHTLY_TAG_SHA
54+
55+
# go through tags and find the most recent one where a nightly build binary is available
56+
while IFS= read -r NEXT_LLVM_TAG; do
57+
export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
58+
export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
59+
$(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
60+
if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
61+
then
62+
export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
63+
export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
64+
break
65+
fi
66+
done <<< "${LLVM_TAGS}"
67+
else
68+
# Use the latest tag known to work instead
69+
export DEPLOY_NIGHTLY_TAG="sycl-nightly%2F20230606"
70+
export DEPLOY_LLVM_TAG_SHA=f44d0133d4b0077298f034697a1f3818ff1d6134
71+
fi
6472
6573
[[ -n "${DEPLOY_NIGHTLY_TAG}" ]] || exit 1
6674
[[ -n "${DEPLOY_LLVM_TAG_SHA}" ]] || exit 1
67-
echo "Using ${m} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"
75+
echo "Using ${DEPLOY_NIGHTLY_TAG} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"
6876
6977
if [[ -f bundle_id.txt && ( "$(cat bundle_id.txt)" == "${DEPLOY_LLVM_TAG_SHA}" ) ]]; then
7078
echo "Using cached download of ${DEPLOY_LLVM_TAG_SHA}"
@@ -100,7 +108,7 @@ jobs:
100108
- name: Install dpctl dependencies
101109
shell: bash -l {0}
102110
run: |
103-
pip install numpy cython setuptools pytest scikit-build cmake
111+
pip install numpy cython"<3" setuptools pytest scikit-build cmake
104112
105113
- name: Checkout repo
106114
uses: actions/checkout@v3

conda-recipe/meta.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ requirements:
2020
- cmake >=3.21
2121
- ninja
2222
- git
23-
- cython
23+
- cython <3
2424
- python
2525
- scikit-build
2626
- numpy

dpctl/tensor/__init__.py

+20
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,22 @@
111111
less_equal,
112112
log,
113113
log1p,
114+
log2,
115+
log10,
116+
logical_and,
117+
logical_not,
118+
logical_or,
119+
logical_xor,
114120
multiply,
121+
negative,
115122
not_equal,
123+
positive,
124+
pow,
116125
proj,
117126
real,
118127
sin,
119128
sqrt,
129+
square,
120130
subtract,
121131
)
122132
from ._reduction import sum
@@ -211,13 +221,23 @@
211221
"less",
212222
"less_equal",
213223
"log",
224+
"logical_and",
225+
"logical_not",
226+
"logical_or",
227+
"logical_xor",
214228
"log1p",
229+
"log2",
230+
"log10",
231+
"negative",
232+
"positive",
215233
"proj",
216234
"real",
217235
"sin",
218236
"sqrt",
237+
"square",
219238
"divide",
220239
"multiply",
240+
"pow",
221241
"subtract",
222242
"equal",
223243
"not_equal",

dpctl/tensor/_copy_utils.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import dpctl.tensor as dpt
2424
import dpctl.tensor._tensor_impl as ti
2525
import dpctl.utils
26+
from dpctl.tensor._ctors import _get_dtype
2627
from dpctl.tensor._device import normalize_queue_device
2728

2829
__doc__ = (
@@ -212,6 +213,11 @@ def _copy_same_shape(dst, src):
212213
"""Assumes src and dst have the same shape."""
213214
# check that memory regions do not overlap
214215
if ti._array_overlap(dst, src):
216+
if src._pointer == dst._pointer and (
217+
src is dst
218+
or (src.strides == dst.strides and src.dtype == dst.dtype)
219+
):
220+
return
215221
_copy_overlapping(src=src, dst=dst)
216222
return
217223

@@ -364,7 +370,8 @@ def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True):
364370
array (usm_ndarray):
365371
An input array.
366372
new_dtype (dtype):
367-
The data type of the resulting array.
373+
The data type of the resulting array. If `None`, gives default
374+
floating point type supported by device where `array` is allocated.
368375
order ({"C", "F", "A", "K"}, optional):
369376
Controls memory layout of the resulting array if a copy
370377
is returned.
@@ -392,7 +399,7 @@ def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True):
392399
"Recognized values are 'A', 'C', 'F', or 'K'"
393400
)
394401
ary_dtype = usm_ary.dtype
395-
target_dtype = dpt.dtype(newdtype)
402+
target_dtype = _get_dtype(newdtype, usm_ary.sycl_queue)
396403
if not dpt.can_cast(ary_dtype, target_dtype, casting=casting):
397404
raise TypeError(
398405
f"Can not cast from {ary_dtype} to {newdtype} "

dpctl/tensor/_ctors.py

+2
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,8 @@ def full(
11011101
fill_value = int(fill_value.real)
11021102
elif fill_value_type is complex and np.issubdtype(dtype, np.floating):
11031103
fill_value = fill_value.real
1104+
elif fill_value_type is int and np.issubdtype(dtype, np.integer):
1105+
fill_value = _to_scalar(fill_value, dtype)
11041106

11051107
hev, _ = ti._full_usm_ndarray(fill_value, res, sycl_queue)
11061108
hev.wait()

dpctl/tensor/_elementwise_common.py

+35-22
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,15 @@ def __call__(self, x, out=None, order="K"):
5252
if not isinstance(x, dpt.usm_ndarray):
5353
raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")
5454

55+
if order not in ["C", "F", "K", "A"]:
56+
order = "K"
57+
buf_dt, res_dt = _find_buf_dtype(
58+
x.dtype, self.result_type_resolver_fn_, x.sycl_device
59+
)
60+
if res_dt is None:
61+
raise RuntimeError
62+
63+
orig_out = out
5564
if out is not None:
5665
if not isinstance(out, dpt.usm_ndarray):
5766
raise TypeError(
@@ -64,8 +73,21 @@ def __call__(self, x, out=None, order="K"):
6473
f"Expected output shape is {x.shape}, got {out.shape}"
6574
)
6675

67-
if ti._array_overlap(x, out):
68-
raise TypeError("Input and output arrays have memory overlap")
76+
if res_dt != out.dtype:
77+
raise TypeError(
78+
f"Output array of type {res_dt} is needed,"
79+
f" got {out.dtype}"
80+
)
81+
82+
if (
83+
buf_dt is None
84+
and ti._array_overlap(x, out)
85+
and not ti._same_logical_tensors(x, out)
86+
):
87+
# Allocate a temporary buffer to avoid memory overlapping.
88+
# Note if `buf_dt` is not None, a temporary copy of `x` will be
89+
# created, so the array overlap check isn't needed.
90+
out = dpt.empty_like(out)
6991

7092
if (
7193
dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue))
@@ -75,13 +97,6 @@ def __call__(self, x, out=None, order="K"):
7597
"Input and output allocation queues are not compatible"
7698
)
7799

78-
if order not in ["C", "F", "K", "A"]:
79-
order = "K"
80-
buf_dt, res_dt = _find_buf_dtype(
81-
x.dtype, self.result_type_resolver_fn_, x.sycl_device
82-
)
83-
if res_dt is None:
84-
raise RuntimeError
85100
exec_q = x.sycl_queue
86101
if buf_dt is None:
87102
if out is None:
@@ -91,17 +106,20 @@ def __call__(self, x, out=None, order="K"):
91106
if order == "A":
92107
order = "F" if x.flags.f_contiguous else "C"
93108
out = dpt.empty_like(x, dtype=res_dt, order=order)
94-
else:
95-
if res_dt != out.dtype:
96-
raise TypeError(
97-
f"Output array of type {res_dt} is needed,"
98-
f" got {out.dtype}"
99-
)
100109

101-
ht, _ = self.unary_fn_(x, out, sycl_queue=exec_q)
102-
ht.wait()
110+
ht_unary_ev, unary_ev = self.unary_fn_(x, out, sycl_queue=exec_q)
111+
112+
if not (orig_out is None or orig_out is out):
113+
# Copy the out data from temporary buffer to original memory
114+
ht_copy_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray(
115+
src=out, dst=orig_out, sycl_queue=exec_q, depends=[unary_ev]
116+
)
117+
ht_copy_ev.wait()
118+
out = orig_out
103119

120+
ht_unary_ev.wait()
104121
return out
122+
105123
if order == "K":
106124
buf = _empty_like_orderK(x, buf_dt)
107125
else:
@@ -117,11 +135,6 @@ def __call__(self, x, out=None, order="K"):
117135
out = _empty_like_orderK(buf, res_dt)
118136
else:
119137
out = dpt.empty_like(buf, dtype=res_dt, order=order)
120-
else:
121-
if buf_dt != out.dtype:
122-
raise TypeError(
123-
f"Output array of type {buf_dt} is needed, got {out.dtype}"
124-
)
125138

126139
ht, _ = self.unary_fn_(buf, out, sycl_queue=exec_q, depends=[copy_ev])
127140
ht_copy_ev.wait()

0 commit comments

Comments
 (0)