IntelPython
diff --git a/‎.github/workflows/generate-docs.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/generate-docs.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/os-llvm-sycl-build.yml
Lines changed: 31 additions & 23 deletions b/‎.github/workflows/os-llvm-sycl-build.yml
Lines changed: 31 additions & 23 deletions
diff --git a/‎dpctl/tensor/__init__.py
Lines changed: 13 additions & 1 deletion b/‎dpctl/tensor/__init__.py
Lines changed: 13 additions & 1 deletion
diff --git a/‎dpctl/tensor/_elementwise_funcs.py
Lines changed: 132 additions & 6 deletions b/‎dpctl/tensor/_elementwise_funcs.py
Lines changed: 132 additions & 6 deletions
diff --git a/‎dpctl/tensor/_usmarray.pyx
Lines changed: 13 additions & 3 deletions b/‎dpctl/tensor/_usmarray.pyx
Lines changed: 13 additions & 3 deletions
diff --git a/‎dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp
Lines changed: 4 additions & 3 deletions b/‎dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp
Lines changed: 4 additions & 3 deletions
@@ -26,7 +26,7 @@ jobs:
       - name: Install Intel OneAPI
         if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
         run: |
-          sudo apt-get install intel-oneapi-dpcpp-cpp-compiler
+          sudo apt-get install intel-oneapi-compiler-dpcpp-cpp
       - name: Install Lua
         if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
         run: |
 
@@ -11,9 +11,9 @@ jobs:
 
     env:
       DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download
-      DRIVER_PATH: 2023-WW13
-      OCLCPUEXP_FN: oclcpuexp-2023.15.3.0.20_rel.tar.gz
-      FPGAEMU_FN: fpgaemu-2023.15.3.0.20_rel.tar.gz
+      DRIVER_PATH: 2023-WW27
+      OCLCPUEXP_FN: oclcpuexp-2023.16.6.0.28_rel.tar.gz
+      FPGAEMU_FN: fpgaemu-2023.16.6.0.28_rel.tar.gz
       TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/
       TBB_INSTALL_DIR: oneapi-tbb-2021.9.0
       TBB_FN: oneapi-tbb-2021.9.0-lin.tgz
@@ -37,34 +37,42 @@ jobs:
             ${{ runner.os }}-
 
       - name: Download and install nightly and components
+        env:
+          USE_LATEST_SYCLOS: 0
         shell: bash -l {0}
         run: |
           cd /home/runner/work
           mkdir -p sycl_bundle
           cd sycl_bundle
-          # get list of shas and tags from remote, filter sycl-nightly tags and reverse order
-          export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
-                       grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
-          # initialize
-          unset DEPLOY_NIGHTLY_TAG
-          unset DEPLOY_NIGHTLY_TAG_SHA
-
-          # go through tags and find the most recent one where nighly build binary is available
-          while IFS= read -r NEXT_LLVM_TAG; do
-              export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
-              export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
-                                          $(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
-              if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz  2>&1 | grep 'HTTP/1.1 200 OK'` ]];
-              then
-                  export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
-                  export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
-                  break
-              fi
-          done <<< "${LLVM_TAGS}"
+          if [[ "${USE_LATEST_SYCLOS:-0}" -eq "1" ]]; then
+              # get list of shas and tags from remote, filter sycl-nightly tags and reverse order
+              export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
+                          grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
+              # initialize
+              unset DEPLOY_NIGHTLY_TAG
+              unset DEPLOY_NIGHTLY_TAG_SHA
+
+              # go through tags and find the most recent one where nightly build binary is available
+              while IFS= read -r NEXT_LLVM_TAG; do
+                  export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
+                  export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
+                                              $(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
+                  if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz  2>&1 | grep 'HTTP/1.1 200 OK'` ]];
+                  then
+                      export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
+                      export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
+                      break
+                  fi
+              done <<< "${LLVM_TAGS}"
+          else
+              # Use latest known to work tag instead
+              export DEPLOY_NIGHTLY_TAG="sycl-nightly%2F20230606"
+              export DEPLOY_LLVM_TAG_SHA=f44d0133d4b0077298f034697a1f3818ff1d6134
+          fi
 
           [[ -n "${DEPLOY_NIGHTLY_TAG}" ]] || exit 1
           [[ -n "${DEPLOY_LLVM_TAG_SHA}" ]] || exit 1
-          echo "Using ${m} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"
+          echo "Using ${DEPLOY_NIGHTLY_TAG} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"
 
           if [[ -f bundle_id.txt && ( "$(cat bundle_id.txt)" == "${DEPLOY_LLVM_TAG_SHA}" ) ]]; then
               echo "Using cached download of ${DEPLOY_LLVM_TAG_SHA}"
 
@@ -113,16 +113,22 @@
     less_equal,
     log,
     log1p,
+    log2,
+    log10,
     logical_and,
     logical_not,
     logical_or,
     logical_xor,
     multiply,
+    negative,
     not_equal,
+    positive,
+    pow,
     proj,
     real,
     sin,
     sqrt,
+    square,
     subtract,
     trunc,
 )
@@ -217,9 +223,9 @@
     "greater",
     "greater_equal",
     "imag",
+    "isfinite",
     "isinf",
     "isnan",
-    "isfinite",
     "less",
     "less_equal",
     "log",
@@ -228,12 +234,18 @@
     "logical_or",
     "logical_xor",
     "log1p",
+    "log2",
+    "log10",
     "multiply",
+    "negative",
     "not_equal",
+    "positive",
+    "pow",
     "proj",
     "real",
     "sin",
     "sqrt",
+    "square",
     "subtract",
     "sum",
     "trunc",
 
@@ -609,10 +609,56 @@
 )
 
 # U22: ==== LOG2        (x)
-# FIXME: implement U22
+_log2_docstring_ = """
+log2(x, out=None, order='K')
+
+Computes the base-2 logarithm for each element `x_i` of input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the base-2 logarithm of `x`.
+        The data type of the returned array is determined by the
+        Type Promotion Rules.
+"""
+
+log2 = UnaryElementwiseFunc(
+    "log2", ti._log2_result_type, ti._log2, _log2_docstring_
+)
 
 # U23: ==== LOG10       (x)
-# FIXME: implement U23
+_log10_docstring_ = """
+log10(x, out=None, order='K')
+
+Computes the base-10 logarithm for each element `x_i` of input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the base-10 logarithm of `x`.
+        The data type of the returned array is determined by the
+        Type Promotion Rules.
+"""
+
+log10 = UnaryElementwiseFunc(
+    "log10", ti._log10_result_type, ti._log10, _log10_docstring_
+)
 
 # B15: ==== LOGADDEXP   (x1, x2)
 # FIXME: implement B15
@@ -761,7 +807,27 @@
 )
 
 # U25: ==== NEGATIVE    (x)
-# FIXME: implement U25
+_negative_docstring_ = """
+negative(x, out=None, order='K')
+
+Computes the numerical negative for each element `x_i` of input array `x`.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out (usm_ndarray):
+        Output array to populate. Array must have the correct
+        shape and the expected data type.
+    order ("C","F","A","K", optional): memory layout of the new
+        output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the negative of `x`.
+"""
+
+negative = UnaryElementwiseFunc(
+    "negative", ti._negative_result_type, ti._negative, _negative_docstring_
+)
 
 # B20: ==== NOT_EQUAL   (x1, x2)
 _not_equal_docstring_ = """
@@ -793,10 +859,48 @@
 )
 
 # U26: ==== POSITIVE    (x)
-# FIXME: implement U26
+_positive_docstring_ = """
+positive(x, out=None, order='K')
+
+Computes the numerical positive for each element `x_i` of input array `x`.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out (usm_ndarray):
+        Output array to populate. Array must have the correct
+        shape and the expected data type.
+    order ("C","F","A","K", optional): memory layout of the new
+        output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the values of `x`.
+"""
+
+positive = UnaryElementwiseFunc(
+    "positive", ti._positive_result_type, ti._positive, _positive_docstring_
+)
 
 # B21: ==== POW         (x1, x2)
-# FIXME: implement B21
+_pow_docstring_ = """
+pow(x1, x2, out=None, order='K')
+
+Calculates `x1_i` raised to `x2_i` for each element `x1_i` of the input array
+`x1` with the respective element `x2_i` of the input array `x2`.
+
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have a numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have a numeric data type.
+Returns:
+    usm_ndarray:
+        an array containing the element-wise result. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+pow = BinaryElementwiseFunc(
+    "pow", ti._pow_result_type, ti._pow, _pow_docstring_
+)
 
 # U??: ==== PROJ        (x)
 _proj_docstring = """
@@ -884,7 +988,29 @@
 # FIXME: implement U31
 
 # U32: ==== SQUARE      (x)
-# FIXME: implement U32
+_square_docstring_ = """
+square(x, out=None, order='K')
+
+Computes `x_i**2` (or `x_i*x_i`) for each element `x_i` of input array `x`.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the square of `x`.
+        The data type of the returned array is determined by
+        the Type Promotion Rules.
+"""
+
+square = UnaryElementwiseFunc(
+    "square", ti._square_result_type, ti._square, _square_docstring_
+)
 
 # U33: ==== SQRT        (x)
 _sqrt_docstring_ = """
 
@@ -37,6 +37,7 @@ cimport dpctl.memory as c_dpmem
 cimport dpctl.tensor._dlpack as c_dlpack
 
 import dpctl.tensor._flags as _flags
+from dpctl.tensor._tensor_impl import default_device_fp_type
 
 include "_stride_utils.pxi"
 include "_types.pxi"
@@ -104,7 +105,7 @@ cdef class InternalUSMArrayError(Exception):
 
 
 cdef class usm_ndarray:
-    """ usm_ndarray(shape, dtype="|f8", strides=None, buffer="device", \
+    """ usm_ndarray(shape, dtype=None, strides=None, buffer="device", \
            offset=0, order="C", buffer_ctor_kwargs=dict(), \
            array_namespace=None)
 
@@ -116,6 +117,8 @@ cdef class usm_ndarray:
             Shape of the array to be created.
         dtype (str, dtype):
             Array data type, i.e. the type of array elements.
+            If ``dtype`` has the value ``None``, it is determined by the
+            default floating point type supported by the target device.
             The supported types are
                * ``bool``
                      boolean type
@@ -134,7 +137,7 @@ cdef class usm_ndarray:
                      double-precision real and complex floating
                      types, supported if target device's property
                      ``has_aspect_fp64`` is ``True``.
-            Default: ``"|f8"``.
+            Default: ``None``.
         strides (tuple, optional):
             Strides of the array to be created in elements.
             If ``strides`` has the value ``None``, it is determined by the
@@ -219,7 +222,7 @@ cdef class usm_ndarray:
                 "Data pointers of cloned and original objects are different.")
         return res
 
-    def __cinit__(self, shape, dtype="|f8", strides=None, buffer='device',
+    def __cinit__(self, shape, dtype=None, strides=None, buffer='device',
                   Py_ssize_t offset=0, order='C',
                   buffer_ctor_kwargs=dict(),
                   array_namespace=None):
@@ -252,6 +255,13 @@ cdef class usm_ndarray:
             except Exception:
                 raise TypeError("Argument shape must be a list or a tuple.")
         nd = len(shape)
+        if dtype is None:
+            q = buffer_ctor_kwargs.get("queue")
+            if q is not None:
+                dtype = default_device_fp_type(q)
+            else:
+                dev = dpctl.select_default_device()
+                dtype = "f8" if dev.has_aspect_fp64 else "f4"
         typenum = dtype_to_typenum(dtype)
         if (typenum < 0):
             if typenum == -2:
 
@@ -198,8 +198,9 @@ struct SequentialBooleanReduction
             // must convert to boolean first to handle nans
             using dpctl::tensor::type_utils::convert_impl;
             outT val = convert_impl<bool, argT>(inp_[inp_offset]);
+            ReductionOp op = reduction_op_;
 
-            red_val = reduction_op_(red_val, val);
+            red_val = op(red_val, val);
         }
 
         out_[out_iter_offset] = red_val;
@@ -452,9 +453,9 @@ struct StridedBooleanReduction
                 // must convert to boolean first to handle nans
                 using dpctl::tensor::type_utils::convert_impl;
                 bool val = convert_impl<bool, argT>(inp_[inp_offset]);
+                ReductionOp op = reduction_op_;
 
-                local_red_val =
-                    reduction_op_(local_red_val, static_cast<outT>(val));
+                local_red_val = op(local_red_val, static_cast<outT>(val));
             }
         }
         // reduction and atomic operations are performed
Original file line number	Diff line number	Diff line change
`@@ -198,8 +198,9 @@ struct SequentialBooleanReduction`
`198`	`198`	`// must convert to boolean first to handle nans`
`199`	`199`	`using dpctl::tensor::type_utils::convert_impl;`
`200`	`200`	`outT val = convert_impl<bool, argT>(inp_[inp_offset]);`
	`201`	`+ ReductionOp op = reduction_op_;`
`201`	`202`
`202`		`- red_val = reduction_op_(red_val, val);`
	`203`	`+ red_val = op(red_val, val);`
`203`	`204`	`}`
`204`	`205`
`205`	`206`	`out_[out_iter_offset] = red_val;`
`@@ -452,9 +453,9 @@ struct StridedBooleanReduction`
`452`	`453`	`// must convert to boolean first to handle nans`
`453`	`454`	`using dpctl::tensor::type_utils::convert_impl;`
`454`	`455`	`bool val = convert_impl<bool, argT>(inp_[inp_offset]);`
	`456`	`+ ReductionOp op = reduction_op_;`
`455`	`457`
`456`		`- local_red_val =`
`457`		`- reduction_op_(local_red_val, static_cast<outT>(val));`
	`458`	`+ local_red_val = op(local_red_val, static_cast<outT>(val));`
`458`	`459`	`}`
`459`	`460`	`}`
`460`	`461`	`// reduction and atomic operations are performed`