
Commit 24a9666

updating official name and public url; use LLVM compile defs (#104)
* updating official name and public url; use LLVM compile defs
* using older conda/build
* adding sysroot to runtime deps
* fixing out-of-bounds access in accumulate
* fixing invalid conditions
1 parent: e46af78

File tree

6 files changed: +26 -22 lines changed


.github/workflows/ci.yml (+1 -1)

@@ -46,7 +46,7 @@ jobs:
 bash $CPKG -u -b -f -p $CONDA_ROOT
 export PATH=$CONDA_ROOT/condabin:$CONDA_ROOT/bin:${PATH}
 eval "$($CONDA_ROOT/bin/python -m conda shell.bash hook)"
-conda install -c conda-forge --override-channels python git-lfs conda-build
+conda install -c conda-forge --override-channels python git-lfs 'conda-build<=24.1.2'
 # aahhhh bug in conda somewhere
 sed -i "s,\#\!/usr/bin/env python,#\!$CONDA_ROOT/bin/python," $CONDA_ROOT/*bin/conda
 conda clean --all -y

CMakeLists.txt (+5 -2)

@@ -43,6 +43,7 @@ message(STATUS "Expected IMEX sha: \"${EXPECTED_IMEX_SHA}\"")
 # find_package(ZLIB REQUIRED)
 find_library(LIBZ z NAMES libz.so libz.so.1 REQUIRED PATHS ${ZLIB_ROOT}/lib)
 message("FOUND zlib ${LIBZ}")
+set(ZLIB_LIBRARY ${LIBZ})
 find_package(TBB REQUIRED)
 find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED)
 find_package(pybind11 CONFIG REQUIRED)
@@ -64,9 +65,11 @@ list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
 #include(AddMLIR)
 #include(AddIMEX)

+# LLVM defs, most impoprtatntly __STDC_FORMAT_MACROS for PRIxPTR et al
+message(STATUS "LLVM_DEFINITIONS: ${LLVM_DEFINITIONS}")
+add_definitions(${LLVM_DEFINITIONS})
 # macro for mlir/imex root directories
-# __STDC_FORMAT_MACROS for PRIxPTR et al
-add_compile_definitions(CMAKE_MLIR_ROOT="${MLIR_ROOT}" CMAKE_IMEX_ROOT="${IMEX_ROOT}" FORTIFY_SOURCE=2 __STDC_FORMAT_MACROS=1)
+add_compile_definitions(CMAKE_MLIR_ROOT="${MLIR_ROOT}" CMAKE_IMEX_ROOT="${IMEX_ROOT}" FORTIFY_SOURCE=2)
 add_compile_options(
 "-ftemplate-backtrace-limit=0"
 "$<$<STREQUAL:${CMAKE_CXX_COMPILER_ID},GNU>:-flarge-source-files>"

README.md (+15 -15)

@@ -1,19 +1,19 @@
-[![.github/workflows/ci.yml](https://github.com/intel-sandbox/sharpy/actions/workflows/ci.yml/badge.svg)](https://github.com/intel-sandbox/sharpy/actions/workflows/ci.yml)
+[![.github/workflows/ci.yml](https://github.com/IntelPython/sharded-array-for-python/actions/workflows/ci.yml/badge.svg)](https://github.com/IntelPython/sharded-array-for-python/actions/workflows/ci.yml)

 ***This software package is not ready for production use and and merely a proof of concept implementation.***

-# Distributed Python Array
+# Sharded Array For Python

 A array implementation following the [array API as defined by the data-API consortium](https://data-apis.org/array-api/latest/index.html).
 Parallel and distributed execution currently is MPI/CSP-like. In a later version support for a controller-worker execution model will be added.

 ## Setting up build environment

-Install MLIR/LLVM and IMEX (see https://github.com/intel-innersource/frameworks.ai.mlir.mlir-extensions).
+Install MLIR/LLVM and Intel® Extension for MLIR (IMEX, see https://github.com/intel/mlir-extensions).

 ```bash
-git clone --recurse-submodules https://github.com/intel-sandbox/sharpy
-cd sharpy
+git clone --recurse-submodules https://github.com/IntelPython/sharded-array-for-python
+cd sharded-array-for-python
 git checkout jit
 conda create --file conda-env.txt --name sharpy
 conda activate sharpy
@@ -22,7 +22,7 @@ export MLIRROOT=<your-MLIR-install-dir>
 export IMEXROOT=<your-IMEX-install-dir>
 ```

-## Building sharpy
+## Building Sharded Array For Python

 ```bash
 python -m pip install .
@@ -98,7 +98,7 @@ pre-commit autoupdate

 ### Deferred Execution

-Typically, sharpy operations do not get executed immediately. Instead, the function returns a transparent object (a future) only.
+Typically, operations do not get executed immediately. Instead, the function returns a transparent object (a future) only.
 The actual computation gets deferred by creating a promise/deferred object and queuing it for later. This is not visible to users, they can use it as any other numpy-like library.

 Only when actual data is needed, computation will happen; that is when
@@ -112,17 +112,17 @@ In the background a worker thread handles deferred objects. Until computation is

 Arrays and operations on them get transparently distributed across multiple processes. Respective functionality is partly handled by this library and partly IMEX dist dialect.
 IMEX relies on a runtime library for complex communication tasks and for inspecting runtime configuration, such as number of processes and process id (MPI rank).
-sharpy provides this library functionality in a separate dynamic library "idtr".
+Sharded Array For Python provides this library functionality in a separate dynamic library "idtr".

 Right now, data is split in the first dimension (only). Each process knows the partition it owns. For optimization partitions can actually overlap.

-sharpy currently supports one execution mode: CSP/SPMD/explicitly-distributed execution, meaning all processes execute the same program, execution is replicated on all processes. Data is typically not replicated but distributed among processes. The distribution is handled automatically by sharpy, all operations on sharpy arrays can be viewed as collective operations.
+Sharded Array For Python currently supports one execution mode: CSP/SPMD/explicitly-distributed execution, meaning all processes execute the same program, execution is replicated on all processes. Data is typically not replicated but distributed among processes. The distribution is handled automatically, all operations on Sharded Arrays For Python can be viewed as collective operations.

 Later, we'll add a Controller-Worker/implicitly-distributed execution mode, meaning only a single process executes the program and it distributes data and work to worker processes.

 ### Array API Coverage

-Currently only a subset of the Array API is covered by sharpy
+Currently only a subset of the Array API is covered by Sharded Array For Python

 - elementwise binary operations
 - elementwise unary operations
@@ -135,18 +135,18 @@ Currently only a subset of the Array API is covered by sharpy

 ### Other Functionality

-- `sharpy.to_numpy` converts a sharpy array into a numpy array.
-- `sharpy.numpy.from_function` allows creating a sharpy array from a function (similar to numpy)
-- In addition to the Array API sharpy also provides functionality facilitating interacting with sharpy arrays in a distributed environment.
+- `sharpy.to_numpy` converts a sharded array into a numpy array.
+- `sharpy.numpy.from_function` allows creating a sharded array from a function (similar to numpy)
+- In addition to the Array API Sharded Array For Python also provides functionality facilitating interacting with sharded arrays in a distributed environment.
 - `sharpy.spmd.gather` gathers the distributed array and forms a single, local and contiguous copy of the data as a numpy array
 - `sharpy.spmd.get_locals` return the local part of the distributed array as a numpy array
-- sharpy allows providing a fallback array implementation. By setting SHARPY_FALLBACK to a python package it will call that package if a given function is not provided by sharpy. It will pass sharpy arrays as (gathered) numpy-arrays.
+- sharpy allows providing a fallback array implementation. By setting SHARPY_FALLBACK to a python package it will call that package if a given function is not provided. It will pass sharded arrays as (gathered) numpy-arrays.

 ## Environment variables

 ### Compile time variables

-Required to compile `sharpy`:
+Required to compile Sharded Array For Python:

 - `MLIRROOT`: Set path to MLIR install root.
 - `IMEXROOT`: Set path to Intel MLIR Extensions install root.

conda-recipe/meta.yaml (+1)

@@ -37,6 +37,7 @@ requirements:
 - tbb
 - impi_rt
 - mpi4py
+- sysroot_linux-64 >=2.28 # [linux]

 build:
 number: 0

src/CollComm.cpp (+2 -2)

@@ -42,8 +42,8 @@ void gather_array(NDArray::ptr_type a_ptr, rank_type root, void *outPtr) {
 auto myoff = a_ptr->local_offsets()[0];
 auto nd = a_ptr->ndims();
 auto gshape = a_ptr->shape();
-auto myTileSz =
-std::accumulate(&gshape[1], &gshape[nd], 1, std::multiplies<int64_t>());
+auto myTileSz = std::accumulate(&gshape.data()[1], &gshape.data()[nd], 1,
+std::multiplies<int64_t>());

 // allgather process local offset and sizes
 std::vector<int> displacements(nranks);
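The hunk above replaces `&gshape[nd]` with `&gshape.data()[nd]` (equivalent to `gshape.data() + nd`), which is the out-of-bounds fix named in the commit message. A minimal sketch of the difference, assuming `shape()` returns a `std::vector`-like container whose size equals `nd`; the names below are illustrative and not from the repository:

```cpp
// Illustrative sketch, not code from this repository.
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int64_t> gshape = {4, 3, 2}; // global shape, nd == 3
  const std::size_t nd = gshape.size();

  // Out of bounds: &gshape[nd] calls operator[] with index == size() just to
  // form the one-past-the-end address; bounds-checked STL builds abort here
  // and it is undefined behavior either way.
  // auto bad = std::accumulate(&gshape[1], &gshape[nd], int64_t(1),
  //                            std::multiplies<int64_t>());

  // Well-defined: data() + nd is the legitimate one-past-the-end pointer,
  // and &gshape.data()[nd] (as used in the fix) is the same expression.
  auto tileSz = std::accumulate(gshape.data() + 1, gshape.data() + nd,
                                int64_t(1), std::multiplies<int64_t>());
  std::cout << "tile size: " << tileSz << "\n"; // prints 6 (3 * 2)
  return 0;
}
```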

src/idtr.cpp (+2 -2)

@@ -691,7 +691,7 @@ UHCache getMetaData(SHARPY::rank_type nworkers, int64_t ndims,
 // target is rightHalo
 if (cE._bufferizeSend) {
 cE._rSendOff[i] = i ? cE._rSendOff[i - 1] + cE._rSendSize[i - 1] : 0;
-if (cE._rSendOff[i] < cE._rSendOff[i - 1]) {
+if (i && cE._rSendOff[i] < cE._rSendOff[i - 1]) {
 throw std::overflow_error("Fatal: Integer overflow in getMetaData");
 }
 cE._rBufferStart[i * ndims] = localRowStart;
@@ -709,7 +709,7 @@ UHCache getMetaData(SHARPY::rank_type nworkers, int64_t ndims,
 // target is leftHalo
 if (cE._bufferizeSend) {
 cE._lSendOff[i] = i ? cE._lSendOff[i - 1] + cE._lSendSize[i - 1] : 0;
-if (cE._lSendOff[i] < cE._lSendOff[i - 1]) {
+if (i && cE._lSendOff[i] < cE._lSendOff[i - 1]) {
 throw std::overflow_error("Fatal: Integer overflow in getMetaData");
 }
 cE._lBufferStart[i * ndims] = localRowStart;
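These two hunks are the "fixing invalid conditions" item: the added `i &&` guard keeps the overflow check from reading `_rSendOff[i - 1]` / `_lSendOff[i - 1]` when `i == 0`, an out-of-bounds index and a meaningless comparison since the first offset is always 0. A self-contained sketch of the pattern, with a hypothetical plain vector standing in for the cache fields:

```cpp
// Minimal sketch, not code from this repository.
#include <cstdint>
#include <stdexcept>
#include <vector>

// Computes prefix offsets from per-rank sizes and checks for wrap-around.
std::vector<uint64_t> prefixOffsets(const std::vector<uint64_t> &sizes) {
  std::vector<uint64_t> off(sizes.size());
  for (std::size_t i = 0; i < sizes.size(); ++i) {
    off[i] = i ? off[i - 1] + sizes[i - 1] : 0;
    // Without the leading "i &&", i == 0 would evaluate off[i - 1], i.e. an
    // out-of-bounds read and a bogus comparison. Short-circuit evaluation
    // skips the check for the first element, where no overflow is possible.
    if (i && off[i] < off[i - 1]) {
      throw std::overflow_error("Fatal: integer overflow in prefixOffsets");
    }
  }
  return off;
}

int main() {
  auto off = prefixOffsets({10, 20, 30}); // -> {0, 10, 30}
  return off.size() == 3 ? 0 : 1;
}
```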
