diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 2184ddd49537b..9e483612bc994 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -158,6 +158,7 @@ jobs: 'generic-no-wide-characters', 'generic-no-rtti', 'generic-optimized-speed', + 'generic-pstl-openmp', 'generic-static', 'bootstrapping-build' ] diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index abe12c2805a7c..dee2a75f74d89 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -300,10 +300,11 @@ option(LIBCXX_HAS_EXTERNAL_THREAD_API This option may only be set to ON when LIBCXX_ENABLE_THREADS=ON." OFF) if (LIBCXX_ENABLE_THREADS) - set(LIBCXX_PSTL_BACKEND "std_thread" CACHE STRING "Which PSTL backend to use") + set(LIBCXX_PSTL_BACKEND_DEFAULT "std_thread") else() - set(LIBCXX_PSTL_BACKEND "serial" CACHE STRING "Which PSTL backend to use") + set(LIBCXX_PSTL_BACKEND_DEFAULT "serial") endif() +set(LIBCXX_PSTL_BACKEND "${LIBCXX_PSTL_BACKEND_DEFAULT}" CACHE STRING "Select the PSTL backend to use. Valid values are serial, std_thread, libdispatch, openmp. Default: ${LIBCXX_PSTL_BACKEND_DEFAULT}") # Misc options ---------------------------------------------------------------- # FIXME: Turn -pedantic back ON. 
It is currently off because it warns @@ -552,6 +553,11 @@ function(cxx_add_basic_build_flags target) endif() endif() target_compile_options(${target} PUBLIC "${LIBCXX_ADDITIONAL_COMPILE_FLAGS}") + + # If the PSTL backend depends on OpenMP, we must enable the OpenMP tool chain + if (LIBCXX_PSTL_BACKEND STREQUAL "openmp") + target_add_compile_flags_if_supported(${target} PUBLIC -fopenmp) + endif() endfunction() # Exception flags ============================================================= @@ -784,6 +790,8 @@ elseif(LIBCXX_PSTL_BACKEND STREQUAL "std_thread") config_define(1 _LIBCPP_PSTL_BACKEND_STD_THREAD) elseif(LIBCXX_PSTL_BACKEND STREQUAL "libdispatch") config_define(1 _LIBCPP_PSTL_BACKEND_LIBDISPATCH) +elseif (LIBCXX_PSTL_BACKEND STREQUAL "openmp") + config_define(1 _LIBCPP_PSTL_BACKEND_OPENMP) else() message(FATAL_ERROR "LIBCXX_PSTL_BACKEND is set to ${LIBCXX_PSTL_BACKEND}, which is not a valid backend. Valid backends are: serial, std_thread and libdispatch") diff --git a/libcxx/cmake/caches/Generic-pstl-openmp.cmake b/libcxx/cmake/caches/Generic-pstl-openmp.cmake new file mode 100644 index 0000000000000..f3ff4f3b57fd2 --- /dev/null +++ b/libcxx/cmake/caches/Generic-pstl-openmp.cmake @@ -0,0 +1 @@ +set(LIBCXX_PSTL_BACKEND openmp CACHE STRING "") diff --git a/libcxx/docs/UserDocumentation.rst b/libcxx/docs/UserDocumentation.rst index 2c1bc1373659c..f1e7b19ead579 100644 --- a/libcxx/docs/UserDocumentation.rst +++ b/libcxx/docs/UserDocumentation.rst @@ -1,17 +1,19 @@ -.. _user-documentation: +.. _using-libcxx: -================== -User documentation -================== +============ +Using libc++ +============ .. contents:: :local: +Usually, libc++ is packaged and shipped by a vendor through some delivery vehicle +(operating system distribution, SDK, toolchain, etc) and users don't need to do +anything special in order to use the library. 
+ This page contains information about configuration knobs that can be used by users when they know libc++ is used by their toolchain, and how to use libc++ -when it is not the default library used by their toolchain. It is aimed at -users of libc++: a separate page contains documentation aimed at vendors who -build and ship libc++ as part of their toolchain. +when it is not the default library used by their toolchain. Using a different version of the C++ Standard @@ -26,29 +28,10 @@ matches that Standard in the library. $ clang++ -std=c++17 test.cpp -Note that using ``-std=c++XY`` with a version of the Standard that has not been ratified -yet is considered unstable. While we strive to maintain stability, libc++ may be forced to -make breaking changes to features shipped in a Standard that hasn't been ratified yet. Use -these versions of the Standard at your own risk. - - -Using libc++ when it is not the system default -============================================== - -Usually, libc++ is packaged and shipped by a vendor through some delivery vehicle -(operating system distribution, SDK, toolchain, etc) and users don't need to do -anything special in order to use the library. - -On systems where libc++ is provided but is not the default, Clang provides a flag -called ``-stdlib=`` that can be used to decide which standard library is used. -Using ``-stdlib=libc++`` will select libc++: - -.. code-block:: bash - - $ clang++ -stdlib=libc++ test.cpp - -On systems where libc++ is the library in use by default such as macOS and FreeBSD, -this flag is not required. +.. warning:: + Using ``-std=c++XY`` with a version of the Standard that has not been ratified yet + is considered unstable. Libc++ reserves the right to make breaking changes to the + library until the standard has been ratified. Enabling experimental C++ Library features @@ -60,19 +43,15 @@ the Standard but whose implementation is not complete or stable yet in libc++. 
T are disabled by default because they are neither API nor ABI stable. However, the ``-fexperimental-library`` compiler flag can be defined to turn those features on. -On compilers that do not support the ``-fexperimental-library`` flag (such as GCC), -users can define the ``_LIBCPP_ENABLE_EXPERIMENTAL`` macro and manually link against -the appropriate static library (usually shipped as ``libc++experimental.a``) to get -access to experimental library features. - The following features are currently considered experimental and are only provided when ``-fexperimental-library`` is passed: * The parallel algorithms library (```` and the associated algorithms) +* ``std::stop_token``, ``std::stop_source`` and ``std::stop_callback`` +* ``std::jthread`` * ``std::chrono::tzdb`` and related time zone functionality -* ```` -.. note:: +.. warning:: Experimental libraries are experimental. * The contents of the ```` headers and the associated static library will not remain compatible between versions. @@ -81,18 +60,98 @@ when ``-fexperimental-library`` is passed: the experimental feature is removed two releases after the non-experimental version has shipped. The full policy is explained :ref:`here `. +.. note:: + On compilers that do not support the ``-fexperimental-library`` flag, users can + define the ``_LIBCPP_ENABLE_EXPERIMENTAL`` macro and manually link against the + appropriate static library (usually shipped as ``libc++experimental.a``) to get + access to experimental library features. -Libc++ Configuration Macros + +Using libc++ when it is not the system default +============================================== + +On systems where libc++ is provided but is not the default, Clang provides a flag +called ``-stdlib=`` that can be used to decide which standard library is used. +Using ``-stdlib=libc++`` will select libc++: + +.. 
code-block:: bash + + $ clang++ -stdlib=libc++ test.cpp + +On systems where libc++ is the library in use by default such as macOS and FreeBSD, +this flag is not required. + + +.. _alternate libcxx: + +Using a custom built libc++ =========================== -Libc++ provides a number of configuration macros that can be used by developers to -enable or disable extended libc++ behavior. +Most compilers provide a way to disable the default behavior for finding the +standard library and to override it with custom paths. With Clang, this can +be done with: -.. warning:: - Configuration macros that are not documented here are not intended to be customized - by developers and should not be used. In particular, some configuration macros are - only intended to be used by vendors and changing their value from the one provided - in your toolchain can lead to unexpected behavior. +.. code-block:: bash + + $ clang++ -nostdinc++ -nostdlib++ \ + -isystem /include/c++/v1 \ + -L /lib \ + -Wl,-rpath,/lib \ + -lc++ \ + test.cpp + +The option ``-Wl,-rpath,/lib`` adds a runtime library search path, +which causes the system's dynamic linker to look for libc++ in ``/lib`` +whenever the program is loaded. + +GCC does not support the ``-nostdlib++`` flag, so one must use ``-nodefaultlibs`` +instead. Since that removes all the standard system libraries and not just libc++, +the system libraries must be re-added manually. For example: + +.. code-block:: bash + + $ g++ -nostdinc++ -nodefaultlibs \ + -isystem /include/c++/v1 \ + -L /lib \ + -Wl,-rpath,/lib \ + -lc++ -lc++abi -lm -lc -lgcc_s -lgcc \ + test.cpp + + +GDB Pretty printers for libc++ +============================== + +GDB does not support pretty-printing of libc++ symbols by default. However, libc++ does +provide pretty-printers itself. Those can be used as: + +.. code-block:: bash + + $ gdb -ex "source /utils/gdb/libcxx/printers.py" \ + -ex "python register_libcxx_printer_loader()" \ + + +.. 
_include-what-you-use: + +include-what-you-use (IWYU) +=========================== + +libc++ provides an IWYU `mapping file `_, +which drastically improves the accuracy of the tool when using libc++. To use the mapping file with +IWYU, you should run the tool like so: + +.. code-block:: bash + + $ include-what-you-use -Xiwyu --mapping_file=/path/to/libcxx/include/libcxx.imp file.cpp + +If you would prefer to not use that flag, then you can replace ``/path/to/include-what-you-use/share/libcxx.imp`` +file with the libc++-provided ``libcxx.imp`` file. + +Libc++ Configuration Macros +=========================== + +Libc++ provides a number of configuration macros which can be used to enable +or disable extended libc++ behavior, including enabling hardening or thread +safety annotations. **_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS**: This macro is used to enable -Wthread-safety annotations on libc++'s @@ -134,12 +193,6 @@ enable or disable extended libc++ behavior. warning saying that `std::auto_ptr` is deprecated. If the macro is defined, no warning will be emitted. By default, this macro is not defined. -**_LIBCPP_ENABLE_EXPERIMENTAL**: - This macro enables experimental features. This can be used on compilers that do - not support the ``-fexperimental-library`` flag. When used, users also need to - ensure that the appropriate experimental library (usually ``libc++experimental.a``) - is linked into their program. - C++17 Specific Configuration Macros ----------------------------------- **_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR**: @@ -156,18 +209,12 @@ C++17 Specific Configuration Macros **_LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE**: This macro is used to re-enable the `random_shuffle` algorithm. -**_LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION**: - This macro is used to re-enable `unary_function` and `binary_function`. - **_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS**: This macro is used to re-enable `set_unexpected`, `get_unexpected`, and `unexpected`. 
C++20 Specific Configuration Macros ----------------------------------- -**_LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION**: - This macro is used to re-enable `uncaught_exception`. - **_LIBCPP_ENABLE_CXX20_REMOVED_SHARED_PTR_UNIQUE**: This macro is used to re-enable the function ``std::shared_ptr<...>::unique()``. @@ -184,9 +231,6 @@ C++20 Specific Configuration Macros **_LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR**: This macro is used to re-enable `raw_storage_iterator`. -**_LIBCPP_ENABLE_CXX20_REMOVED_TEMPORARY_BUFFER**: - This macro is used to re-enable `get_temporary_buffer` and `return_temporary_buffer`. - **_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS**: This macro is used to re-enable `is_literal_type`, `is_literal_type_v`, `result_of` and `result_of_t`. @@ -263,7 +307,7 @@ Extensions to the C++23 modules ``std`` and ``std.compat`` ---------------------------------------------------------- Like other major implementations, libc++ provides C++23 modules ``std`` and -``std.compat`` in C++20 as an extension. +``std.compat`` in C++20 as an extension. Constant-initialized std::string -------------------------------- @@ -320,14 +364,109 @@ Unpoisoning may not be an option, if (for example) you are not maintaining the a * You are using allocator, which does not call destructor during deallocation. * You are aware that memory allocated with an allocator may be accessed, even when unused by container. -Support for compiler extensions -------------------------------- +Offloading C++ Parallel Algorithms to GPUs +------------------------------------------ + +Experimental support for GPU offloading has been added to ``libc++``. The +implementation uses OpenMP target offloading to leverage GPU compute resources. +The OpenMP PSTL backend can target both NVIDIA and AMD GPUs. +However, the implementation only supports contiguous iterators, such as +iterators for ``std::vector`` or ``std::array``. 
+To enable the OpenMP offloading backend it must be selected with +``LIBCXX_PSTL_BACKEND=openmp`` when installing ``libc++``. Further, when +compiling a program, the user must specify the command line options +``-fopenmp -fexperimental-library``. To install LLVM with OpenMP offloading +enabled, please read +`the LLVM OpenMP FAQ. `_ +You may also want to visit +`the OpenMP offloading command-line argument reference. `_ + +Example +~~~~~~~ + +The following is an example of offloading vector addition to a GPU using our +standard library extension. It implements the classical vector addition from +BLAS that overwrites the vector ``y`` with ``y=a*x+y``. Thus ``y.begin()`` is +both used as an input and an output iterator in this example. + +.. code-block:: cpp + + #include <algorithm> + #include <execution> + + template <typename T1, typename T2, typename T3> + void axpy(const T1 a, const std::vector<T2> &x, std::vector<T3> &y) { + std::transform(std::execution::par_unseq, x.begin(), x.end(), y.begin(), + y.begin(), [=](T2 xi, T3 yi) { return a * xi + yi; }); + } -Clang, GCC and other compilers all provide their own set of language extensions. These extensions -have often been developed without particular consideration for their interaction with the library, -and as such, libc++ does not go out of its way to support them. The library may support specific -compiler extensions which would then be documented explicitly, but the basic expectation should be -that no special support is provided for arbitrary compiler extensions. +The execution policy ``std::execution::par_unseq`` states that the algorithm's +execution may be parallelized, vectorized, and migrated across threads. This is +the only execution mode that is safe to offload to GPUs, and for all other +execution modes the algorithms will execute on the CPU. +Special attention must be paid to the lambda captures when enabling GPU +offloading. If the lambda captures by reference, the user must manually map the +variables to the device. 
If capturing by reference, the above example could +be implemented in the following way. + +.. code-block:: cpp + + template <typename T1, typename T2, typename T3> + void axpy(const T1 a, const std::vector<T2> &x, std::vector<T3> &y) { + #pragma omp target data map(to : a) + std::transform(std::execution::par_unseq, x.begin(), x.end(), y.begin(), + y.begin(), [&](T2 xi, T3 yi) { return a * xi + yi; }); + } + +However, if unified shared memory, USM, is enabled, no additional data mapping +is necessary when capturing by reference. + +Compiling functions for GPUs with OpenMP +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The C++ standard defines that all accesses to memory are inside a single address +space. However, discrete GPU systems have distinct address spaces. A single +address space can be emulated if your system supports unified shared memory. +However, many discrete GPU systems do not, and in those cases it is important to +pass device function pointers to the parallel algorithms. Below is an example of +how the OpenMP ``declare target`` directive with the ``indirect`` clause can be +used to mark that a function should be compiled for both host and device. + +.. 
code-block:: cpp + + // This function computes the squared difference of two floating points + float squared(float a, float b) { return a * a - 2.0f * a * b + b * b; }; + + // Declare that the function must be compiled for both host and device + #pragma omp declare target indirect to(squared) + + int main() { + std::vector a(100, 1.0); + std::vector b(100, 1.25); + + // Pass the host function pointer to the parallel algorithm and let OpenMP + // translate it to the device function pointer internally + float sum = + std::transform_reduce(std::execution::par_unseq, a.begin(), a.end(), + b.begin(), 0.0f, std::plus{}, squared); + + // Validate that the result is approximately 6.25 + assert(std::abs(sum - 6.25f) < 1e-10); + return 0; + } + +Without unified shared memory, the above example will not work if the host +function pointer ``squared`` is passed to the parallel algorithm. + +Important notes about exception handling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +GPU architectures do not support exception handling. If compiling a program +containing parallel algorithms with current versions of Clang, a program with +exceptions in offloaded code regions will compile, but the program will +terminate if an exception is thrown on the device. This does not conform with +the C++ standard and exception handling on GPUs will hopefully be better +supported in future releases of LLVM. Platform specific behavior ========================== @@ -351,67 +490,3 @@ specific locale is imbued, the IO with the underlying stream happens with regular ``char`` elements, which are converted to/from wide characters according to the locale. Note that this doesn't behave as expected if the stream has been set in Unicode mode. - - -Third-party Integrations -======================== - -Libc++ provides integration with a few third-party tools. - -Debugging libc++ internals in LLDB ----------------------------------- - -LLDB hides the implementation details of libc++ by default. 
- -E.g., when setting a breakpoint in a comparator passed to ``std::sort``, the -backtrace will read as - -.. code-block:: - - (lldb) thread backtrace - * thread #1, name = 'a.out', stop reason = breakpoint 3.1 - * frame #0: 0x000055555555520e a.out`my_comparator(a=1, b=8) at test-std-sort.cpp:6:3 - frame #7: 0x0000555555555615 a.out`void std::__1::sort[abi:ne200000], bool (*)(int, int)>(__first=(item = 8), __last=(item = 0), __comp=(a.out`my_less(int, int) at test-std-sort.cpp:5)) at sort.h:1003:3 - frame #8: 0x000055555555531a a.out`main at test-std-sort.cpp:24:3 - -Note how the caller of ``my_comparator`` is shown as ``std::sort``. Looking at -the frame numbers, we can see that frames #1 until #6 were hidden. Those frames -represent internal implementation details such as ``__sort4`` and similar -utility functions. - -To also show those implementation details, use ``thread backtrace -u``. -Alternatively, to disable those compact backtraces, use ``frame recognizer list`` -and ``frame recognizer disable`` on the "libc++ frame recognizer". - -Futhermore, stepping into libc++ functions is disabled by default. This is controlled via the -setting ``target.process.thread.step-avoid-regexp`` which defaults to ``^std::`` and can be -disabled using ``settings set target.process.thread.step-avoid-regexp ""``. - -GDB Pretty printers for libc++ ------------------------------- - -GDB does not support pretty-printing of libc++ symbols by default. However, libc++ does -provide pretty-printers itself. Those can be used as: - -.. code-block:: bash - - $ gdb -ex "source /utils/gdb/libcxx/printers.py" \ - -ex "python register_libcxx_printer_loader()" \ - - - -.. _include-what-you-use: - -include-what-you-use (IWYU) ---------------------------- - -libc++ provides an IWYU `mapping file `_, -which drastically improves the accuracy of the tool when using libc++. To use the mapping file with -IWYU, you should run the tool like so: - -.. 
code-block:: bash - - $ include-what-you-use -Xiwyu --mapping_file=/path/to/libcxx/include/libcxx.imp file.cpp - -If you would prefer to not use that flag, then you can replace ``/path/to/include-what-you-use/share/libcxx.imp`` -file with the libc++-provided ``libcxx.imp`` file. diff --git a/libcxx/docs/VendorDocumentation.rst b/libcxx/docs/VendorDocumentation.rst index 959a28607d75d..5727005e24fbd 100644 --- a/libcxx/docs/VendorDocumentation.rst +++ b/libcxx/docs/VendorDocumentation.rst @@ -1,17 +1,19 @@ -.. _VendorDocumentation: +.. _BuildingLibcxx: -==================== -Vendor Documentation -==================== +=============== +Building libc++ +=============== .. contents:: :local: +.. _build instructions: + The instructions on this page are aimed at vendors who ship libc++ as part of an operating system distribution, a toolchain or similar shipping vehicles. If you are a user merely trying to use libc++ in your program, you most likely want to -refer to your vendor's documentation, or to the general user documentation -:ref:`here `. +refer to your vendor's documentation, or to the general documentation for using +libc++ :ref:`here `. .. warning:: If your operating system already provides libc++, it is important to be careful @@ -40,37 +42,21 @@ with the following CMake invocation: $ ninja -C build install-cxx install-cxxabi install-unwind # Install .. note:: - See :ref:`Vendor Configuration Options` below for more configuration options. + See :ref:`CMake Options` below for more configuration options. After building the various ``install-XXX`` targets, shared libraries for libc++, libc++abi and libunwind should now be present in ``/lib``, and headers in -``/include/c++/v1``. See the instructions below for information on how -to use this libc++ over the default one. +``/include/c++/v1``. See :ref:`using an alternate libc++ installation +` for information on how to use this libc++ over the default one. 
In the default configuration, the runtimes will be built using the compiler available by default on your system. Of course, you can change what compiler is being used with the usual CMake variables. If you wish to build the runtimes from a just-built Clang, the bootstrapping build explained below makes this task easy. -Using the just-built libc++ ---------------------------- - -Most compilers provide a way to disable the default behavior for finding the standard library and -to override it with custom paths. With Clang, this can be done with: - -.. code-block:: bash - - $ clang++ -nostdinc++ -isystem /include/c++/v1 \ - -nostdlib++ -L /lib -lc++ \ - -Wl,-rpath,/lib \ - test.cpp -The option ``-Wl,-rpath,/lib`` adds a runtime library search path, which causes the system's -dynamic linker to look for libc++ in ``/lib`` whenever the program is loaded. - - -The Bootstrapping build -======================= +Bootstrapping build +=================== It is possible to build Clang and then build the runtimes using that just-built compiler in a single CMake invocation. This is usually the correct way to build the runtimes when putting together @@ -89,29 +75,123 @@ CMake invocation at ``/llvm``: $ ninja -C build install-runtimes # Install .. note:: - - This type of build is also commonly called a "Runtimes build", but we would like to move - away from that terminology, which is too confusing. + This type of build is also commonly called a "Runtimes build", but we would like to move + away from that terminology, which is too confusing. - - Adding the `--fresh` flag to the top-level cmake invocation in a bootstrapping build *will not* - freshen the cmake cache of any of the enabled runtimes. +.. warning:: + Adding the `--fresh` flag to the top-level cmake invocation in a bootstrapping build *will not* + freshen the cmake cache of any of the enabled runtimes. +Support for Windows +=================== -.. 
_Vendor Configuration Options: +libcxx supports being built with clang-cl, but not with MSVC's cl.exe, as +cl doesn't support the ``#include_next`` extension. Furthermore, VS 2017 or +newer (19.14) is required. -Vendor Configuration Options -============================ +libcxx also supports being built with clang targeting MinGW environments. -This section documents configuration options that can be used by vendors when building the library. -These options provide a great deal of flexibility to customize libc++, such as selecting the ABI in -use, whether some features are provided, etc. +CMake + Visual Studio +--------------------- -.. warning:: - Many of these CMake options are tied to configuration macros with a corresponding name in the source - code. However, these configuration macros are not intended to be customized by users directly, since - many of them require the library to be built with a matching configuration. If you don't build libc++ - yourself, you should not use the options documented here. +Building with Visual Studio currently does not permit running tests. However, +it is the simplest way to build. + +.. code-block:: batch + + > cmake -G "Visual Studio 16 2019" -S runtimes -B build ^ + -T "ClangCL" ^ + -DLLVM_ENABLE_RUNTIMES=libcxx ^ + -DLIBCXX_ENABLE_SHARED=YES ^ + -DLIBCXX_ENABLE_STATIC=NO + > cmake --build build + +CMake + ninja (MSVC) +-------------------- + +Building with ninja is required for development to enable tests. +A couple of tests require Bash to be available, and a couple dozen +tests require other POSIX tools (cp, grep and similar - LLVM's tests +require the same). Without those tools the vast majority of tests +can still be run successfully. + +If Git for Windows is available, that can be used to provide the bash +shell by adding the right bin directory to the path, e.g. +``set PATH=%PATH%;C:\Program Files\Git\usr\bin``. + +Alternatively, one can also choose to run the whole build in a MSYS2 +shell. 
That can be set up e.g. by starting a Visual Studio Tools Command +Prompt (for getting the environment variables pointing to the headers and +import libraries), and making sure that clang-cl is available in the +path. From there, launch an MSYS2 shell via e.g. +``C:\msys64\msys2_shell.cmd -full-path -mingw64`` (preserving the earlier +environment, allowing the MSVC headers/libraries and clang-cl to be found). + +In either case, then run: + +.. code-block:: batch + + > cmake -G Ninja -S runtimes -B build ^ + -DCMAKE_C_COMPILER=clang-cl ^ + -DCMAKE_CXX_COMPILER=clang-cl ^ + -DLLVM_ENABLE_RUNTIMES=libcxx + > ninja -C build cxx + > ninja -C build check-cxx + +If you are running in an MSYS2 shell and you have installed the +MSYS2-provided clang package (which defaults to a non-MSVC target), you +should add e.g. ``-DCMAKE_CXX_COMPILER_TARGET=x86_64-windows-msvc`` (replacing +``x86_64`` with the architecture you're targeting) to the ``cmake`` command +line above. This will instruct ``check-cxx`` to use the right target triple +when invoking ``clang++``. + +CMake + ninja (MinGW) +--------------------- + +libcxx can also be built in MinGW environments, e.g. with the MinGW +compilers in MSYS2. This requires clang to be available (installed with +e.g. the ``mingw-w64-x86_64-clang`` package), together with CMake and ninja. + +.. code-block:: bash + + > cmake -G Ninja -S runtimes -B build \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DLLVM_ENABLE_LLD=ON \ + -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" \ + -DLIBCXXABI_ENABLE_SHARED=OFF \ + -DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON + > ninja -C build cxx + > ninja -C build check-cxx + +.. _`libc++abi`: http://libcxxabi.llvm.org/ + + +.. _CMake Options: + +CMake Options +============= -General purpose options +Here are some of the CMake variables that are used often, along with a +brief explanation and LLVM-specific notes. 
For full documentation, check the +CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. + +**CMAKE_BUILD_TYPE**:STRING + Sets the build type for ``make`` based generators. Possible values are + Release, Debug, RelWithDebInfo and MinSizeRel. On systems like Visual Studio + the user sets the build type with the IDE settings. + +**CMAKE_INSTALL_PREFIX**:PATH + Path where LLVM will be installed if "make install" is invoked or the + "INSTALL" target is built. + +**CMAKE_CXX_COMPILER**:STRING + The C++ compiler to use when building and testing libc++. + + +.. _libcxx-specific options: + +libc++ specific options ----------------------- .. option:: LIBCXX_INSTALL_LIBRARY:BOOL @@ -213,13 +293,11 @@ General purpose options Output name for the shared libc++ runtime library. -.. option:: {LIBCXX,LIBCXXABI,LIBUNWIND}_ADDITIONAL_COMPILE_FLAGS:STRING +.. option:: LIBCXX_ADDITIONAL_COMPILE_FLAGS:STRING **Default**: ``""`` - Additional compile flags to use when building the runtimes. This should be a CMake ``;``-delimited list of individual - compiler options to use. For options that must be passed as-is to the compiler without deduplication (e.g. - ``-Xclang -foo`` option groups), consider using ``SHELL:`` as `documented here `_. + Additional Compile only flags which can be provided in cache. .. option:: LIBCXX_ADDITIONAL_LIBRARIES:STRING @@ -227,6 +305,65 @@ General purpose options Additional libraries libc++ is linked to which can be provided in cache. + +.. _ABI Library Specific Options: + +ABI Library Specific Options +---------------------------- + +.. option:: LIBCXX_CXX_ABI:STRING + + **Values**: ``none``, ``libcxxabi``, ``system-libcxxabi``, ``libcxxrt``, ``libstdc++``, ``libsupc++``, ``vcruntime``. + + Select the ABI library to build libc++ against. + +.. option:: LIBCXX_CXX_ABI_INCLUDE_PATHS:PATHS + + Provide additional search paths for the ABI library headers. + +.. 
option:: LIBCXX_CXX_ABI_LIBRARY_PATH:PATH + + Provide the path to the ABI library that libc++ should link against. This is only + useful when linking against an out-of-tree ABI library. + +.. option:: LIBCXX_ENABLE_STATIC_ABI_LIBRARY:BOOL + + **Default**: ``OFF`` + + If this option is enabled, libc++ will try to link the selected ABI library + statically. + +.. option:: LIBCXX_ENABLE_ABI_LINKER_SCRIPT:BOOL + + **Default**: ``ON`` by default on UNIX platforms other than Apple unless + 'LIBCXX_ENABLE_STATIC_ABI_LIBRARY' is ON. Otherwise the default value is ``OFF``. + + This option generates and installs a linker script as ``libc++.so`` which + links the correct ABI library. + +.. option:: LIBCXXABI_USE_LLVM_UNWINDER:BOOL + + **Default**: ``ON`` + + Build and use the LLVM unwinder. Note: This option can only be used when + libc++abi is the C++ ABI library used. + +.. option:: LIBCXXABI_ADDITIONAL_COMPILE_FLAGS:STRING + + **Default**: ``""`` + + Additional compile-only flags which can be provided in cache. + +.. option:: LIBCXXABI_ADDITIONAL_LIBRARIES:STRING + + **Default**: ``""`` + + Additional libraries libc++abi is linked to which can be provided in cache. + + +libc++ Feature Options +---------------------- + .. option:: LIBCXX_ENABLE_EXCEPTIONS:BOOL **Default**: ``ON`` @@ -244,8 +381,7 @@ General purpose options **Default**: ``ON`` (or value of ``LLVM_INCLUDE_TESTS``) - Build the libc++ test suite, which includes various types of tests like conformance - tests, vendor-specific tests and benchmarks. + Build the libc++ tests. .. option:: LIBCXX_INCLUDE_BENCHMARKS:BOOL @@ -254,6 +390,31 @@ General purpose options Build the libc++ benchmark tests and the Google Benchmark library needed to support them. +.. option:: LIBCXX_BENCHMARK_TEST_ARGS:STRING + + **Default**: ``--benchmark_min_time=0.01`` + + A semicolon-separated list of arguments to pass when running the libc++ benchmarks using the + ``check-cxx-benchmarks`` rule. 
By default we run the benchmarks for a very short amount of time, + since the primary use of ``check-cxx-benchmarks`` is to get test and sanitizer coverage, not to + get accurate measurements. + +.. option:: LIBCXX_BENCHMARK_NATIVE_STDLIB:STRING + + **Default**:: ``""`` + + **Values**:: ``libc++``, ``libstdc++`` + + Build the libc++ benchmark tests and Google Benchmark library against the + specified standard library on the platform. On Linux this can be used to + compare libc++ to libstdc++ by building the benchmark tests against both + standard libraries. + +.. option:: LIBCXX_BENCHMARK_NATIVE_GCC_TOOLCHAIN:STRING + + Use the specified GCC toolchain and standard library when building the native + stdlib benchmark tests. + .. option:: LIBCXX_ASSERTION_HANDLER_FILE:PATH **Default**:: ``"${CMAKE_CURRENT_SOURCE_DIR}/vendor/llvm/default_assertion_handler.in"`` @@ -261,11 +422,22 @@ General purpose options Specify the path to a header that contains a custom implementation of the assertion handler that gets invoked when a hardening assertion fails. If provided, this header will be included by the library, replacing the - default assertion handler. If this is specified as a relative path, it - is assumed to be relative to ``/libcxx``. + default assertion handler. + +.. option:: LIBCXX_PSTL_BACKEND:STRING + + **Default**:: ``"std_thread"`` when ``LIBCXX_ENABLE_THREADS`` is ``ON``, ``"serial"`` otherwise + + **Values**:: ``serial``, ``std_thread``, ``libdispatch``, ``openmp`` + + Select the desired backend for C++ parallel algorithms. All four options can + target multi-core CPU architectures, and ``openmp`` can additionally target + GPU architectures. The ``openmp`` backend requires OpenMP version 4.5 or + later. -ABI Specific Options --------------------- + +libc++ ABI Feature Options +-------------------------- The following options allow building libc++ for a different ABI version. @@ -291,7 +463,7 @@ The following options allow building libc++ for a different ABI version. with other libc++ versions. .. 
warning:: - When providing a custom namespace, it's the vendor's responsibility to ensure the name won't cause + When providing a custom namespace, it's the user's responsibility to ensure the name won't cause conflicts with other names defined by libc++, both now and in the future. In particular, inline namespaces of the form ``__[0-9]+`` could cause conflicts with future versions of the library, and so should be avoided. @@ -303,48 +475,8 @@ The following options allow building libc++ for a different ABI version. A semicolon-separated list of ABI macros to persist in the site config header. See ``include/__config`` for the list of ABI macros. -.. option:: LIBCXX_CXX_ABI:STRING - - **Values**: ``none``, ``libcxxabi``, ``system-libcxxabi``, ``libcxxrt``, ``libstdc++``, ``libsupc++``, ``vcruntime``. - - Select the ABI library to build libc++ against. - -.. option:: LIBCXX_CXX_ABI_INCLUDE_PATHS:PATHS - - Provide additional search paths for the ABI library headers. - -.. option:: LIBCXX_CXX_ABI_LIBRARY_PATH:PATH - - Provide the path to the ABI library that libc++ should link against. This is only - useful when linking against an out-of-tree ABI library. - -.. option:: LIBCXX_ENABLE_STATIC_ABI_LIBRARY:BOOL - - **Default**: ``OFF`` - - If this option is enabled, libc++ will try and link the selected ABI library - statically. - -.. option:: LIBCXX_ENABLE_ABI_LINKER_SCRIPT:BOOL - - **Default**: ``ON`` by default on UNIX platforms other than Apple unless - 'LIBCXX_ENABLE_STATIC_ABI_LIBRARY' is ON. Otherwise the default value is ``OFF``. - - This option generate and installs a linker script as ``libc++.so`` which - links the correct ABI library. - -.. option:: LIBCXXABI_USE_LLVM_UNWINDER:BOOL - - **Default**: ``ON`` - - Build and use the LLVM unwinder. Note: This option can only be used when - libc++abi is the C++ ABI library used. -.. 
option:: LIBCXXABI_ADDITIONAL_LIBRARIES:STRING - - **Default**: ``""`` - - Additional libraries libc++abi is linked to which can be provided in cache. +.. _LLVM-specific variables: LLVM-specific options --------------------- @@ -367,91 +499,6 @@ LLVM-specific options others. -Support for Windows -=================== - -Libc++ supports being built with clang-cl, but not with MSVC's cl.exe, as -cl doesn't support the ``#include_next`` extension. Furthermore, VS 2017 or -newer (19.14) is required. - -Libc++ also supports being built with clang targeting MinGW environments. - -CMake + Visual Studio ---------------------- - -Building with Visual Studio currently does not permit running tests. However, -it is the simplest way to build. - -.. code-block:: batch - - > cmake -G "Visual Studio 16 2019" -S runtimes -B build ^ - -T "ClangCL" ^ - -DLLVM_ENABLE_RUNTIMES=libcxx ^ - -DLIBCXX_ENABLE_SHARED=YES ^ - -DLIBCXX_ENABLE_STATIC=NO - > cmake --build build - -CMake + ninja (MSVC) --------------------- - -Building with ninja is required for development to enable tests. -A couple of tests require Bash to be available, and a couple dozens -of tests require other posix tools (cp, grep and similar - LLVM's tests -require the same). Without those tools the vast majority of tests -can still be ran successfully. - -If Git for Windows is available, that can be used to provide the bash -shell by adding the right bin directory to the path, e.g. -``set PATH=%PATH%;C:\Program Files\Git\usr\bin``. - -Alternatively, one can also choose to run the whole build in a MSYS2 -shell. That can be set up e.g. by starting a Visual Studio Tools Command -Prompt (for getting the environment variables pointing to the headers and -import libraries), and making sure that clang-cl is available in the -path. From there, launch an MSYS2 shell via e.g. -``C:\msys64\msys2_shell.cmd -full-path -mingw64`` (preserving the earlier -environment, allowing the MSVC headers/libraries and clang-cl to be found). 
- -In either case, then run: - -.. code-block:: batch - - > cmake -G Ninja -S runtimes -B build ^ - -DCMAKE_C_COMPILER=clang-cl ^ - -DCMAKE_CXX_COMPILER=clang-cl ^ - -DLLVM_ENABLE_RUNTIMES=libcxx - > ninja -C build cxx - > ninja -C build check-cxx - -If you are running in an MSYS2 shell and you have installed the -MSYS2-provided clang package (which defaults to a non-MSVC target), you -should add e.g. ``-DCMAKE_CXX_COMPILER_TARGET=x86_64-windows-msvc`` (replacing -``x86_64`` with the architecture you're targeting) to the ``cmake`` command -line above. This will instruct ``check-cxx`` to use the right target triple -when invoking ``clang++``. - -CMake + ninja (MinGW) ---------------------- - -libcxx can also be built in MinGW environments, e.g. with the MinGW -compilers in MSYS2. This requires clang to be available (installed with -e.g. the ``mingw-w64-x86_64-clang`` package), together with CMake and ninja. - -.. code-block:: bash - - > cmake -G Ninja -S runtimes -B build \ - -DCMAKE_C_COMPILER=clang \ - -DCMAKE_CXX_COMPILER=clang++ \ - -DLLVM_ENABLE_LLD=ON \ - -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" \ - -DLIBCXXABI_ENABLE_SHARED=OFF \ - -DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON - > ninja -C build cxx - > ninja -C build check-cxx - -.. _`libc++abi`: http://libcxxabi.llvm.org/ - - .. 
_assertion-handler: Overriding the default assertion handler diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 1610d1ee848a5..9bf39b2a25539 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -600,6 +600,7 @@ set(files __pstl/backend_fwd.h __pstl/backends/default.h __pstl/backends/libdispatch.h + __pstl/backends/openmp.h __pstl/backends/serial.h __pstl/backends/std_thread.h __pstl/cpu_algos/any_of.h diff --git a/libcxx/include/__algorithm/ranges_find_last_if.h b/libcxx/include/__algorithm/ranges_find_last_if.h new file mode 100644 index 0000000000000..7a4f26cd5c07b --- /dev/null +++ b/libcxx/include/__algorithm/ranges_find_last_if.h @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_FIND_LAST_IF_H +#define _LIBCPP___ALGORITHM_RANGES_FIND_LAST_IF_H + +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 20 + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +namespace __find_last_if { + +struct __fn { + template _Sp, + class _Proj = identity, + indirect_unary_predicate> _Pred> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Ip> + operator()(_Ip 
__first, _Sp __last, _Pred __pred, _Proj __proj = {}) const { + std::optional<_Ip> __found; + for (; __first != __last; ++__first) { + if (std::invoke(__pred, std::invoke(__proj, *__first))) { + __found = __first; + } + } + if (!__found) + return {__first, __first}; + return {*__found, std::ranges::next(*__found, __last)}; + } + + template , _Proj>> _Pred> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Rp> + operator()(_Rp&& __r, _Pred __pred, _Proj __proj = {}) const { + return this->operator()(ranges::begin(__r), ranges::end(__r), std::ref(__pred), std::ref(__proj)); + } +}; + +} // namespace __find_last_if + +inline namespace __cpo { +inline constexpr auto find_last_if = __find_last_if::__fn{}; +} // namespace __cpo + +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ALGORITHM_RANGES_FIND_LAST_IF_H diff --git a/libcxx/include/__algorithm/ranges_find_last_if_not.h b/libcxx/include/__algorithm/ranges_find_last_if_not.h new file mode 100644 index 0000000000000..d6d06335b9518 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_find_last_if_not.h @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_FIND_LAST_IF_NOT_H +#define _LIBCPP___ALGORITHM_RANGES_FIND_LAST_IF_NOT_H + +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 20 + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +namespace __find_last_if_not { + +struct __fn { + template _Sp, + class _Proj = identity, + indirect_unary_predicate> _Pred> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Ip> + operator()(_Ip __first, _Sp __last, _Pred __pred, _Proj __proj = {}) const { + std::optional<_Ip> __found; + for (; __first != __last; ++__first) { + if (!std::invoke(__pred, std::invoke(__proj, *__first))) { + __found = __first; + } + } + if (!__found) + return {__first, __first}; + return {*__found, std::ranges::next(*__found, __last)}; + } + + template , _Proj>> _Pred> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Rp> + operator()(_Rp&& __r, _Pred __pred, _Proj __proj = {}) const { + return this->operator()(ranges::begin(__r), ranges::end(__r), std::ref(__pred), std::ref(__proj)); + } +}; + +} // namespace __find_last_if_not + +inline namespace __cpo { +inline constexpr auto find_last_if_not = __find_last_if_not::__fn{}; +} // namespace __cpo + +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ALGORITHM_RANGES_FIND_LAST_IF_NOT_H diff 
--git a/libcxx/include/__algorithm/ranges_shift_left.h b/libcxx/include/__algorithm/ranges_shift_left.h new file mode 100644 index 0000000000000..5f2d47c7805d5 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_shift_left.h @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SHIFT_LEFT_H +#define _LIBCPP___ALGORITHM_RANGES_SHIFT_LEFT_H + +#include <__config> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/subrange.h> +#include <__utility/move.h> +#include <__utility/forward.h> +#include <__iterator/advance.h> +#include <__iterator/distance.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +namespace __shift_left { + +struct __fn { + template _Sent> + _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> + operator()(_Iter __first, _Sent __last, iter_difference_t<_Iter> __n) const { + if (__n <= 0) { + return {__first, __first}; + } + + auto __dist = std::ranges::distance(__first, __last); + if (__n >= __dist) { + return {__first, __first}; + } + + auto __mid = std::ranges::next(__first, __n); + auto __new_last = std::move(__mid, __last, __first); + return {__first, __new_last}; + } + + template + requires std::permutable> + _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> + operator()(_Range&& __range, range_difference_t<_Range> __n) const { + return (*this)(std::ranges::begin(__range), std::ranges::end(__range), __n); + } +}; + +} // 
namespace __shift_left + +inline namespace __cpo { + inline constexpr auto shift_left = __shift_left::__fn{}; +} // namespace __cpo + +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ALGORITHM_RANGES_SHIFT_LEFT_H diff --git a/libcxx/include/__algorithm/ranges_shift_right.h b/libcxx/include/__algorithm/ranges_shift_right.h new file mode 100644 index 0000000000000..de4405446ba86 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_shift_right.h @@ -0,0 +1,75 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SHIFT_RIGHT_H +#define _LIBCPP___ALGORITHM_RANGES_SHIFT_RIGHT_H + +#include <__config> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/subrange.h> +#include <__utility/move.h> +#include <__utility/forward.h> +#include <__iterator/advance.h> +#include <__iterator/distance.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +namespace __shift_right { + +struct __fn { + template _Sent> + _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> + operator()(_Iter __first, _Sent __last, iter_difference_t<_Iter> __n) const { + if (__n <= 0) { + return {__last, __last}; + } + + auto __dist = std::ranges::distance(__first, __last); + if (__n >= __dist) { + return {__last, __last}; + } + + auto __new_first = std::ranges::next(__first, __dist - __n); + std::move_backward(__first, __new_first, __last); + return 
{__new_first, __last}; + } + + template + requires std::permutable> + _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> + operator()(_Range&& __range, range_difference_t<_Range> __n) const { + return (*this)(std::ranges::begin(__range), std::ranges::end(__range), __n); + } +}; + +} // namespace __shift_right + +inline namespace __cpo { + inline constexpr auto shift_right = __shift_right::__fn{}; +} // namespace __cpo + +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ALGORITHM_RANGES_SHIFT_RIGHT_H diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in index fc01aaf2d8746..fa1c99264514c 100644 --- a/libcxx/include/__config_site.in +++ b/libcxx/include/__config_site.in @@ -38,6 +38,7 @@ #cmakedefine _LIBCPP_PSTL_BACKEND_SERIAL #cmakedefine _LIBCPP_PSTL_BACKEND_STD_THREAD #cmakedefine _LIBCPP_PSTL_BACKEND_LIBDISPATCH +#cmakedefine _LIBCPP_PSTL_BACKEND_OPENMP // Hardening. #cmakedefine _LIBCPP_HARDENING_MODE_DEFAULT @_LIBCPP_HARDENING_MODE_DEFAULT@ diff --git a/libcxx/include/__pstl/backend.h b/libcxx/include/__pstl/backend.h index 5980b0708cd34..cb47501c19fc8 100644 --- a/libcxx/include/__pstl/backend.h +++ b/libcxx/include/__pstl/backend.h @@ -19,20 +19,20 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> -#if _LIBCPP_STD_VER >= 17 - -# if defined(_LIBCPP_PSTL_BACKEND_SERIAL) -# include <__pstl/backends/default.h> -# include <__pstl/backends/serial.h> -# elif defined(_LIBCPP_PSTL_BACKEND_STD_THREAD) -# include <__pstl/backends/default.h> -# include <__pstl/backends/std_thread.h> -# elif defined(_LIBCPP_PSTL_BACKEND_LIBDISPATCH) -# include <__pstl/backends/default.h> -# include <__pstl/backends/libdispatch.h> -# endif - -#endif // _LIBCPP_STD_VER >= 17 +#if defined(_LIBCPP_PSTL_BACKEND_SERIAL) +# include <__pstl/backends/default.h> +# include <__pstl/backends/serial.h> +#elif defined(_LIBCPP_PSTL_BACKEND_STD_THREAD) +# include <__pstl/backends/default.h> +# include 
<__pstl/backends/std_thread.h> +#elif defined(_LIBCPP_PSTL_BACKEND_LIBDISPATCH) +# include <__pstl/backends/default.h> +# include <__pstl/backends/libdispatch.h> +#elif defined(_LIBCPP_PSTL_BACKEND_OPENMP) +# include <__pstl/backends/default.h> +# include <__pstl/backends/openmp.h> +# include <__pstl/backends/std_thread.h> +#endif _LIBCPP_POP_MACROS diff --git a/libcxx/include/__pstl/backend_fwd.h b/libcxx/include/__pstl/backend_fwd.h index 2132e8dbceb3a..ed08d45206a8b 100644 --- a/libcxx/include/__pstl/backend_fwd.h +++ b/libcxx/include/__pstl/backend_fwd.h @@ -39,8 +39,6 @@ _LIBCPP_PUSH_MACROS // the user. // -#if _LIBCPP_STD_VER >= 17 - _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { @@ -49,21 +47,25 @@ struct __backend_configuration; struct __default_backend_tag; struct __libdispatch_backend_tag; +struct __openmp_backend_tag; struct __serial_backend_tag; struct __std_thread_backend_tag; -# if defined(_LIBCPP_PSTL_BACKEND_SERIAL) +#if defined(_LIBCPP_PSTL_BACKEND_SERIAL) using __current_configuration = __backend_configuration<__serial_backend_tag, __default_backend_tag>; -# elif defined(_LIBCPP_PSTL_BACKEND_STD_THREAD) +#elif defined(_LIBCPP_PSTL_BACKEND_STD_THREAD) using __current_configuration = __backend_configuration<__std_thread_backend_tag, __default_backend_tag>; -# elif defined(_LIBCPP_PSTL_BACKEND_LIBDISPATCH) +#elif defined(_LIBCPP_PSTL_BACKEND_LIBDISPATCH) using __current_configuration = __backend_configuration<__libdispatch_backend_tag, __default_backend_tag>; -# else +#elif defined(_LIBCPP_PSTL_BACKEND_OPENMP) +using __current_configuration = + __backend_configuration<__openmp_backend_tag, __std_thread_backend_tag, __default_backend_tag>; +#else // ...New vendors can add parallel backends here... 
-# error "Invalid PSTL backend configuration" -# endif +# error "Invalid PSTL backend configuration" +#endif template struct __find_if; @@ -298,8 +300,6 @@ struct __reduce; } // namespace __pstl _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP_STD_VER >= 17 - _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_BACKEND_FWD_H diff --git a/libcxx/include/__pstl/backends/openmp.h b/libcxx/include/__pstl/backends/openmp.h new file mode 100644 index 0000000000000..158be91fb9ca9 --- /dev/null +++ b/libcxx/include/__pstl/backends/openmp.h @@ -0,0 +1,511 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___PSTL_BACKENDS_OPENMP_H +#define _LIBCPP___PSTL_BACKENDS_OPENMP_H + +// Combined OpenMP CPU and GPU Backend +// =================================== +// Contrary to the CPU backends found in ./cpu_backends/, the OpenMP backend can +// target both CPUs and GPUs. The OpenMP standard defines that when offloading +// code to an accelerator, the compiler must generate a fallback code for +// execution on the host. Thereby, the backend works as a CPU backend if no +// targeted accelerator is available at execution time. The target regions can +// also be compiled directly for a CPU architecture, for instance by adding the +// command-line option `-fopenmp-targets=x86_64-pc-linux-gnu` in Clang. +// +// When is an Algorithm Offloaded? +// ------------------------------- +// Only parallel algorithms with the parallel unsequenced execution policy are +// offloaded to the device. 
We cannot offload parallel algorithms with a +// parallel execution policy to GPUs because invocations executing in the same +// thread "are indeterminately sequenced with respect to each other" which we +// cannot guarantee on a GPU. +// +// The standard draft states that "the semantics [...] allow the implementation +// to fall back to sequential execution if the system cannot parallelize an +// algorithm invocation". If it is not deemed safe to offload the parallel +// algorithm to the device, we first fall back to a parallel unsequenced +// implementation from ./cpu_backends. The CPU implementation may then fall back +// to sequential execution. In that way we strive to achieve the best possible +// performance. +// +// Further, "it is the caller's responsibility to ensure that the invocation +// does not introduce data races or deadlocks." +// +// Implicit Assumptions +// -------------------- +// If the user provides a function pointer as an argument to a parallel +// algorithm, it is assumed that it is the device pointer as there is currently +// no way to check whether a host or device pointer was passed. +// +// Mapping Clauses +// --------------- +// In some of the parallel algorithms, the user is allowed to provide the same +// iterator as input and output. The order of the maps matters because OpenMP +// keeps a reference counter of which variables have been mapped to the device. +// Thereby, a variable is only copied to the device if its reference counter is +// incremented from zero, and it is only copied back to the host when the +// reference counter is decremented to zero again. +// This allows nesting mapped regions, for instance in recursive functions, +// without enforcing a lot of unnecessary data movement. +// Therefore, `pragma omp target data map(to:...)` must be used before +// `pragma omp target data map(alloc:...)`.
Conversely, the maps with map +// modifier `release` must be placed before the maps with map modifier `from` +// when transferring the result from the device to the host. +// +// Example: Assume `a` and `b` are pointers to the same array. +// ``` C++ +// #pragma omp target enter data map(alloc:a[0:n]) +// // The reference counter is incremented from 0 to 1. a is not copied to the +// // device because of the `alloc` map modifier. +// #pragma omp target enter data map(to:b[0:n]) +// // The reference counter is incremented from 1 to 2. b is not copied because +// // the reference counter is positive. Therefore b, and a, are uninitialized +// // on the device. +// ``` +// +// Exceptions +// ---------- +// Currently, GPU architectures do not handle exceptions. OpenMP target regions +// are allowed to contain try/catch statements and throw expressions in Clang, +// but if a throw expression is reached, it will terminate the program. That +// does not conform to the C++ standard. +// +// [This document](https://eel.is/c++draft/algorithms.parallel) has been used as +// reference for these considerations. + +#include <__algorithm/unwrap_iter.h> +#include <__config> +#include <__functional/operations.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/wrap_iter.h> +#include <__pstl/backend_fwd.h> +#include <__pstl/dispatch.h> +#include <__type_traits/desugars_to.h> +#include <__type_traits/is_arithmetic.h> +#include <__type_traits/is_trivially_copyable.h> +#include <__type_traits/remove_cvref.h> +#include <__utility/empty.h> +#include <__utility/forward.h> +#include <__utility/move.h> +#include +#include + +#if !defined(_OPENMP) +# error "Trying to use the OpenMP PSTL backend, but OpenMP is not enabled. Did you compile with -fopenmp?" +#elif (defined(_OPENMP) && _OPENMP < 201511) +# error \ + "OpenMP target offloading has been supported since OpenMP version 4.5 (201511). Please use a more recent version of OpenMP." 
+#endif + +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __pstl { + +// The following functions can be used to map contiguous array sections to and from the device. +// For now, they are simple overlays of the OpenMP pragmas, but they should be updated when adding +// support for other iterator types. +template +_LIBCPP_HIDE_FROM_ABI void +__omp_map_to([[maybe_unused]] const _Iterator __p, [[maybe_unused]] const _DifferenceType __len) noexcept { + static_assert(__libcpp_is_contiguous_iterator<_Iterator>::value); +#pragma omp target enter data map(to : __p[0 : __len]) +} + +template +_LIBCPP_HIDE_FROM_ABI void +__omp_map_from([[maybe_unused]] const _Iterator __p, [[maybe_unused]] const _DifferenceType __len) noexcept { + static_assert(__libcpp_is_contiguous_iterator<_Iterator>::value); +#pragma omp target exit data map(from : __p[0 : __len]) +} + +template +_LIBCPP_HIDE_FROM_ABI void +__omp_map_alloc([[maybe_unused]] const _Iterator __p, [[maybe_unused]] const _DifferenceType __len) noexcept { + static_assert(__libcpp_is_contiguous_iterator<_Iterator>::value); +#pragma omp target enter data map(alloc : __p[0 : __len]) +} + +template +_LIBCPP_HIDE_FROM_ABI void +__omp_map_release([[maybe_unused]] const _Iterator __p, [[maybe_unused]] const _DifferenceType __len) noexcept { + static_assert(__libcpp_is_contiguous_iterator<_Iterator>::value); +#pragma omp target exit data map(release : __p[0 : __len]) +} + +// +// fill +// +template +_LIBCPP_HIDE_FROM_ABI _Tp* __omp_fill(_Tp* __out1, _DifferenceType __n, const _Up& __value) noexcept { + __pstl::__omp_map_alloc(__out1, __n); +#pragma omp target teams distribute parallel for + for (_DifferenceType __i = 0; __i < __n; ++__i) + *(__out1 + __i) = __value; + __pstl::__omp_map_from(__out1, __n); + return __out1 + __n; +} + +template <> +struct __fill<__openmp_backend_tag, execution::parallel_unsequenced_policy> { + template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__empty> + operator()(_Policy&& __policy, _ForwardIterator 
__first, _ForwardIterator __last, _Tp const& __value) const noexcept { + using _ValueType = typename iterator_traits<_ForwardIterator>::value_type; + if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value && is_trivially_copyable_v<_ValueType> && + is_trivially_copyable_v<_Tp>) { + __pstl::__omp_fill(std::__unwrap_iter(__first), __last - __first, __value); + return __empty{}; + } else { + using _Backends = __backends_after<__current_configuration, __openmp_backend_tag>; + using _Fallback = __dispatch<__pstl::__fill, _Backends, __remove_cvref_t<_Policy>>; + return _Fallback{}(std::forward<_Policy>(__policy), std::move(__first), std::move(__last), __value); + } + } +}; + +// +// find_if +// +template +_LIBCPP_HIDE_FROM_ABI _Tp* __omp_find_if(_Tp* __first, _DifferenceType __n, _Predicate __pred) noexcept { + __pstl::__omp_map_to(__first, __n); + _DifferenceType __idx = __n; +#pragma omp target teams distribute parallel for reduction(min : __idx) + for (_DifferenceType __i = 0; __i < __n; ++__i) { + if (__pred(*(__first + __i))) { + __idx = (__i < __idx) ? 
__i : __idx; + } + } + __pstl::__omp_map_release(__first, __n); + return __first + __idx; +} + +template <> +struct __find_if<__openmp_backend_tag, execution::parallel_unsequenced_policy> { + template + _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator> + operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept { + using _ValueType = typename iterator_traits<_ForwardIterator>::value_type; + if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value && is_trivially_copyable_v<_ValueType>) { + return std::__rewrap_iter(__first, __pstl::__omp_find_if(std::__unwrap_iter(__first), __last - __first, __pred)); + } else { + using _Backends = __backends_after<__current_configuration, __openmp_backend_tag>; + using _Fallback = __dispatch<__pstl::__find_if, _Backends, __remove_cvref_t<_Policy>>; + return _Fallback{}(std::forward<_Policy>(__policy), std::move(__first), std::move(__last), std::move(__pred)); + } + } +}; + +// +// for_each +// +template +_LIBCPP_HIDE_FROM_ABI _Tp* __omp_for_each(_Tp* __inout1, _DifferenceType __n, _Function __f) noexcept { + __pstl::__omp_map_to(__inout1, __n); +#pragma omp target teams distribute parallel for + for (_DifferenceType __i = 0; __i < __n; ++__i) + __f(*(__inout1 + __i)); + __pstl::__omp_map_from(__inout1, __n); + return __inout1 + __n; +} + +template <> +struct __for_each<__openmp_backend_tag, execution::parallel_unsequenced_policy> { + template + _LIBCPP_HIDE_FROM_ABI optional<__empty> + operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Functor __func) const noexcept { + using _ValueType = typename iterator_traits<_ForwardIterator>::value_type; + if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value && + __libcpp_is_contiguous_iterator<_ForwardIterator>::value && is_trivially_copyable_v<_ValueType>) { + __pstl::__omp_for_each(std::__unwrap_iter(__first), __last - __first, std::move(__func)); + return __empty{}; + 
} else { + using _Backends = __backends_after<__current_configuration, __openmp_backend_tag>; + using _Fallback = __dispatch<__pstl::__for_each, _Backends, __remove_cvref_t<_Policy>>; + return _Fallback{}(std::forward<_Policy>(__policy), std::move(__first), std::move(__last), std::move(__func)); + } + } +}; + +// +// transform +// +template +_LIBCPP_HIDE_FROM_ABI _Tp* __omp_transform(_Tp* __in1, _DifferenceType __n, _Up* __out1, _Function __f) noexcept { + // The order of the following maps matters, as we wish to move the data. If + // they were placed in the reverse order, and __in equals __out, then we would + // allocate the buffer on the device without copying the data. + __pstl::__omp_map_to(__in1, __n); + __pstl::__omp_map_alloc(__out1, __n); +#pragma omp target teams distribute parallel for + for (_DifferenceType __i = 0; __i < __n; ++__i) + *(__out1 + __i) = __f(*(__in1 + __i)); + // The order of the following two maps matters, since the user could legally + // overwrite __in. The "release" map modifier decreases the reference counter + // by one, and "from" only moves the data to the host, when the reference + // count is decremented to zero.
+ __pstl::__omp_map_release(__in1, __n); + __pstl::__omp_map_from(__out1, __n); + return __out1 + __n; +} + +template <> +struct __transform<__openmp_backend_tag, execution::parallel_unsequenced_policy> { + template + _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> + operator()(_Policy&& __policy, + _ForwardIterator __first, + _ForwardIterator __last, + _ForwardOutIterator __outit, + _UnaryOperation __op) const noexcept { + using _ValueType = typename iterator_traits<_ForwardIterator>::value_type; + if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value && + __libcpp_is_contiguous_iterator<_ForwardOutIterator>::value && is_trivially_copyable_v<_ValueType>) { + return std::__rewrap_iter( + __outit, + __omp_transform( + std::__unwrap_iter(__first), __last - __first, std::__unwrap_iter(__outit), std::move(__op))); + } else { + using _Backends = __backends_after<__current_configuration, __openmp_backend_tag>; + using _Fallback = __dispatch<__pstl::__transform, _Backends, __remove_cvref_t<_Policy>>; + return _Fallback{}( + std::forward<_Policy>(__policy), std::move(__first), std::move(__last), std::move(__outit), std::move(__op)); + } + } +}; + +// +// transform_binary +// +template +_LIBCPP_HIDE_FROM_ABI _Tp* +__omp_transform(_Tp* __in1, _DifferenceType __n, _Up* __in2, _Vp* __out1, _Function __f) noexcept { + // The order of the following maps matters, as we wish to move the data. If + // they were placed in the reverse order, and __out equals __in1 or __in2, + // then we would allocate one of the buffers on the device without copying the + // data.
+ __pstl::__omp_map_to(__in1, __n); + __pstl::__omp_map_to(__in2, __n); + __pstl::__omp_map_alloc(__out1, __n); +#pragma omp target teams distribute parallel for + for (_DifferenceType __i = 0; __i < __n; ++__i) + *(__out1 + __i) = __f(*(__in1 + __i), *(__in2 + __i)); + // The order of the following three maps matters, since the user could legally + // overwrite either of the inputs if __out equals __in1 or __in2. The + // "release" map modifier decreases the reference counter by one, and "from" + // only moves the data from the device, when the reference count is + // decremented to zero. + __pstl::__omp_map_release(__in1, __n); + __pstl::__omp_map_release(__in2, __n); + __pstl::__omp_map_from(__out1, __n); + return __out1 + __n; +} + +template <> +struct __transform_binary<__openmp_backend_tag, execution::parallel_unsequenced_policy> { + template + _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> + operator()(_Policy&& __policy, + _ForwardIterator1 __first1, + _ForwardIterator1 __last1, + _ForwardIterator2 __first2, + _ForwardOutIterator __outit, + _BinaryOperation __op) const noexcept { + using _ValueType1 = typename iterator_traits<_ForwardIterator1>::value_type; + using _ValueType2 = typename iterator_traits<_ForwardIterator2>::value_type; + if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator1>::value && + __libcpp_is_contiguous_iterator<_ForwardIterator2>::value && + __libcpp_is_contiguous_iterator<_ForwardOutIterator>::value && is_trivially_copyable_v<_ValueType1> && + is_trivially_copyable_v<_ValueType2>) { + return std::__rewrap_iter( + __outit, + __pstl::__omp_transform( + std::__unwrap_iter(__first1), + __last1 - __first1, + std::__unwrap_iter(__first2), + std::__unwrap_iter(__outit), + std::move(__op))); + } else { + using _Backends = __backends_after<__current_configuration, __openmp_backend_tag>; + using _Fallback = __dispatch<__pstl::__transform_binary, _Backends, __remove_cvref_t<_Policy>>; + return _Fallback{}( + 
std::forward<_Policy>(__policy), + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__outit), + std::move(__op)); + } + } +}; + +// +// transform_reduce +// +#define _LIBCPP_PSTL_OMP_SIMD_1_REDUCTION(omp_op, std_op) \ + template \ + _LIBCPP_HIDE_FROM_ABI _Tp __omp_transform_reduce( \ + _Iterator __first, \ + _DifferenceType __n, \ + _Tp __init, \ + std_op<_BinaryOperationType> __reduce, \ + _UnaryOperation __transform) noexcept { \ + __pstl::__omp_map_to(__first, __n); \ +_PSTL_PRAGMA(omp target teams distribute parallel for reduction(omp_op:__init)) \ + for (_DifferenceType __i = 0; __i < __n; ++__i) \ + __init = __reduce(__init, __transform(*(__first + __i))); \ + __pstl::__omp_map_release(__first, __n); \ + return __init; \ + } + +#define _LIBCPP_PSTL_OMP_SIMD_2_REDUCTION(omp_op, std_op) \ + template \ + _LIBCPP_HIDE_FROM_ABI _Tp __omp_transform_reduce( \ + _Iterator1 __first1, \ + _Iterator2 __first2, \ + _DifferenceType __n, \ + _Tp __init, \ + std_op<_BinaryOperationType> __reduce, \ + _UnaryOperation __transform) noexcept { \ + __pstl::__omp_map_to(__first1, __n); \ + __pstl::__omp_map_to(__first2, __n); \ +_PSTL_PRAGMA(omp target teams distribute parallel for reduction(omp_op:__init)) \ + for (_DifferenceType __i = 0; __i < __n; ++__i) \ + __init = __reduce(__init, __transform(*(__first1 + __i), *(__first2 + __i))); \ + __pstl::__omp_map_release(__first1, __n); \ + __pstl::__omp_map_release(__first2, __n); \ + return __init; \ + } + +#define _LIBCPP_PSTL_OMP_SIMD_REDUCTION(omp_op, std_op) \ + _LIBCPP_PSTL_OMP_SIMD_1_REDUCTION(omp_op, std_op) \ + _LIBCPP_PSTL_OMP_SIMD_2_REDUCTION(omp_op, std_op) + +_LIBCPP_PSTL_OMP_SIMD_REDUCTION(+, std::plus) +_LIBCPP_PSTL_OMP_SIMD_REDUCTION(-, std::minus) +_LIBCPP_PSTL_OMP_SIMD_REDUCTION(*, std::multiplies) +_LIBCPP_PSTL_OMP_SIMD_REDUCTION(&&, std::logical_and) +_LIBCPP_PSTL_OMP_SIMD_REDUCTION(||, std::logical_or) +_LIBCPP_PSTL_OMP_SIMD_REDUCTION(&, std::bit_and) 
+_LIBCPP_PSTL_OMP_SIMD_REDUCTION(|, std::bit_or) +_LIBCPP_PSTL_OMP_SIMD_REDUCTION(^, std::bit_xor) + +// Determine whether a reduction is supported by the OpenMP backend +template +struct __is_supported_reduction : std::false_type {}; + +#define _LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(func) \ + template \ + struct __is_supported_reduction, _Tp, _Tp> : true_type {}; \ + template \ + struct __is_supported_reduction, _Tp, _Up> : true_type {}; + +// __is_trivial_plus_operation already exists +_LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(std::plus) +_LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(std::minus) +_LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(std::multiplies) +_LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(std::logical_and) +_LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(std::logical_or) +_LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(std::bit_and) +_LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(std::bit_or) +_LIBCPP_PSTL_IS_SUPPORTED_REDUCTION(std::bit_xor) + +template <> +struct __transform_reduce<__openmp_backend_tag, execution::parallel_unsequenced_policy> { + template + _LIBCPP_HIDE_FROM_ABI optional<_Tp> + operator()(_Policy&& __policy, + _ForwardIterator __first, + _ForwardIterator __last, + _Tp __init, + _Reduction __reduce, + _Transform __transform) const noexcept { + using _ValueType = typename iterator_traits<_ForwardIterator>::value_type; + if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value && is_arithmetic_v<_Tp> && + __is_supported_reduction<_Reduction, _Tp, _Tp>::value && is_trivially_copyable_v<_ValueType>) { + return __pstl::__omp_transform_reduce( + std::__unwrap_iter(__first), __last - __first, __init, std::move(__reduce), std::move(__transform)); + } else { + using _Backends = __backends_after<__current_configuration, __openmp_backend_tag>; + using _Fallback = __dispatch<__pstl::__transform_reduce, _Backends, __remove_cvref_t<_Policy>>; + return _Fallback{}( + std::forward<_Policy>(__policy), + std::move(__first), + std::move(__last), + std::move(__init), + std::move(__reduce), + 
std::move(__transform)); + } + } +}; + +// +// transform_reduce_binary +// +template <> +struct __transform_reduce_binary<__openmp_backend_tag, execution::parallel_unsequenced_policy> { + template + _LIBCPP_HIDE_FROM_ABI optional<_Tp> operator()( + _Policy&& __policy, + _ForwardIterator1 __first1, + _ForwardIterator1 __last1, + _ForwardIterator2 __first2, + _Tp __init, + _Reduction __reduce, + _Transform __transform) const noexcept { + using _ValueType1 = typename iterator_traits<_ForwardIterator1>::value_type; + using _ValueType2 = typename iterator_traits<_ForwardIterator2>::value_type; + if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator1>::value && + __libcpp_is_contiguous_iterator<_ForwardIterator2>::value && is_arithmetic_v<_Tp> && + __is_supported_reduction<_Reduction, _Tp, _Tp>::value && is_trivially_copyable_v<_ValueType1> && + is_trivially_copyable_v<_ValueType2>) { + return __pstl::__omp_transform_reduce( + std::__unwrap_iter(__first1), + std::__unwrap_iter(__first2), + __last1 - __first1, + std::move(__init), + std::move(__reduce), + std::move(__transform)); + } else { + using _Backends = __backends_after<__current_configuration, __openmp_backend_tag>; + using _Fallback = __dispatch<__pstl::__transform_reduce_binary, _Backends, __remove_cvref_t<_Policy>>; + return _Fallback{}( + std::forward<_Policy>(__policy), + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__init), + std::move(__reduce), + std::move(__transform)); + } + } +}; + +} // namespace __pstl +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___PSTL_BACKENDS_OPENMP_H diff --git a/libcxx/include/__pstl/dispatch.h b/libcxx/include/__pstl/dispatch.h index ea40fa79eb949..13f51c26a6e22 100644 --- a/libcxx/include/__pstl/dispatch.h +++ b/libcxx/include/__pstl/dispatch.h @@ -60,6 +60,21 @@ struct __find_first_implemented<_Algorithm, __backend_configuration<_B1, _Bn...> template