From 8f41d49c2b1e2aad3330ebde387fe4db2559ac1c Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Fri, 26 Jan 2024 20:48:04 +0100 Subject: [PATCH 1/9] Add `cpu_features` library as a `git` submodule --- .gitmodules | 3 +++ vendor/cpu_features | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 vendor/cpu_features diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..5e37b7b58 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/cpu_features"] + path = vendor/cpu_features + url = https://github.com/google/cpu_features diff --git a/vendor/cpu_features b/vendor/cpu_features new file mode 160000 index 000000000..ba4bffa86 --- /dev/null +++ b/vendor/cpu_features @@ -0,0 +1 @@ +Subproject commit ba4bffa86cbb5456bdb34426ad22b9551278e2c0 From 6a5f0580317e6d88651232ba6072796f3a21c623 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Fri, 26 Jan 2024 20:54:13 +0100 Subject: [PATCH 2/9] Update CMake files to compile and link `cpu_features` in `trimAl` --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a79cb749..be109acf3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,12 @@ add_executable(test ${TRIMAL_OBJECTS}) SET_TARGET_PROPERTIES(test PROPERTIES EXCLUDE_FROM_ALL True) +# Add `cpu_features` for detecting supported SIMD at runtime +add_subdirectory(vendor/cpu_features) +include_directories(vendor/cpu_features/include) +target_link_libraries(trimal cpu_features) +target_link_libraries(readal cpu_features) + # Link the mathematical library to the targets target_link_libraries(trimal m) target_link_libraries(readal m) From 22f586ac5ee59fadc4a7332f123aaf2e9ccd5abc Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Fri, 26 Jan 2024 21:29:12 +0100 Subject: [PATCH 3/9] Add logic to `Manager` to detect supported SIMD features at runtime --- include/Statistics/Manager.h | 14 +++++++++++ source/Statistics/Manager.cpp | 47 
+++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/include/Statistics/Manager.h b/include/Statistics/Manager.h index 554b32bfd..c98db14b1 100644 --- a/include/Statistics/Manager.h +++ b/include/Statistics/Manager.h @@ -45,6 +45,16 @@ namespace statistics { class Identity; class Overlap; + /** + * \brief Enum to store the different supported compute kernels for the statistics. + */ + enum ComputePlatform { + NONE, + SSE2, + AVX2, + NEON, + }; + /** * \brief Class to handle the interaction with Alignment and statistics objects.\n * It serves as a wrapper or intermediate between the alignment and each specific stat.\n @@ -52,6 +62,10 @@ namespace statistics { */ class Manager { public: + /** + * \brief Compute platform (SIMD extension) selected for the local machine. + * */ + ComputePlatform platform = ComputePlatform::NONE; /** * \brief Gaps submodule diff --git a/source/Statistics/Manager.cpp b/source/Statistics/Manager.cpp index f775657c1..062090b44 100644 --- a/source/Statistics/Manager.cpp +++ b/source/Statistics/Manager.cpp @@ -27,6 +27,10 @@ ***************************************************************************** */ +#include + +#include "cpuinfo_x86.h" + #include "Statistics/Similarity.h" #include "Statistics/Consistency.h" #include "Statistics/Identity.h" @@ -44,6 +48,8 @@ #include "Platform/x86/SSE2.h" #endif +using namespace cpu_features; + namespace statistics { bool Manager::calculateConservationStats() { // Create a timerLevel that will report times upon its destruction @@ -303,6 +309,45 @@ namespace statistics { ghWindow = 0; shWindow = 0; + + static const X86Info info = GetX86Info(); + static const X86Features features = info.features; + +#if defined(CPU_FEATURES_ARCH_X86_32) +#if defined(HAVE_SSE2) + if (features.sse2 != 0) { + platform = ComputePlatform::SSE2; + } +#endif +#if defined(HAVE_AVX2) + if (features.avx2 != 0) { + platform = ComputePlatform::AVX2; + } +#endif +#endif + +#if defined(CPU_FEATURES_ARCH_X86_64) +#if defined(HAVE_SSE2) + platform = ComputePlatform::SSE2; 
+#endif +#if defined(HAVE_AVX2) + if (features.avx2 != 0) { + platform = ComputePlatform::AVX2; + } +#endif +#endif + +#if defined(CPU_FEATURES_ARCH_AARCH64) +#if defined(HAVE_NEON) + platform = ComputePlatform::NEON; +#endif +#endif + +#if defined(CPU_FEATURES_ARCH_ARM) +#if defined(HAVE_NEON) + platform = ComputePlatform::NEON; +#endif +#endif } Manager::Manager(Alignment *parent, Manager *mold) { @@ -316,6 +361,8 @@ namespace statistics { ghWindow = mold->ghWindow; shWindow = mold->shWindow; + kernel = mold->kernel; + if (mold->similarity) #if defined(HAVE_AVX2) similarity = new AVX2Similarity(parent, mold->similarity); From 0ae52134078136dd332e05ff9dd21d79abf42b83 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Fri, 26 Jan 2024 21:44:23 +0100 Subject: [PATCH 4/9] Setup statistics copy based off manager platform in `Manager::Manager` --- source/Statistics/Manager.cpp | 137 ++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 47 deletions(-) diff --git a/source/Statistics/Manager.cpp b/source/Statistics/Manager.cpp index 062090b44..619b0dc6c 100644 --- a/source/Statistics/Manager.cpp +++ b/source/Statistics/Manager.cpp @@ -313,38 +313,38 @@ namespace statistics { static const X86Info info = GetX86Info(); static const X86Features features = info.features; -#if defined(CPU_FEATURES_ARCH_X86_32) -#if defined(HAVE_SSE2) +#ifdef CPU_FEATURES_ARCH_X86_32 +#ifdef HAVE_SSE2 if (features.sse2 != 0) { platform = ComputePlatform::SSE2; } #endif -#if defined(HAVE_AVX2) +#ifdef HAVE_AVX2 if (features.avx2 != 0) { platform = ComputePlatform::AVX2; } #endif #endif -#if defined(CPU_FEATURES_ARCH_X86_64) -#if defined(HAVE_SSE2) +#ifdef CPU_FEATURES_ARCH_X86_64 +#ifdef HAVE_SSE2 platform = ComputePlatform::SSE2; #endif -#if defined(HAVE_AVX2) +#ifdef HAVE_AVX2 if (features.avx2 != 0) { platform = ComputePlatform::AVX2; } #endif #endif -#if defined(CPU_FEATURES_ARCH_AARCH64) -#if defined(HAVE_NEON) +#ifdef CPU_FEATURES_ARCH_AARCH64 +#ifdef HAVE_NEON platform = 
ComputePlatform::NEON; #endif #endif -#if defined(CPU_FEATURES_ARCH_ARM) -#if defined(HAVE_NEON) +#ifdef CPU_FEATURES_ARCH_ARM +#ifdef HAVE_NEON platform = ComputePlatform::NEON; #endif #endif @@ -361,55 +361,98 @@ namespace statistics { ghWindow = mold->ghWindow; shWindow = mold->shWindow; - kernel = mold->kernel; + platform = mold->platform; - if (mold->similarity) -#if defined(HAVE_AVX2) - similarity = new AVX2Similarity(parent, mold->similarity); -#elif defined(HAVE_SSE2) - similarity = new SSE2Similarity(parent, mold->similarity); -#elif defined(HAVE_NEON) - similarity = new NEONSimilarity(parent, mold->similarity); -#else - similarity = new Similarity(parent, mold->similarity); + if (mold->similarity) { + + switch (platform) { +#ifdef HAVE_AVX2 + case(ComputePlatform::AVX2): + similarity = new AVX2Similarity(parent, mold->similarity); + break; +#endif +#ifdef HAVE_SSE2 + case(ComputePlatform::SSE2): + similarity = new SSE2Similarity(parent, mold->similarity); + break; #endif +#ifdef HAVE_NEON + case(ComputePlatform::NEON): + similarity = new NEONSimilarity(parent, mold->similarity); + break; +#endif + default: + similarity = new Similarity(parent, mold->similarity); + } + } if (mold->consistency) consistency = new Consistency(parent, mold->consistency); - if (mold->gaps) -#if defined(HAVE_AVX2) - gaps = new AVX2Gaps(parent, mold->gaps); -#elif defined(HAVE_SSE2) - gaps = new SSE2Gaps(parent, mold->gaps); -#elif defined(HAVE_NEON) - gaps = new NEONGaps(parent, mold->gaps); -#else - gaps = new Gaps(parent, mold->gaps); + if (mold->gaps) { + switch (platform) { +#ifdef HAVE_AVX2 + case(ComputePlatform::AVX2): + gaps = new AVX2Gaps(parent, mold->gaps); + break; #endif +#ifdef HAVE_SSE2 + case(ComputePlatform::SSE2): + gaps = new SSE2Gaps(parent, mold->gaps); + break; +#endif +#ifdef HAVE_NEON + case(ComputePlatform::NEON): + gaps = new NEONGaps(parent, mold->gaps); + break; +#endif + default: + gaps = new Gaps(parent, mold->gaps); + } + } - if (mold->identity) 
-#if defined(HAVE_AVX2) - identity = new AVX2Identity(parent, mold->identity); -#elif defined(HAVE_SSE2) - identity = new SSE2Identity(parent, mold->identity); -#elif defined(HAVE_NEON) - identity = new NEONIdentity(parent, mold->identity); -#else - identity = new Identity(parent, mold->identity); + if (mold->identity) { + switch (platform) { +#ifdef HAVE_AVX2 + case(ComputePlatform::AVX2): + identity = new AVX2Identity(parent, mold->identity); + break; +#endif +#ifdef HAVE_SSE2 + case(ComputePlatform::SSE2): + identity = new SSE2Identity(parent, mold->identity); + break; +#endif +#ifdef HAVE_NEON + case(ComputePlatform::NEON): + identity = new NEONIdentity(parent, mold->identity); + break; #endif + default: + identity = new Identity(parent, mold->identity); + } + } if (mold->overlap) -#if defined(HAVE_AVX2) - overlap = new AVX2Overlap(parent, mold->overlap); -#elif defined(HAVE_SSE2) - overlap = new SSE2Overlap(parent, mold->overlap); -#elif defined(HAVE_NEON) - overlap = new NEONOverlap(parent, mold->overlap); -#else - overlap = new Overlap(parent, mold->overlap); + switch (platform) { +#ifdef HAVE_AVX2 + case(ComputePlatform::AVX2): + overlap = new AVX2Overlap(parent, mold->overlap); + break; #endif - +#ifdef HAVE_SSE2 + case(ComputePlatform::SSE2): + overlap = new SSE2Overlap(parent, mold->overlap); + break; +#endif +#ifdef HAVE_NEON + case(ComputePlatform::NEON): + overlap = new NEONOverlap(parent, mold->overlap); + break; +#endif + default: + overlap = new Overlap(parent, mold->overlap); + } } Manager::~Manager() { From ee3cfb93ae41570417c8a027902d2209377394f1 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Fri, 26 Jan 2024 21:53:03 +0100 Subject: [PATCH 5/9] Initialize statistics for best available platform in `Manager` --- source/Statistics/Manager.cpp | 162 ++++++++++++++++++++++++---------- 1 file changed, 114 insertions(+), 48 deletions(-) diff --git a/source/Statistics/Manager.cpp b/source/Statistics/Manager.cpp index 619b0dc6c..b3c25736a 100644 
--- a/source/Statistics/Manager.cpp +++ b/source/Statistics/Manager.cpp @@ -65,15 +65,26 @@ namespace statistics { // It the similarity statistics object has not been // created we create it if (similarity == nullptr) { -#if defined(HAVE_AVX2) - similarity = new AVX2Similarity(alig); -#elif defined(HAVE_SSE2) - similarity = new SSE2Similarity(alig); -#elif defined(HAVE_NEON) - similarity = new NEONSimilarity(alig); -#else - similarity = new Similarity(alig); + switch (platform) { +#ifdef HAVE_AVX2 + case ComputePlatform::AVX2: + similarity = new AVX2Similarity(alig); + break; +#endif +#ifdef HAVE_SSE2 + case ComputePlatform::SSE2: + similarity = new SSE2Similarity(alig); + break; +#endif +#ifdef HAVE_NEON + case ComputePlatform::NEON: + similarity = new NEONSimilarity(alig); + break; #endif + default: + similarity = new Similarity(alig); + } + similarity->setSimilarityMatrix(_similarityMatrix); similarity->applyWindow(shWindow); } @@ -132,15 +143,25 @@ namespace statistics { // If scons object is not created, we create them if (alig->Statistics->similarity == nullptr) -#if defined(HAVE_AVX2) - alig->Statistics->similarity = new AVX2Similarity(alig); -#elif defined(HAVE_SSE2) - alig->Statistics->similarity = new SSE2Similarity(alig); -#elif defined(HAVE_NEON) - alig->Statistics->similarity = new NEONSimilarity(alig); -#else - alig->Statistics->similarity = new Similarity(alig); + switch (platform) { +#ifdef HAVE_AVX2 + case ComputePlatform::AVX2: + alig->Statistics->similarity = new AVX2Similarity(alig); + break; #endif +#ifdef HAVE_SSE2 + case ComputePlatform::SSE2: + alig->Statistics->similarity = new SSE2Similarity(alig); + break; +#endif +#ifdef HAVE_NEON + case ComputePlatform::NEON: + alig->Statistics->similarity = new NEONSimilarity(alig); + break; +#endif + default: + alig->Statistics->similarity = new Similarity(alig); + } // Associate the matrix to the similarity statistics object // If it's OK, we return true @@ -209,15 +230,25 @@ namespace statistics { 
// If gaps object is not created, we create them // and calculate the statistics if (gaps == nullptr) { -#if defined(HAVE_AVX2) - gaps = new AVX2Gaps(alig); -#elif defined(HAVE_SSE2) - gaps = new SSE2Gaps(alig); -#elif defined(HAVE_NEON) - gaps = new NEONGaps(alig); -#else - gaps = new Gaps(alig); + switch (platform) { +#ifdef HAVE_AVX2 + case ComputePlatform::AVX2: + gaps = new AVX2Gaps(alig); + break; #endif +#ifdef HAVE_SSE2 + case ComputePlatform::SSE2: + gaps = new SSE2Gaps(alig); + break; +#endif +#ifdef HAVE_NEON + case ComputePlatform::NEON: + gaps = new NEONGaps(alig); + break; +#endif + default: + gaps = new Gaps(alig); + } gaps->CalculateVectors(); } return gaps->applyWindow(ghWindow); @@ -235,15 +266,25 @@ namespace statistics { // If identity object is not created, we create them // and calculate the statistics if (identity == nullptr) { -#if defined(HAVE_AVX2) - identity = new AVX2Identity(alig); -#elif defined(HAVE_SSE2) - identity = new SSE2Identity(alig); -#elif defined(HAVE_NEON) - identity = new NEONIdentity(alig); -#else - identity = new Identity(alig); + switch (platform) { +#ifdef HAVE_AVX2 + case ComputePlatform::AVX2: + identity = new AVX2Identity(alig); + break; +#endif +#ifdef HAVE_SSE2 + case ComputePlatform::SSE2: + identity = new SSE2Identity(alig); + break; +#endif +#ifdef HAVE_NEON + case ComputePlatform::NEON: + identity = new NEONIdentity(alig); + break; #endif + default: + identity = new Identity(alig); + } identity->calculateSeqIdentity(); } return true; @@ -261,15 +302,25 @@ namespace statistics { // If overlap object is not created, we create them // and calculate the statistics if (overlap == nullptr) { -#if defined(HAVE_AVX2) - overlap = new AVX2Overlap(alig); -#elif defined(HAVE_SSE2) - overlap = new SSE2Overlap(alig); -#elif defined(HAVE_NEON) - overlap = new NEONOverlap(alig); -#else - overlap = new Overlap(alig); + switch (platform) { +#ifdef HAVE_AVX2 + case ComputePlatform::AVX2: + overlap = new AVX2Overlap(alig); + 
break; +#endif +#ifdef HAVE_SSE2 + case ComputePlatform::SSE2: + overlap = new SSE2Overlap(alig); + break; #endif +#ifdef HAVE_NEON + case ComputePlatform::NEON: + overlap = new NEONOverlap(alig); + break; +#endif + default: + overlap = new Overlap(alig); + } overlap->calculateSeqOverlap(); } return true; @@ -287,15 +338,25 @@ namespace statistics { // If overlap object is not created, we create them // and calculate the statistics if (overlap == nullptr) { -#if defined(HAVE_AVX2) - overlap = new AVX2Overlap(alig); -#elif defined(HAVE_SSE2) - overlap = new SSE2Overlap(alig); -#elif defined(HAVE_NEON) - overlap = new NEONOverlap(alig); -#else - overlap = new Overlap(alig); + switch (platform) { +#ifdef HAVE_AVX2 + case ComputePlatform::AVX2: + overlap = new AVX2Overlap(alig); + break; #endif +#ifdef HAVE_SSE2 + case ComputePlatform::SSE2: + overlap = new SSE2Overlap(alig); + break; +#endif +#ifdef HAVE_NEON + case ComputePlatform::NEON: + overlap = new NEONOverlap(alig); + break; +#endif + default: + overlap = new Overlap(alig); + } } return overlap->calculateSpuriousVector(overlapColumn, spuriousVector); @@ -310,9 +371,11 @@ namespace statistics { ghWindow = 0; shWindow = 0; + // Detect the best supported compute platform on the local machine. static const X86Info info = GetX86Info(); static const X86Features features = info.features; + // On x86, test whether SSE2 or AVX2 are supported. #ifdef CPU_FEATURES_ARCH_X86_32 #ifdef HAVE_SSE2 if (features.sse2 != 0) { @@ -326,6 +389,7 @@ namespace statistics { #endif #endif + // On x86-64, SSE2 is always supported, test whether AVX2 is supported. #ifdef CPU_FEATURES_ARCH_X86_64 #ifdef HAVE_SSE2 platform = ComputePlatform::SSE2; @@ -337,12 +401,14 @@ namespace statistics { #endif #endif + // On Aarch64, NEON is always supported. #ifdef CPU_FEATURES_ARCH_AARCH64 #ifdef HAVE_NEON platform = ComputePlatform::NEON; #endif #endif + // On Aarch64, test whether NEON is supported. 
#ifdef CPU_FEATURES_ARCH_ARM #ifdef HAVE_NEON platform = ComputePlatform::NEON; From c2e46dddd814f927145ff103b7a99ff1ba48ac42 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Fri, 26 Jan 2024 22:03:19 +0100 Subject: [PATCH 6/9] Add debug message listing CPU extensions used in `Manager` --- include/reportsystem.h | 2 ++ source/Statistics/Manager.cpp | 14 ++++++++++++++ source/reportMessages/infoMessages.cpp | 5 ++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/reportsystem.h b/include/reportsystem.h index 5455f8754..8717c5adc 100644 --- a/include/reportsystem.h +++ b/include/reportsystem.h @@ -305,6 +305,8 @@ enum InfoCode { RemovingDuplicateSequences = 4, + UsingPlatform = 5, + __MAXINFO }; diff --git a/source/Statistics/Manager.cpp b/source/Statistics/Manager.cpp index b3c25736a..1e256fac6 100644 --- a/source/Statistics/Manager.cpp +++ b/source/Statistics/Manager.cpp @@ -36,6 +36,7 @@ #include "Statistics/Identity.h" #include "Statistics/Manager.h" #include "Statistics/Overlap.h" +#include "reportsystem.h" #include "InternalBenchmarker.h" #if defined(HAVE_AVX2) @@ -414,6 +415,19 @@ namespace statistics { platform = ComputePlatform::NEON; #endif #endif + + switch(platform) { + case ComputePlatform::AVX2: + debug.report(InfoCode::UsingPlatform, new std::string[1]{"AVX2"}); + break; + case ComputePlatform::SSE2: + debug.report(InfoCode::UsingPlatform, new std::string[1]{"SSE2"}); + break; + case ComputePlatform::NEON: + debug.report(InfoCode::UsingPlatform, new std::string[1]{"NEON"}); + break; + } + } Manager::Manager(Alignment *parent, Manager *mold) { diff --git a/source/reportMessages/infoMessages.cpp b/source/reportMessages/infoMessages.cpp index c93b6d717..ee6bf60ae 100644 --- a/source/reportMessages/infoMessages.cpp +++ b/source/reportMessages/infoMessages.cpp @@ -44,5 +44,8 @@ const std::map reporting::reportManager::InfoMessages = }, {InfoCode::RemovingDuplicateSequences, - "Removing sequence \"[tag]\" as it is a duplicate of 
\"[tag]\"."} + "Removing sequence \"[tag]\" as it is a duplicate of \"[tag]\"."}, + + {InfoCode::UsingPlatform, + "Using \"[tag]\" CPU extensions to compute statistics."} }; \ No newline at end of file From 4b6752cd246f778e9e713f75acb9105028048bbb Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 27 Jan 2024 11:44:52 +0100 Subject: [PATCH 7/9] Update `build.yml` Actions workflow to checkout `git` submodules --- .github/workflows/build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6fda2c117..9631764e3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,6 +25,8 @@ jobs: steps: - uses: actions/checkout@v3 + with: + submodules: true - name: Build trimal for Linux x86_64 if: matrix.arch == 'x86_64' && matrix.cpu_instr == 'non_SIMD' run: cmake . -DDISABLE_SSE2=1 -DDISABLE_AVX2=1 && make && file bin/trimal && file bin/readal && @@ -67,4 +69,4 @@ jobs: apt-get install -q -y cmake make g++ file run: | cmake . 
-DDISABLE_SSE2=1 -DDISABLE_AVX2=1 && make && file bin/trimal && file bin/readal && - ./scripts/generate_trimmed_msas.sh && ./scripts/compare_trimmed_msas.sh \ No newline at end of file + ./scripts/generate_trimmed_msas.sh && ./scripts/compare_trimmed_msas.sh From 1d81091e233c6464467f6e69a047d70bac684f1d Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 27 Jan 2024 12:11:35 +0100 Subject: [PATCH 8/9] Fix inclusion of `cpu_features` headers based on target CPU --- source/Statistics/Manager.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/source/Statistics/Manager.cpp b/source/Statistics/Manager.cpp index 1e256fac6..82affd8c2 100644 --- a/source/Statistics/Manager.cpp +++ b/source/Statistics/Manager.cpp @@ -29,7 +29,13 @@ #include +#if defined(__x86_64__) || defined(__i386__) #include "cpuinfo_x86.h" +#elif defined(__arm__) +#include "cpuinfo_arm.h" +#elif defined(__aarch64__) +#include "cpuinfo_aarch64.h" +#endif #include "Statistics/Similarity.h" #include "Statistics/Consistency.h" @@ -373,8 +379,13 @@ namespace statistics { shWindow = 0; // Detect the best supported compute platform on the local machine. +#if defined(CPU_FEATURES_ARCH_X86) static const X86Info info = GetX86Info(); static const X86Features features = info.features; +#elif defined(CPU_FEATURES_ARCH_ARM) + static const ArmInfo info = GetArmInfo(); + static const ArmFeatures features = info.features; +#endif // On x86, test whether SSE2 or AVX2 are supported. 
#ifdef CPU_FEATURES_ARCH_X86_32 From daf64435919eb803b92962a06d6738e3a2fe6366 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 3 Feb 2024 11:03:12 +0100 Subject: [PATCH 9/9] Fix detection of NEON on Arm platforms --- source/Statistics/Manager.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/Statistics/Manager.cpp b/source/Statistics/Manager.cpp index 82affd8c2..bdd9ddbdd 100644 --- a/source/Statistics/Manager.cpp +++ b/source/Statistics/Manager.cpp @@ -423,7 +423,9 @@ namespace statistics { - // On Aarch64, test whether NEON is supported. + // On 32-bit Arm, test whether NEON is supported. #ifdef CPU_FEATURES_ARCH_ARM #ifdef HAVE_NEON - platform = ComputePlatform::NEON; + if (features.neon != 0) { + platform = ComputePlatform::NEON; + } #endif #endif