Skip to content

Commit 3fdfeb1

Browse files
committed
ggml: GGML_NATIVE uses -mcpu=native on ARM
Signed-off-by: Adrien Gallouët <[email protected]>
1 parent 08ea539 commit 3fdfeb1

File tree

2 files changed

+30
-65
lines changed

2 files changed

+30
-65
lines changed

ggml/CMakeLists.txt

+4 -4
Original file line number | Diff line number | Diff line change
@@ -74,10 +74,10 @@ if (NOT GGML_CUDA_GRAPHS_DEFAULT)
7474
endif()
7575

7676
# general
77-
option(GGML_STATIC "ggml: static link libraries" OFF)
78-
option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
79-
option(GGML_LTO "ggml: enable link time optimization" OFF)
80-
option(GGML_CCACHE "ggml: use ccache if available" ON)
77+
option(GGML_STATIC "ggml: static link libraries" OFF)
78+
option(GGML_NATIVE "ggml: optimize the build for the current CPU" ${GGML_NATIVE_DEFAULT})
79+
option(GGML_LTO "ggml: enable link time optimization" OFF)
80+
option(GGML_CCACHE "ggml: use ccache if available" ON)
8181

8282
# debug
8383
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)

ggml/src/ggml-cpu/CMakeLists.txt

+26 -61
Original file line number | Diff line number | Diff line change
@@ -111,70 +111,35 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
111111
endif ()
112112

113113
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
114-
elseif (APPLE)
114+
else()
115115
if (GGML_NATIVE)
116-
set(USER_PROVIDED_MARCH FALSE)
117-
foreach(flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_REQUIRED_FLAGS)
118-
if ("${${flag_var}}" MATCHES "-march=[a-zA-Z0-9+._-]+")
119-
set(USER_PROVIDED_MARCH TRUE)
120-
break()
116+
list(APPEND ARCH_FLAGS -mcpu=native)
117+
else()
118+
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
119+
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
120+
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
121+
endif()
122+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
123+
# Raspberry Pi 1, Zero
124+
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
125+
endif()
126+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
127+
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
128+
# Android armeabi-v7a
129+
list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
130+
else()
131+
# Raspberry Pi 2
132+
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
121133
endif()
122-
endforeach()
123-
124-
if (NOT USER_PROVIDED_MARCH)
125-
set(MARCH_FLAGS "-march=armv8.2a")
126-
127-
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
128-
if (GGML_COMPILER_SUPPORT_DOTPROD)
129-
set(MARCH_FLAGS "${MARCH_FLAGS}+dotprod")
130-
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
131-
132-
message(STATUS "ARM feature DOTPROD enabled")
133-
endif ()
134-
135-
set(TEST_I8MM_FLAGS "-march=armv8.2a+i8mm")
136-
137-
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
138-
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${TEST_I8MM_FLAGS}")
139-
140-
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
141-
if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
142-
set(MARCH_FLAGS "${MARCH_FLAGS}+i8mm")
143-
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
144-
145-
message(STATUS "ARM feature MATMUL_INT8 enabled")
146-
endif ()
147-
148-
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
149-
150-
list(APPEND ARCH_FLAGS "${MARCH_FLAGS}")
151-
endif ()
152-
endif ()
153-
else()
154-
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
155-
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
156-
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
157-
endif()
158-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
159-
# Raspberry Pi 1, Zero
160-
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
161-
endif()
162-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
163-
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
164-
# Android armeabi-v7a
165-
list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
166-
else()
167-
# Raspberry Pi 2
168-
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
169134
endif()
170-
endif()
171-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
172-
# Android arm64-v8a
173-
# Raspberry Pi 3, 4, Zero 2 (32-bit)
174-
list(APPEND ARCH_FLAGS -mno-unaligned-access)
175-
endif()
176-
if (GGML_SVE)
177-
list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
135+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
136+
# Android arm64-v8a
137+
# Raspberry Pi 3, 4, Zero 2 (32-bit)
138+
list(APPEND ARCH_FLAGS -mno-unaligned-access)
139+
endif()
140+
if (GGML_SVE)
141+
list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
142+
endif()
178143
endif()
179144
endif()
180145
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR

0 commit comments

Comments
 (0)