@@ -111,70 +111,35 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
111
111
endif ()
112
112
113
113
set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV} )
114
- elseif ( APPLE )
114
+ else ( )
115
115
if (GGML_NATIVE)
116
- set (USER_PROVIDED_MARCH FALSE )
117
- foreach (flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_REQUIRED_FLAGS)
118
- if ("${${flag_var} }" MATCHES "-march=[a-zA-Z0-9+._-]+" )
119
- set (USER_PROVIDED_MARCH TRUE )
120
- break ()
116
+ list (APPEND ARCH_FLAGS -mcpu=native)
117
+ else ()
118
+ check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
119
+ if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E} " STREQUAL "" )
120
+ list (APPEND ARCH_FLAGS -mfp16-format=ieee)
121
+ endif ()
122
+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6" )
123
+ # Raspberry Pi 1, Zero
124
+ list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
125
+ endif ()
126
+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7" )
127
+ if ("${CMAKE_SYSTEM_NAME} " STREQUAL "Android" )
128
+ # Android armeabi-v7a
129
+ list (APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
130
+ else ()
131
+ # Raspberry Pi 2
132
+ list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
121
133
endif ()
122
- endforeach ()
123
-
124
- if (NOT USER_PROVIDED_MARCH)
125
- set (MARCH_FLAGS "-march=armv8.2a" )
126
-
127
- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
128
- if (GGML_COMPILER_SUPPORT_DOTPROD)
129
- set (MARCH_FLAGS "${MARCH_FLAGS} +dotprod" )
130
- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
131
-
132
- message (STATUS "ARM feature DOTPROD enabled" )
133
- endif ()
134
-
135
- set (TEST_I8MM_FLAGS "-march=armv8.2a+i8mm" )
136
-
137
- set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS} )
138
- set (CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${TEST_I8MM_FLAGS} " )
139
-
140
- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
141
- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
142
- set (MARCH_FLAGS "${MARCH_FLAGS} +i8mm" )
143
- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
144
-
145
- message (STATUS "ARM feature MATMUL_INT8 enabled" )
146
- endif ()
147
-
148
- set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE} )
149
-
150
- list (APPEND ARCH_FLAGS "${MARCH_FLAGS} " )
151
- endif ()
152
- endif ()
153
- else ()
154
- check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
155
- if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E} " STREQUAL "" )
156
- list (APPEND ARCH_FLAGS -mfp16-format=ieee)
157
- endif ()
158
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6" )
159
- # Raspberry Pi 1, Zero
160
- list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
161
- endif ()
162
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7" )
163
- if ("${CMAKE_SYSTEM_NAME} " STREQUAL "Android" )
164
- # Android armeabi-v7a
165
- list (APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
166
- else ()
167
- # Raspberry Pi 2
168
- list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
169
134
endif ()
170
- endif ( )
171
- if ( ${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8" )
172
- # Android arm64-v8a
173
- # Raspberry Pi 3, 4, Zero 2 (32-bit )
174
- list ( APPEND ARCH_FLAGS -mno-unaligned-access )
175
- endif ( )
176
- if (GGML_SVE )
177
- list ( APPEND ARCH_FLAGS -march=armv8.6-a+sve )
135
+ if ( ${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8" )
136
+ # Android arm64-v8a
137
+ # Raspberry Pi 3, 4, Zero 2 (32-bit)
138
+ list ( APPEND ARCH_FLAGS -mno-unaligned-access )
139
+ endif ( )
140
+ if (GGML_SVE )
141
+ list ( APPEND ARCH_FLAGS -march=armv8.6-a+sve )
142
+ endif ( )
178
143
endif ()
179
144
endif ()
180
145
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
0 commit comments