
Commit ae2585a

Merge pull request #16 from menloresearch/update-dev-from-master-2025-03-20-23-14
Sync master with upstream release b4930
2 parents: 9937faa + dbb3a47


79 files changed: +6016 / -1298 lines

.github/workflows/build.yml

Lines changed: 29 additions & 0 deletions
@@ -676,6 +676,35 @@ jobs:
             -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
 
+  macOS-latest-cmake-visionos:
+    runs-on: macos-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          cmake -B build -G Xcode \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_SYSTEM_NAME=visionOS \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+
   macOS-latest-swift:
     runs-on: macos-latest

CMakeLists.txt

Lines changed: 9 additions & 1 deletion
@@ -29,6 +29,8 @@ else()
     set(LLAMA_STANDALONE OFF)
 endif()
 
+option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
+
 if (EMSCRIPTEN)
     set(BUILD_SHARED_LIBS_DEFAULT OFF)
 
@@ -145,7 +147,13 @@ endif()
 # 3rd-party
 #
 
-if (NOT TARGET ggml)
+if (LLAMA_USE_SYSTEM_GGML)
+    message(STATUS "Using system-provided libggml, skipping ggml build")
+    find_package(ggml REQUIRED)
+    add_library(ggml ALIAS ggml::ggml)
+endif()
+
+if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
     add_subdirectory(ggml)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
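
The new option lets llama.cpp link against an already-installed libggml instead of building the bundled copy. A minimal configure sketch, assuming ggml's CMake package is installed somewhere discoverable (the /opt/ggml prefix below is purely illustrative):

```bash
# Build against a system-provided libggml rather than the vendored subproject.
# /opt/ggml is a hypothetical install prefix; point CMAKE_PREFIX_PATH at the
# location that actually contains ggml's CMake package files.
cmake -B build \
    -DLLAMA_USE_SYSTEM_GGML=ON \
    -DCMAKE_PREFIX_PATH=/opt/ggml
cmake --build build --config Release
```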

build-xcframework.sh

Lines changed: 4 additions & 4 deletions
@@ -432,8 +432,8 @@ cmake -B build-visionos -G Xcode \
     -DCMAKE_SYSTEM_NAME=visionOS \
     -DCMAKE_OSX_SYSROOT=xros \
     -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
-    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
+    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
+    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
     -S .
 cmake --build build-visionos --config Release -- -quiet
 
@@ -445,8 +445,8 @@ cmake -B build-visionos-sim -G Xcode \
     -DCMAKE_SYSTEM_NAME=visionOS \
     -DCMAKE_OSX_SYSROOT=xrsimulator \
     -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
-    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
+    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
+    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
     -S .
 cmake --build build-visionos-sim --config Release -- -quiet

cmake/common.cmake

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+include("ggml/cmake/common.cmake")
+
 function(llama_add_compile_flags)
     if (LLAMA_FATAL_WARNINGS)
         if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")

convert_hf_to_gguf.py

Lines changed: 236 additions & 55 deletions
Large diffs are not rendered by default.

docs/backend/SYCL.md

Lines changed: 3 additions & 1 deletion
@@ -660,8 +660,9 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 |--------------------|---------------------------------------|---------------------------------------------|
 | GGML_SYCL          | ON (mandatory)                        | Enable build with SYCL code path.<br>FP32 path - recommended for better perforemance than FP16 on quantized model|
 | GGML_SYCL_TARGET   | INTEL *(default)* \| NVIDIA \| AMD    | Set the SYCL target device type. |
-| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD)          | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
+| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD)          | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
 | GGML_SYCL_F16      | OFF *(default)* \|ON *(optional)*     | Enable FP16 build with SYCL code path. |
+| GGML_SYCL_GRAPH    | ON *(default)* \|OFF *(Optional)*     | Enable build with [SYCL Graph extension](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc). |
 | CMAKE_C_COMPILER   | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. |
 | CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)*   | Set `icpx/icx` compiler for SYCL code path. |
 
@@ -671,6 +672,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 |-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------|
 | GGML_SYCL_DEBUG   | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG |
 | GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase |
+| GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through SYCL Graphs feature. Disabled by default because graph performance isn't yet better than non-graph performance. |
 | ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.<br>Recommended to use when --split-mode = layer |
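
The new `GGML_SYCL_GRAPH` build option and `GGML_SYCL_DISABLE_GRAPH` environment variable control the SYCL Graph path at compile time and run time respectively. A rough usage sketch, assuming the SYCL toolchain is already set up as described earlier in the guide (binary and model paths are illustrative):

```bash
# GGML_SYCL_GRAPH defaults to ON; shown explicitly here for clarity.
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_GRAPH=ON \
    -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake --build build --config Release

# Fall back to the non-graph path at run time to compare performance.
GGML_SYCL_DISABLE_GRAPH=1 ./build/bin/llama-cli -m models/model.gguf -p "Hello"
```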

examples/main/README.md

Lines changed: 36 additions & 5 deletions
@@ -27,12 +27,24 @@ Once downloaded, place your model in the models folder in llama.cpp.
 ##### Input prompt (One-and-done)
 
 ```bash
-./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --prompt "Once upon a time"
+./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf -no-cnv --prompt "Once upon a time"
 ```
 ##### Conversation mode (Allow for continuous interaction with the model)
 
 ```bash
-./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf -cnv --chat-template gemma
+./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --chat-template gemma
+```
+
+##### Conversation mode using built-in jinja chat template
+
+```bash
+./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --jinja
+```
+
+##### One-and-done query using jinja with custom system prompt and a starting prompt
+
+```bash
+./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --jinja --single-turn -sys "You are a helpful assistant" -p "Hello"
 ```
 
 ##### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it):
@@ -44,12 +56,24 @@ Once downloaded, place your model in the models folder in llama.cpp.
 
 ##### Input prompt (One-and-done)
 ```powershell
-./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --prompt "Once upon a time"
+./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf -no-cnv --prompt "Once upon a time"
 ```
 ##### Conversation mode (Allow for continuous interaction with the model)
 
 ```powershell
-./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf -cnv --chat-template gemma
+./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --chat-template gemma
+```
+
+##### Conversation mode using built-in jinja chat template
+
+```powershell
+./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --jinja
+```
+
+##### One-and-done query using jinja with custom system prompt and a starting prompt
+
+```powershell
+./llama-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --jinja --single-turn -sys "You are a helpful assistant" -p "Hello"
 ```
 
 #### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it):
@@ -77,6 +101,8 @@ The `llama-cli` program provides several ways to interact with the LLaMA models
 
 - `--prompt PROMPT`: Provide a prompt directly as a command-line option.
 - `--file FNAME`: Provide a file containing a prompt or multiple prompts.
+- `--system-prompt PROMPT`: Provide a system prompt (will otherwise use the default one in the chat template (if provided)).
+- `--system-prompt-file FNAME`: Provide a file containing a system prompt.
 - `--interactive-first`: Run the program in interactive mode and wait for input right away. (More on this below.)
 
 ## Interaction
@@ -89,7 +115,10 @@ In interactive mode, users can participate in text generation by injecting their
 
 - `-i, --interactive`: Run the program in interactive mode, allowing users to engage in real-time conversations or provide specific instructions to the model.
 - `--interactive-first`: Run the program in interactive mode and immediately wait for user input before starting the text generation.
-- `-cnv, --conversation`: Run the program in conversation mode (does not print special tokens and suffix/prefix, use default chat template) (default: false)
+- `-cnv, --conversation`: Run the program in conversation mode (does not print special tokens and suffix/prefix, use default or provided chat template) (default: true if chat template found)
+- `-no-cnv`: Disable conversation mode (default: false)
+- `-st, --single-turn`: Only process a single conversation turn (user input) and then exit.
+- `--jinja`: Enable jinja chat template parser, will use the model's built-in template or a user-provided one (default: false)
 - `--color`: Enable colorized output to differentiate visually distinguishing between prompts, user input, and generated text.
 
 By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the LLaMA models, tailoring the text generation process to your specific needs.
@@ -125,6 +154,8 @@ When --in-prefix or --in-suffix options are enabled the chat template ( --chat-t
 
 Example usage: `--chat-template gemma`
 
+`--chat-template-file FNAME`: Load a custom jinja chat template from an external file, useful if the model contains outdated or incompatible template, some examples can be found in models/templates. Up-to-date chat templates can be downloaded from Hugging Face using scripts/get_chat_template.py
+
 ## Context Management
 
 During text generation, LLaMA models have a limited context size, which means they can only consider a certain number of tokens from the input and generated text. When the context fills up, the model resets internally, potentially losing some information from the beginning of the conversation or instructions. Context management options help maintain continuity and coherence in these situations.
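
The newly documented `--chat-template-file` flag pairs naturally with `--jinja`. A possible invocation, assuming a template saved under `models/templates` (the file name below is illustrative; any valid Jinja chat template works):

```bash
# Override the template embedded in the GGUF with a Jinja template from disk.
# models/templates/gemma.jinja is a hypothetical path used for illustration.
./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf \
    --jinja --chat-template-file models/templates/gemma.jinja
```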

examples/server/server.cpp

Lines changed: 4 additions & 4 deletions
@@ -1872,6 +1872,10 @@ struct server_context {
             params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers;
             params_dft.n_parallel   = 1;
 
+            // force F16 KV cache for the draft model for extra performance
+            params_dft.cache_type_k = GGML_TYPE_F16;
+            params_dft.cache_type_v = GGML_TYPE_F16;
+
             llama_init_dft = common_init_from_params(params_dft);
 
             model_dft = llama_init_dft.model.get();
@@ -1892,10 +1896,6 @@
             cparams_dft = common_context_params_to_llama(params_dft);
             cparams_dft.n_batch = n_ctx_dft;
 
-            // force F16 KV cache for the draft model for extra performance
-            cparams_dft.type_k = GGML_TYPE_F16;
-            cparams_dft.type_v = GGML_TYPE_F16;
-
             // the context is not needed - we will create one for each slot
             llama_init_dft.context.reset();
         }
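
In effect, the draft model used for speculative decoding keeps an F16 KV cache even when the main model's cache is quantized, and the setting is now applied at model-init time rather than per-context. A launch sketch where that distinction shows up, assuming the server's usual draft-model (`-md`) and cache-type flags in recent builds (model file names are examples only):

```bash
# Main model with a quantized K cache; a smaller draft model attached for
# speculative decoding. The draft model's KV cache is forced to F16 internally,
# regardless of the --cache-type-k setting below.
./build/bin/llama-server \
    -m  models/qwen2.5-32b-instruct-q4_k_m.gguf \
    -md models/qwen2.5-0.5b-instruct-q4_k_m.gguf \
    --cache-type-k q8_0
```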

examples/speculative/speculative.cpp

Lines changed: 2 additions & 2 deletions
@@ -331,11 +331,11 @@ int main(int argc, char ** argv) {
                 }
 
                 active_seqs.erase(s);
-                for(int i = 0; i < n_seq_dft; i++) {
+                for (int i = 0; i < n_seq_dft; i++) {
                     if (i == s) {
                         continue;
                     }
-                    if (drafts[i].tokens[i_dft] == drafts[s].tokens[i_dft]) {
+                    if (drafts[i].active && drafts[i].tokens[i_dft] == drafts[s].tokens[i_dft]) {
                         // synchronize active status for sequences with the same drafted token
                         drafts[i].active = drafts[i].active && accept;
                         if (!drafts[i].active) {

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -186,6 +186,7 @@ option(GGML_OPENMP "ggml: use OpenMP"
 option(GGML_RPC        "ggml: use RPC"                                 OFF)
 option(GGML_SYCL       "ggml: use SYCL"                                OFF)
 option(GGML_SYCL_F16   "ggml: use 16 bit floats for sycl calculations" OFF)
+option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend"       ON)
 set  (GGML_SYCL_TARGET "INTEL" CACHE STRING
      "ggml: sycl target device")
 set  (GGML_SYCL_DEVICE_ARCH "" CACHE STRING

ggml/cmake/common.cmake

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+function(ggml_get_flags CCID CCVER)
+    set(C_FLAGS "")
+    set(CXX_FLAGS "")
+
+    if (CCID MATCHES "Clang")
+        set(C_FLAGS   -Wunreachable-code-break -Wunreachable-code-return)
+        set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
+
+        if (
+            (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
+            (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
+        )
+            list(APPEND C_FLAGS -Wdouble-promotion)
+        endif()
+    elseif (CCID STREQUAL "GNU")
+        set(C_FLAGS   -Wdouble-promotion)
+        set(CXX_FLAGS -Wno-array-bounds)
+
+        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
+            list(APPEND CXX_FLAGS -Wextra-semi)
+        endif()
+    endif()
+
+    set(GF_C_FLAGS   ${C_FLAGS}   PARENT_SCOPE)
+    set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
+endfunction()

ggml/include/ggml.h

Lines changed: 24 additions & 0 deletions
@@ -454,6 +454,7 @@ extern "C" {
         GGML_OP_RMS_NORM,
         GGML_OP_RMS_NORM_BACK,
         GGML_OP_GROUP_NORM,
+        GGML_OP_L2_NORM,
 
         GGML_OP_MUL_MAT,
         GGML_OP_MUL_MAT_ID,
@@ -502,6 +503,7 @@ extern "C" {
         GGML_OP_ADD_REL_POS,
         GGML_OP_RWKV_WKV6,
         GGML_OP_GATED_LINEAR_ATTN,
+        GGML_OP_RWKV_WKV7,
 
         GGML_OP_UNARY,
 
@@ -1095,6 +1097,18 @@ extern "C" {
             int                   n_groups,
             float                 eps);
 
+    // l2 normalize along rows
+    // used in rwkv v7
+    GGML_API struct ggml_tensor * ggml_l2_norm(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            float                 eps);
+
+    GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            float                 eps);
+
     // a - x
     // b - dy
     GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -1890,6 +1904,16 @@ extern "C" {
            struct ggml_tensor  * state,
            float                 scale);
 
+    GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * r,
+            struct ggml_tensor  * w,
+            struct ggml_tensor  * k,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * state);
+
     // custom operators
 
     typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);

ggml/src/CMakeLists.txt

Lines changed: 5 additions & 27 deletions
@@ -1,4 +1,5 @@
 include(CheckCXXCompilerFlag)
+include("../cmake/common.cmake")
 
 add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
 
@@ -24,33 +25,6 @@ if (NOT MSVC)
     endif()
 endif()
 
-function(ggml_get_flags CCID CCVER)
-    set(C_FLAGS "")
-    set(CXX_FLAGS "")
-
-    if (CCID MATCHES "Clang")
-        set(C_FLAGS   -Wunreachable-code-break -Wunreachable-code-return)
-        set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
-
-        if (
-            (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
-            (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
-        )
-            list(APPEND C_FLAGS -Wdouble-promotion)
-        endif()
-    elseif (CCID STREQUAL "GNU")
-        set(C_FLAGS   -Wdouble-promotion)
-        set(CXX_FLAGS -Wno-array-bounds)
-
-        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
-            list(APPEND CXX_FLAGS -Wextra-semi)
-        endif()
-    endif()
-
-    set(GF_C_FLAGS   ${C_FLAGS}   PARENT_SCOPE)
-    set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
-endfunction()
-
 if (GGML_FATAL_WARNINGS)
     if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
         list(APPEND C_FLAGS -Werror)
@@ -351,6 +325,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Android")
     target_link_libraries(ggml-base PRIVATE dl)
 endif()
 
+if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
+    target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
+endif()
+
 if (BUILD_SHARED_LIBS)
     foreach (target ggml-base ggml)
         set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 15 additions & 7 deletions
@@ -287,17 +287,25 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
                 endif()
             endif()
         endif()
-    elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
+    elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
         message(STATUS "PowerPC detected")
-        execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M)
-        if (${POWER_M} MATCHES "POWER10")
-            list(APPEND ARCH_FLAGS -mcpu=power10)
-        elseif (${POWER_M} MATCHES "POWER9")
-            list(APPEND ARCH_FLAGS -mcpu=power9)
+        if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
+            file(READ "/proc/cpuinfo" POWER10_M)
+        elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc")
+            execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
+        endif()
+
+        string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M}")
+        string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
+
+        if (EXTRACTED_NUMBER GREATER_EQUAL 10)
+            list(APPEND ARCH_FLAGS -mcpu=power10 -mpowerpc64)
+        elseif (EXTRACTED_NUMBER EQUAL 9)
+            list(APPEND ARCH_FLAGS -mcpu=power9 -mpowerpc64)
         elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
            list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
         else()
-            list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native)
+            list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
         endif()
     elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
         message(STATUS "loongarch64 detected")
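
The new logic extracts the POWER generation number from CPU information instead of matching fixed model strings. A rough shell equivalent of what the CMake check computes on a Linux ppc64le host (on AIX/powerpc it would read `prtconf` instead), shown only to illustrate the mapping:

```bash
# Sketch of the detection: pull the POWER generation out of /proc/cpuinfo,
# then map it to the -mcpu flags that the CMake logic appends to ARCH_FLAGS.
gen=$(grep -oE 'POWER *[0-9]+' /proc/cpuinfo | head -n 1 | grep -oE '[0-9]+')
if [ "${gen:-0}" -ge 10 ]; then
    echo "-mcpu=power10 -mpowerpc64"
elif [ "${gen:-0}" -eq 9 ]; then
    echo "-mcpu=power9 -mpowerpc64"
fi
```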
