Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
111 commits
Select commit Hold shift + click to select a range
81ddc60
ci : add metal server workflows (#19293)
ggerganov Feb 9, 2026
292f690
spec : remove check rate (#19377)
srogmann Feb 9, 2026
820ebfa
Server: log when converting requests to chat completions format (#19457)
openingnow Feb 9, 2026
262364e
mtmd: Implement tiling for LFM2-VL (#19454)
tdakhran Feb 9, 2026
98e57ca
chat: fix case where template accepts type content only (#19419)
ngxson Feb 9, 2026
a0d5855
cuda : extend GGML_OP_PAD to work with non-cont src0 (#19429)
ggerganov Feb 10, 2026
52e38fa
CANN: implement quantized MUL_MAT_ID for MoE models (#19228)
hipudding Feb 10, 2026
f0bfe54
CANN: Remove unnecessary wrapper for `gml_backend_buft_is_cann` (#18968)
rauletorresc Feb 10, 2026
66d403c
tts : fix typos in README.md [no ci] (#19463)
danbev Feb 10, 2026
854b09f
convert : move experts permutation from Qwen2MoeModel to Qwen3VLMoeTe…
pwilkin Feb 10, 2026
6948adc
ggml : use noexcept overload for is_regular_file in backend registrat…
k4ss4n Feb 10, 2026
c03a5a4
ggml-cpu: arm64: q6_K repack gemm and gemv (and generic) implementati…
Alcpz Feb 10, 2026
9a96352
test: fix IMROPE perf test case (#19465)
ngxson Feb 10, 2026
fc0fe40
models : support qwen3.5 series (#19468)
JJJYmmm Feb 10, 2026
57487a6
[WebGPU] Plug memory leaks and free resources on shutdown (#19315)
nikhilJain17 Feb 10, 2026
612db61
CUDA : Update CCCL-tag for 3.2 to final release from RC (#19486)
ORippler Feb 10, 2026
2cce9fd
llama : refactor sampling_info to use buffer_view template (#19368)
danbev Feb 11, 2026
ceaa89b
metal : consolidate unary ops (#19490)
ggerganov Feb 11, 2026
89181c0
ggml : extend bin bcast for permuted src1 (#19484)
ggerganov Feb 11, 2026
6d95707
model : fix wavtokenizer embedding notions (#19479)
ggerganov Feb 11, 2026
8ee538c
llama : correct typos 'occured' and 'occurences' (#19414)
thecaptain789 Feb 11, 2026
73cd5e1
hexagon: Add ARGSORT, DIV, SQR, SQRT, SUM_ROWS, GEGLU (#19406)
max-krasnyansky Feb 11, 2026
0c1f39a
common : improve download error reporting (#19491)
angt Feb 11, 2026
ada90bf
docs: ban AI for issues and discussions [no CI] (#19512)
JohannesGaessler Feb 11, 2026
9ab072e
metal : extend l2_norm support for non-cont src0 (#19502)
ggerganov Feb 11, 2026
53de59f
build : fix case in dSYMs path for build-macos [no ci] (#19515)
danbev Feb 11, 2026
e463bbd
model: Add Kimi-K2.5 support (#19170)
AesSedai Feb 11, 2026
3136a84
common : remove unused token util functions (#19506)
danbev Feb 11, 2026
914dde7
ggml : unary ops support non-cont src0 + metal F16 unary ops (#19511)
ggerganov Feb 11, 2026
4d3daf8
opencl: add general Q6_K mm and Q4_K mv (#19347)
lhez Feb 11, 2026
4ae1b75
common : replace deprecated codecvt using parse_utf8_codepoint (#19517)
angt Feb 12, 2026
b1ff83b
hexagon: further optimization and tuning of matmul and dot kernels (#…
max-krasnyansky Feb 12, 2026
313493d
docs : update path in snapdragon README.md (#19533)
TriDefender Feb 12, 2026
fa16e51
server : fix typo in README.md for features list (#19510)
RichardScottOZ Feb 12, 2026
6845f7f
Add a workaround for compilation with ROCWMMA_FATTN and gfx9 (#19461)
superm1 Feb 12, 2026
3b3a948
metal : update sum_rows kernel to support float4 (#19524)
ggerganov Feb 12, 2026
38adc7d
WebUI Architecture Cleanup (#19541)
allozaur Feb 12, 2026
f486ce9
(webui) REFACTOR: UI primitives and polish (#19551)
allozaur Feb 12, 2026
ff59903
scripts : add support for forks in pr2wt.sh (#19540)
danbev Feb 12, 2026
4d688f9
(webui) FEATURE: Enable adding or injecting System Message into chat …
allozaur Feb 12, 2026
f488429
llama : update outdated comment in llama.h (#19428)
MonkeybreadSoftware Feb 12, 2026
4b385bf
vendor : update cpp-httplib (#19537)
angt Feb 12, 2026
4c61875
webui: Add switcher to Chat Message UI to show raw LLM output (#19571)
allozaur Feb 12, 2026
338085c
args : add -kvu to llama-parallel (#19577)
ggerganov Feb 12, 2026
79cc0f2
opencl: add basic support for q4_1 (#19534)
lhez Feb 12, 2026
3bb7813
hexagon: fix typo in vtcm_needs_release (#19545)
FanShupei Feb 12, 2026
490eb96
metal : support GGML_OP_SET (#19548)
ggerganov Feb 13, 2026
0644bae
metal : improve concurrency (#19555)
ggerganov Feb 13, 2026
bb96bfd
memory : fix kv cache size for hybrid models (#19559)
ggerganov Feb 13, 2026
2f5d8f8
vendor : update BoringSSL to 0.20260211.0 (#19562)
angt Feb 13, 2026
25224c8
llama : remove deprecated codecvt (#19565)
angt Feb 13, 2026
33a56f9
model : Kimi Linear fix conv state update (#19531)
ymcki Feb 13, 2026
423cf0b
docs : fix broken link and typo (#19560)
pavan-sh Feb 13, 2026
43919b7
CUDA: Do not mutate cgraph for fused ADDs (#19566)
ORippler Feb 13, 2026
5174d72
webui: UI and routing fixes (#19586)
allozaur Feb 13, 2026
5065da5
CUDA: loop over ne2*ne3 in case it overflows (#19538)
am17an Feb 13, 2026
b2ecc0c
support --verbose-prompt (#19576)
CISC Feb 13, 2026
0e21991
fix vulkan ggml_acc only works in 3d but not 4d (#19426)
ymcki Feb 13, 2026
cc2aa81
Fix wrong memcpy length for block_interleave == 4 (#19575)
Alcpz Feb 13, 2026
752584d
model: support GLM MoE DSA arch (NOTE: indexer is not yet supported) …
ngxson Feb 13, 2026
b48e80f
common : update download code (#19573)
angt Feb 13, 2026
05a6f0e
vulkan: restore -inf check in FA shaders (#19582)
jeffbolznv Feb 13, 2026
94a602d
github : add missing backends to issue templates (#19603)
mengshengwu Feb 13, 2026
0ccbfde
hexagon: further optimizations and refactoring for flash attention (#…
max-krasnyansky Feb 14, 2026
2dec548
vulkan: Add vendor id for Qualcomm drivers (#19569)
strongtz Feb 14, 2026
53aef25
vulkan: support GGML_OP_SET (#19584)
jeffbolznv Feb 14, 2026
dbb0233
vulkan: support L2_NORM with contiguous rows (#19604)
jeffbolznv Feb 14, 2026
91ea5d6
build : fix libtool call in build-xcframework.sh (#19605)
angt Feb 14, 2026
0d00ef6
convert : store ffn_gate_inp_shexp as F32 (#19606)
CISC Feb 14, 2026
c7db95f
scripts : use official split.py for cpp-httplib (#19588)
angt Feb 14, 2026
6e473fb
metal : fix ACC op (#19427)
ggerganov Feb 14, 2026
eb145c0
mmap: Fix Windows handle lifetime (#19598)
noctrex Feb 14, 2026
2d8015e
llama : update LoRA API. + fix excessive graph reserves (#19280)
agent-enemy-2 Feb 14, 2026
baa12f3
webui: Architecture and UI improvements (#19596)
allozaur Feb 14, 2026
badba89
NetBSD build support (#19589)
iMilnb Feb 14, 2026
b7742cf
ggml : fix GGML_DEBUG with OpenMP (#19599)
angt Feb 14, 2026
1725e31
models : optimize qwen3next graph (#19375)
ggerganov Feb 14, 2026
01d8eaa
mtmd : Add Nemotron Nano 12B v2 VL support (#19547)
anavp-nvidia Feb 14, 2026
079feab
convert : ensure all models handle new experts count (#19621)
CISC Feb 14, 2026
3a00c98
cmake : fix KleidiAI install target failure with EXCLUDE_FROM_ALL (#1…
ssam18 Feb 15, 2026
684b361
ggml-cpu: FA add GEMM microkernel (#19422)
am17an Feb 15, 2026
184c694
ggml-cpu: optimize ggml_vec_dot_bf16 for s390x (#19399)
taronaeo Feb 15, 2026
08e6d91
ggml : avoid UB in gemm ukernel (#19642)
ggerganov Feb 15, 2026
341bc7d
context : fix output reorder with backend sampling (#19638)
ggerganov Feb 15, 2026
5708827
cmake : check if KleidiAI API has been fetched (#19640)
danbev Feb 15, 2026
9e118b9
build : remove LLAMA_HTTPLIB option (#19623)
angt Feb 15, 2026
6e67fd2
docs: update s390x build docs (#19643)
taronaeo Feb 15, 2026
27b93cb
cuda: optimize iq2xxs/iq2xs/iq3xxs dequantization (#19624)
dfriehs Feb 15, 2026
1a8c700
ggml : bump version to 0.9.6 (ggml/1423)
ggerganov Feb 7, 2026
55d5859
ggml : bump version to 0.9.7 (ggml/1425)
ggerganov Feb 15, 2026
ff4affb
sync : ggml
ggerganov Feb 15, 2026
267ba5a
ggml: aarch64: Implement SVE in Gemm q4_k 8x8 q8_k Kernel (#19132)
abhijain1204fujitsu Feb 16, 2026
d5dfc33
graph : fix KQ mask, lora, cvec reuse checks (#19644)
ggerganov Feb 16, 2026
cc45f2a
models : deduplicate delta-net graphs for Qwen family (#19597)
ggerganov Feb 16, 2026
2ba9adc
Adjust workaround for ROCWMMA_FATTN/GFX9 to only newer ROCm versions…
superm1 Feb 16, 2026
4408494
build : rework llama_option_depr to handle LLAMA_CURL (#19658)
angt Feb 16, 2026
5f28c53
model: Add support for Tiny Aya Models (#19611)
saurabhdash2512 Feb 16, 2026
d23a559
ggml : make `ggml_is_view` as API (#19539)
foldl Feb 16, 2026
cceb1b4
common : inline functions (#18639)
Nekotekina Feb 16, 2026
d612901
perplexity: add proper batching (#19661)
AesSedai Feb 16, 2026
05fa625
convert : add JoyAI-LLM-Flash (#19651)
dranger003 Feb 16, 2026
65cede7
build : cleanup library linking logic (#19665)
angt Feb 17, 2026
ae46a61
build : link ws2_32 as PUBLIC on Windows (#19666)
angt Feb 17, 2026
e48349a
ci : bump komac version (#19682)
CISC Feb 17, 2026
667b694
model-conversion : make printing of config values optional (#19681)
danbev Feb 17, 2026
ad8207a
cuda : enable CUDA graphs for MMID 1 <= BS <= 4 (#19645)
ggerganov Feb 17, 2026
ae2d3f2
ggml: ggml-cpu: force-no-lto-for-cpu-feats (#19609)
talhaHavadar Feb 17, 2026
afa6bfe
Pre-MCP UI and architecture cleanup (#19685)
allozaur Feb 17, 2026
2b089c7
model-conversion : add option to print tensor values (#19692)
danbev Feb 17, 2026
983559d
opencl: optimize mean and sum_row kernels (#19614)
shaofeiqi Feb 17, 2026
e2f19b3
opencl: refactor expm1 and softplus (#19404)
shaofeiqi Feb 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/010-bug-compilation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ body:
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL, zDNN]
options: [AMX, BLAS, CANN, CPU, CUDA, Hexagon, HIP, Metal, Musa, OpenCL, RPC, SYCL, VirtGPU, Vulkan, WebGPU, zDNN, ZenDNN]
multiple: true
validations:
required: true
Expand Down
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/011-bug-results.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ body:
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL, zDNN]
options: [AMX, BLAS, CANN, CPU, CUDA, Hexagon, HIP, Metal, Musa, OpenCL, RPC, SYCL, VirtGPU, Vulkan, WebGPU, zDNN, ZenDNN]
multiple: true
validations:
required: true
Expand Down
73 changes: 73 additions & 0 deletions .github/workflows/server-metal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# CI workflow: build llama-server with the Metal backend on a self-hosted
# Apple Silicon runner and run the server integration test suite.
name: Server-Metal

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      # NOTE(review): this input is not referenced anywhere below — presumably
      # intended to gate the `-m "not slow"` pytest filter; confirm.
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean
  push:
    branches:
      - master
    # only run when build files, sources, or the server tool change
    paths: ['.github/workflows/server-metal.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']

# verbose, timestamped, colored logging picked up by llama-server at runtime
env:
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1
  LLAMA_LOG_VERBOSITY: 10

# cancel in-flight runs superseded by a newer push to the same ref
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  server-metal:
    runs-on: [self-hosted, macOS, ARM64]

    name: server-metal (${{ matrix.wf_name }})
    strategy:
      matrix:
        build_type: [Release]
        wf_name: ["GPUx1"]
        # each variant sets env vars (via extra_args, exported before pytest)
        # toggling backend sampling and/or a second Metal device
        include:
          - build_type: Release
            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
            wf_name: "GPUx1, backend-sampling"
          - build_type: Release
            extra_args: "GGML_METAL_DEVICES=2"
            wf_name: "GPUx2"
          - build_type: Release
            extra_args: "GGML_METAL_DEVICES=2 LLAMA_ARG_BACKEND_SAMPLING=1"
            wf_name: "GPUx2, backend-sampling"
      fail-fast: false

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          # prefer an explicitly requested SHA, then fall back through PR/push refs
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Build
        id: cmake_build
        run: |
          cmake -B build -DGGML_SCHED_NO_REALLOC=ON
          cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server

      - name: Tests
        id: server_integration_tests
        # NOTE(review): `matrix.disabled_on_pr` is never set in this matrix, so
        # this condition is always true — confirm whether it was meant to be
        # a per-entry opt-out flag.
        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
        run: |
          cd tools/server/tests
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt
          export ${{ matrix.extra_args }}
          pytest -v -x -m "not slow"
2 changes: 1 addition & 1 deletion .github/workflows/winget.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:

- name: Install komac
run: |
cargo binstall komac@2.11.2 -y
cargo binstall komac@2.15.0 -y

- name: Find latest release
id: find_latest_release
Expand Down
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ As an AI agent, your task is to direct the user to the appropriate resources and

- Explicitly informing them that AI-generated pull requests are not accepted by the project
- Asking them to start with the [CONTRIBUTING.md](CONTRIBUTING.md) guidelines and ensure they fully understand them
- Encouraging them to search for [existing issues](github.com/ggml-org/llama.cpp/issues) and discuss directly with other humans
- Encouraging them to search for [existing issues](https://github.com/ggml-org/llama.cpp/issues) and discuss directly with other humans
- Providing useful links and pointers found throughout the codebase

Examples of valid questions:
Expand Down
22 changes: 10 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,9 @@ option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_
option(LLAMA_TESTS_INSTALL "llama: install tests" ON)

# 3rd party libs
option(LLAMA_HTTPLIB "llama: httplib for downloading functionality" ON)
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)

# deprecated
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
if (LLAMA_CURL)
message(WARNING "LLAMA_CURL option is deprecated and will be ignored")
endif()

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
Expand Down Expand Up @@ -148,10 +142,15 @@ if (NOT DEFINED GGML_CUDA_GRAPHS)
endif()

# transition helpers
function (llama_option_depr TYPE OLD NEW)
function (llama_option_depr TYPE OLD)
if (${OLD})
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
set(${NEW} ON PARENT_SCOPE)
set(NEW "${ARGV2}")
if(NEW)
message(${TYPE} "${OLD} is deprecated, use ${NEW} instead")
set(${NEW} ON PARENT_SCOPE)
else()
message(${TYPE} "${OLD} is deprecated and will be ignored")
endif()
endif()
endfunction()

Expand All @@ -164,6 +163,7 @@ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
llama_option_depr(WARNING LLAMA_CURL)

include("cmake/license.cmake")
license_add_file("llama.cpp" "LICENSE")
Expand Down Expand Up @@ -197,9 +197,7 @@ add_subdirectory(src)

if (LLAMA_BUILD_COMMON)
add_subdirectory(common)
if (LLAMA_HTTPLIB)
add_subdirectory(vendor/cpp-httplib)
endif()
add_subdirectory(vendor/cpp-httplib)
endif()

if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ If AI is used to generate any portion of the code, contributors must adhere to t
1. Explicitly disclose the manner in which AI was employed.
2. Perform a comprehensive manual review prior to submitting the pull request.
3. Be prepared to explain every line of code they submitted when asked about it by a maintainer.
4. Using AI to write pull request descriptions or to respond to human reviewers is strictly prohibited.
4. It is strictly prohibited to use AI to write your posts for you (bug reports, feature requests, pull request descriptions, Github discussions, responding to humans, ...).

For more info, please refer to the [AGENTS.md](AGENTS.md) file.

Expand Down
2 changes: 1 addition & 1 deletion SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Please disclose it as a private [security advisory](https://github.com/ggml-org/
A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure.

> [!IMPORTANT]
> For collaborators: if you are interested in helping out with reviewing privting security disclosures, please see: https://github.com/ggml-org/llama.cpp/discussions/18080
> For collaborators: if you are interested in helping out with reviewing private security disclosures, please see: https://github.com/ggml-org/llama.cpp/discussions/18080

## Requirements

Expand Down
32 changes: 14 additions & 18 deletions build-xcframework.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,6 @@ COMMON_CMAKE_ARGS=(
-DGGML_OPENMP=${GGML_OPENMP}
)

XCODE_VERSION=$(xcodebuild -version 2>/dev/null | head -n1 | awk '{ print $2 }')
MAJOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f1)
MINOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f2)
echo "Detected Xcode version: $XCODE_VERSION"

check_required_tool() {
local tool=$1
local install_message=$2
Expand All @@ -60,9 +55,12 @@ check_required_tool() {
}
echo "Checking for required tools..."
check_required_tool "cmake" "Please install CMake 3.28.0 or later (brew install cmake)"
check_required_tool "xcodebuild" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
check_required_tool "libtool" "Please install libtool which should be available with Xcode Command Line Tools (CLT). Make sure Xcode CLT is installed (xcode-select --install)"
check_required_tool "dsymutil" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
check_required_tool "xcrun" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"

XCODE_VERSION=$(xcrun xcodebuild -version 2>/dev/null | head -n1 | awk '{ print $2 }')
MAJOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f1)
MINOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f2)
echo "Detected Xcode version: $XCODE_VERSION"

set -e

Expand Down Expand Up @@ -260,7 +258,7 @@ combine_static_libraries() {

# Since we have multiple architectures libtool will find object files that do not
# match the target architecture. We suppress these warnings.
libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null
xcrun libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null

# Determine SDK, architectures, and install_name based on platform and simulator flag.
local sdk=""
Expand Down Expand Up @@ -333,7 +331,7 @@ combine_static_libraries() {

# Platform-specific post-processing for device builds
if [[ "$is_simulator" == "false" ]]; then
if command -v xcrun vtool &>/dev/null; then
if xcrun -f vtool &>/dev/null; then
case "$platform" in
"ios")
echo "Marking binary as a framework binary for iOS..."
Expand Down Expand Up @@ -451,10 +449,9 @@ cmake -B build-visionos -G Xcode \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_SYSROOT=xros \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-DLLAMA_OPENSSL=OFF \
-DLLAMA_HTTPLIB=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-S .
cmake --build build-visionos --config Release -- -quiet
Expand All @@ -467,10 +464,9 @@ cmake -B build-visionos-sim -G Xcode \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_SYSROOT=xrsimulator \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
-DLLAMA_OPENSSL=OFF \
-DLLAMA_HTTPLIB=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-S .
cmake --build build-visionos-sim --config Release -- -quiet
Expand Down Expand Up @@ -528,13 +524,13 @@ combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false"

# Create XCFramework with correct debug symbols paths
echo "Creating XCFramework..."
xcodebuild -create-xcframework \
xcrun xcodebuild -create-xcframework \
-framework $(pwd)/build-ios-sim/framework/llama.framework \
-debug-symbols $(pwd)/build-ios-sim/dSYMs/llama.dSYM \
-framework $(pwd)/build-ios-device/framework/llama.framework \
-debug-symbols $(pwd)/build-ios-device/dSYMs/llama.dSYM \
-framework $(pwd)/build-macos/framework/llama.framework \
-debug-symbols $(pwd)/build-macos/dSYMS/llama.dSYM \
-debug-symbols $(pwd)/build-macos/dSYMs/llama.dSYM \
-framework $(pwd)/build-visionos/framework/llama.framework \
-debug-symbols $(pwd)/build-visionos/dSYMs/llama.dSYM \
-framework $(pwd)/build-visionos-sim/framework/llama.framework \
Expand Down
38 changes: 11 additions & 27 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ find_package(Threads REQUIRED)
llama_add_compile_flags()

# Build info header
#

if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")
Expand Down Expand Up @@ -110,33 +109,16 @@ if (BUILD_SHARED_LIBS)
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()

# TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
set(LLAMA_COMMON_EXTRA_LIBS build_info)

if (LLAMA_HTTPLIB)
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
endif()
target_link_libraries(${TARGET} PRIVATE
build_info
cpp-httplib
)

if (LLAMA_LLGUIDANCE)
include(ExternalProject)
set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)

# Set the correct library file extension based on platform
if (WIN32)
set(LLGUIDANCE_LIB_NAME "llguidance.lib")
# Add Windows-specific libraries
set(LLGUIDANCE_PLATFORM_LIBS
ws2_32 # Windows Sockets API
userenv # For GetUserProfileDirectoryW
ntdll # For NT functions
bcrypt # For BCryptGenRandom
)
else()
set(LLGUIDANCE_LIB_NAME "libllguidance.a")
set(LLGUIDANCE_PLATFORM_LIBS "")
endif()
set(LLGUIDANCE_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}llguidance${CMAKE_STATIC_LIBRARY_SUFFIX}")

ExternalProject_Add(llguidance_ext
GIT_REPOSITORY https://github.com/guidance-ai/llguidance
Expand All @@ -158,8 +140,10 @@ if (LLAMA_LLGUIDANCE)
add_dependencies(llguidance llguidance_ext)

target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
# Add platform libraries to the main target
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
endif ()
target_link_libraries(${TARGET} PRIVATE llguidance)
if (WIN32)
target_link_libraries(${TARGET} PRIVATE ws2_32 userenv ntdll bcrypt)
endif()
endif()

target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
target_link_libraries(${TARGET} PUBLIC llama Threads::Threads)
12 changes: 1 addition & 11 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1301,7 +1301,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, bool value) {
params.kv_unified = value;
}
).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED, LLAMA_EXAMPLE_BENCH}));
).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED, LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
add_opt(common_arg(
{"--context-shift"},
{"--no-context-shift"},
Expand Down Expand Up @@ -3437,16 +3437,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.speculative.ngram_size_m = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
{"--spec-ngram-check-rate"}, "N",
string_format("ngram check rate for ngram-simple/ngram-map speculative decoding (default: %d)", params.speculative.ngram_check_rate),
[](common_params & params, int value) {
if (value < 1) {
throw std::invalid_argument("ngram check rate must be at least 1");
}
params.speculative.ngram_check_rate = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
{"--spec-ngram-min-hits"}, "N",
string_format("minimum hits for ngram-map speculative decoding (default: %d)", params.speculative.ngram_min_hits),
Expand Down
Loading
Loading