Skip to content

Commit 6006f68

Browse files
ericcurtin
authored and p1-0tr committed
Set ngl to 999 by default to align with upstream
Upstream now defaults to 999, which means "use the maximum available number of layers". We could try to rely on the upstream defaults, but there is one case where we manually set the value to 0, which makes that tricky, so aligning is better for now. Signed-off-by: Eric Curtin <[email protected]>
1 parent 620fba8 commit 6006f68

File tree

4 files changed

+12
-12
lines changed

4 files changed

+12
-12
lines changed

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ help:
7575
@echo " help - Show this help message"
7676
@echo ""
7777
@echo "Backend configuration options:"
78-
@echo " LLAMA_ARGS - Arguments for llama.cpp (e.g., \"--verbose --jinja -ngl 100 --ctx-size 2048\")"
78+
@echo " LLAMA_ARGS - Arguments for llama.cpp (e.g., \"--verbose --jinja -ngl 999 --ctx-size 2048\")"
7979
@echo ""
8080
@echo "Example usage:"
81-
@echo " make run LLAMA_ARGS=\"--verbose --jinja -ngl 100 --ctx-size 2048\""
82-
@echo " make docker-run LLAMA_ARGS=\"--verbose --jinja -ngl 100 --threads 4 --ctx-size 2048\""
81+
@echo " make run LLAMA_ARGS=\"--verbose --jinja -ngl 999 --ctx-size 2048\""
82+
@echo " make docker-run LLAMA_ARGS=\"--verbose --jinja -ngl 999 --threads 4 --ctx-size 2048\""

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string,
256256
if runtime.GOOS == "windows" && runtime.GOARCH == "arm64" && mdlConfig.Quantization != "Q4_0" {
257257
ngl = 0 // only Q4_0 models can be accelerated on Adreno
258258
}
259-
ngl = 100
259+
ngl = 999
260260
}
261261

262262
// TODO(p1-0tr): for now assume we are running on GPU (single one) - Devices[1];

pkg/inference/backends/llamacpp/llamacpp_config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ type Config struct {
1818

1919
// NewDefaultLlamaCppConfig creates a new LlamaCppConfig with default values.
2020
func NewDefaultLlamaCppConfig() *Config {
21-
args := append([]string{"--jinja", "-ngl", "100", "--metrics"})
21+
args := append([]string{"--jinja", "-ngl", "999", "--metrics"})
2222

2323
// Special case for Windows ARM64
2424
if runtime.GOOS == "windows" && runtime.GOARCH == "arm64" {

pkg/inference/backends/llamacpp/llamacpp_config_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ func TestNewDefaultLlamaCppConfig(t *testing.T) {
3232
if nglIndex+1 >= len(config.Args) {
3333
t.Error("No value found after -ngl argument")
3434
}
35-
if config.Args[nglIndex+1] != "100" {
36-
t.Errorf("Expected -ngl value to be 100, got %s", config.Args[nglIndex+1])
35+
if config.Args[nglIndex+1] != "999" {
36+
t.Errorf("Expected -ngl value to be 999, got %s", config.Args[nglIndex+1])
3737
}
3838

3939
// Test Windows ARM64 specific case
@@ -87,7 +87,7 @@ func TestGetArgs(t *testing.T) {
8787
},
8888
expected: []string{
8989
"--jinja",
90-
"-ngl", "100",
90+
"-ngl", "999",
9191
"--metrics",
9292
"--model", modelPath,
9393
"--host", socket,
@@ -102,7 +102,7 @@ func TestGetArgs(t *testing.T) {
102102
},
103103
expected: []string{
104104
"--jinja",
105-
"-ngl", "100",
105+
"-ngl", "999",
106106
"--metrics",
107107
"--model", modelPath,
108108
"--host", socket,
@@ -121,7 +121,7 @@ func TestGetArgs(t *testing.T) {
121121
},
122122
expected: []string{
123123
"--jinja",
124-
"-ngl", "100",
124+
"-ngl", "999",
125125
"--metrics",
126126
"--model", modelPath,
127127
"--host", socket,
@@ -143,7 +143,7 @@ func TestGetArgs(t *testing.T) {
143143
},
144144
expected: []string{
145145
"--jinja",
146-
"-ngl", "100",
146+
"-ngl", "999",
147147
"--metrics",
148148
"--model", modelPath,
149149
"--host", socket,
@@ -162,7 +162,7 @@ func TestGetArgs(t *testing.T) {
162162
},
163163
expected: []string{
164164
"--jinja",
165-
"-ngl", "100",
165+
"-ngl", "999",
166166
"--metrics",
167167
"--model", modelPath,
168168
"--host", socket,

0 commit comments

Comments
 (0)