Commit 1120847

feat: bump llama.cpp, add gguf support (#943)
**Description**

This PR syncs up the `llama` backend to use `gguf` (go-skynet/go-llama.cpp#180). It also adds `llama-stable` to the build targets so that ggml models can still be loaded. It adapts the current tests to use the `llama-stable` backend for ggml and uses a `gguf` model to run tests on the new backend. In order to consume the new version of go-llama.cpp, it also bumps Go to 1.21 (images, pipelines, etc.).

Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 704323b · commit 1120847
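With two llama backends in the tree, a model definition can pin which loader to use rather than relying on auto-detection. A minimal sketch of the idea, assuming LocalAI's usual YAML model-config fields; the model names and file names below are placeholders (two separate config files, shown back to back), not artifacts shipped by this PR:

name: openllama-3b-gguf
backend: llama            # the gguf-capable backend from the bumped go-llama.cpp
context_size: 128
f16: true
mmap: true
parameters:
  model: open-llama-3b.Q4_0.gguf

name: openllama-3b-ggml
backend: llama-stable     # the pinned ggml loader this PR keeps around
context_size: 128
parameters:
  model: open-llama-3b.ggmlv3.q4_0.bin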

File tree

7 files changed: +89 −16


Diff for: .github/workflows/release.yaml (+6)

@@ -22,6 +22,9 @@ jobs:
         uses: actions/checkout@v3
         with:
           submodules: true
+      - uses: actions/setup-go@v4
+        with:
+          go-version: '>=1.21.0'
       - name: Dependencies
         run: |
           sudo apt-get update
@@ -60,6 +63,9 @@ jobs:
         uses: actions/checkout@v3
         with:
           submodules: true
+      - uses: actions/setup-go@v4
+        with:
+          go-version: '>=1.21.0'
       - name: Build
         id: build
         env:

Diff for: .github/workflows/test.yml (+2 −2)

@@ -18,7 +18,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        go-version: ['1.20.x', 'stable']
+        go-version: ['1.21.x']
     steps:
       - name: Clone
         uses: actions/checkout@v3
@@ -63,7 +63,7 @@ jobs:
     runs-on: macOS-latest
     strategy:
       matrix:
-        go-version: ['1.20.x', 'stable']
+        go-version: ['1.21.x']
     steps:
       - name: Clone
         uses: actions/checkout@v3

Diff for: Dockerfile (+1 −1)

@@ -1,4 +1,4 @@
-ARG GO_VERSION=1.20-bullseye
+ARG GO_VERSION=1.21-bullseye
 
 FROM golang:$GO_VERSION as requirements
 

Diff for: Makefile (+8 −3)

@@ -4,7 +4,7 @@ GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
 # llama.cpp versions
-GOLLAMA_VERSION?=f03869d188b72c8a617bea3a36cf8eb43f73445c
+GOLLAMA_VERSION?=0ef04cde78e5da41de234832d73bb768ced709e7
 
 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
 
@@ -103,7 +103,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 	OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif
 
-GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
+GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
 
 .PHONY: all test build vendor
 
@@ -302,9 +302,10 @@ test: prepare test-models/testmodel grpcs
 	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
 	HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama" --flake-attempts 5 -v -r ./api ./pkg
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
 	$(MAKE) test-gpt4all
 	$(MAKE) test-llama
+	$(MAKE) test-llama-gguf
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion
 
@@ -316,6 +317,10 @@ test-llama: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r ./api ./pkg
 
+test-llama-gguf: prepare-test
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
+
 test-tts: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r ./api ./pkg
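With these targets in place, the gguf specs can be run in isolation via `make test-llama-gguf`; the aggregate `test` target now filters `llama-gguf` out of the generic Ginkgo run and invokes the suite as its own step, mirroring the existing `test-llama` flow.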

Diff for: api/api_test.go (+71 −1)

@@ -296,7 +296,7 @@ var _ = Describe("API test", func() {
 		response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 			URL:       "github:go-skynet/model-gallery/openllama_3b.yaml",
 			Name:      "openllama_3b",
-			Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
+			Overrides: map[string]interface{}{"backend": "llama-stable", "mmap": true, "f16": true, "context_size": 128},
 		})
 
 		Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -359,6 +359,76 @@ var _ = Describe("API test", func() {
 		Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
 	})
 
+	It("runs openllama gguf", Label("llama-gguf"), func() {
+		if runtime.GOOS != "linux" {
+			Skip("test supported only on linux")
+		}
+		response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
+			URL:       "github:go-skynet/model-gallery/openllama-3b-gguf.yaml",
+			Name:      "openllama_3b_gguf",
+			Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
+		})
+
+		Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
+
+		uuid := response["uuid"].(string)
+
+		Eventually(func() bool {
+			response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
+			return response["processed"].(bool)
+		}, "360s", "10s").Should(Equal(true))
+
+		By("testing completion")
+		resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b_gguf", Prompt: "Count up to five: one, two, three, four, "})
+		Expect(err).ToNot(HaveOccurred())
+		Expect(len(resp.Choices)).To(Equal(1))
+		Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
+
+		By("testing functions")
+		resp2, err := client.CreateChatCompletion(
+			context.TODO(),
+			openai.ChatCompletionRequest{
+				Model: "openllama_3b_gguf",
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role:    "user",
+						Content: "What is the weather like in San Francisco (celsius)?",
+					},
+				},
+				Functions: []openai.FunctionDefinition{
+					openai.FunctionDefinition{
+						Name:        "get_current_weather",
+						Description: "Get the current weather",
+						Parameters: jsonschema.Definition{
+							Type: jsonschema.Object,
+							Properties: map[string]jsonschema.Definition{
+								"location": {
+									Type:        jsonschema.String,
+									Description: "The city and state, e.g. San Francisco, CA",
+								},
+								"unit": {
+									Type: jsonschema.String,
+									Enum: []string{"celcius", "fahrenheit"},
+								},
+							},
+							Required: []string{"location"},
+						},
+					},
+				},
+			})
+		Expect(err).ToNot(HaveOccurred())
+		Expect(len(resp2.Choices)).To(Equal(1))
+		Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
+		Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
+
+		var res map[string]string
+		err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
+		Expect(err).ToNot(HaveOccurred())
+		Expect(res["location"]).To(Equal("San Francisco, California"), fmt.Sprint(res))
+		Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
+		Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
+	})
+
 	It("runs gpt4all", Label("gpt4all"), func() {
 		if runtime.GOOS != "linux" {
 			Skip("test supported only on linux")

Diff for: go.mod (+1 −1)

@@ -1,6 +1,6 @@
 module github.com/go-skynet/LocalAI
 
-go 1.20
+go 1.21
 
 require (
 	github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df

Diff for: pkg/backend/llm/llama/llama.go (−8)

@@ -32,14 +32,6 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 		llama.WithRopeFreqScale(ropeFreqScale),
 	}
 
-	if opts.NGQA != 0 {
-		llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
-	}
-
-	if opts.RMSNormEps != 0 {
-		llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
-	}
-
 	if opts.ContextSize != 0 {
 		llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
 	}
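The removed `NGQA` and `RMSNormEps` overrides were knobs that had to be passed in manually for ggml-era LLaMA-2 models; presumably they are dropped here because gguf files carry these hyperparameters in their own metadata, so the bumped go-llama.cpp no longer needs (or exposes) `WithGQA`/`WithRMSNormEPS` on this code path.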
