forked from ggml-org/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdeployment.yaml
102 lines (102 loc) · 3.33 KB
/
deployment.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "server.llama.cpp.fullname" . }}
labels:
{{- include "server.llama.cpp.labels" . | nindent 4 }}
spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "server.llama.cpp.selectorLabels" . | nindent 6 }}
template:
metadata:
annotations:
{{- include "server.llama.cpp.labels" . | nindent 8 }}
{{- if .Values.server.metrics }}
prometheus.io/scrape: 'true'
prometheus.io/port: '{{ .Values.server.port }}'
{{- end }}
{{- with .Values.podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
prometheus.io/scrape: 'true'
{{- include "server.llama.cpp.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.images.server.repository }}:{{ .Values.images.server.name }}-{{ .Values.images.server.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.images.pullPolicy }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
command:
- {{ .Values.server.command }}
args:
- --host
- {{ .Values.server.host }}
- --port
- "{{ .Values.server.port }}"
- --model
- {{ .Values.model.path }}/{{ regexReplaceAll "(.*/)?([^/]+).gguf" .Values.model.file "${2}.gguf" }}
- --cont-batching
- --alias
- {{ .Values.model.alias }}
- --ctx-size
- "{{ .Values.server.kvCache.size }}"
- --parallel
- "{{ .Values.server.slots }}"
{{- if .Values.server.embeddings }}
- --embedding
{{- end }}
{{- if .Values.server.metrics }}
- --metrics
{{- end }}
- --log-format
- {{ .Values.server.log.format }}
{{- if .Values.server.log.disabled }}
- --log-disable
{{- end }}
{{- with .Values.server.extraArgs }}
{{- toYaml . | nindent 12 }}
{{- end }}
ports:
- name: http
containerPort: {{ .Values.server.port }}
protocol: TCP
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
volumeMounts:
- mountPath: {{ .Values.model.path }}
name: models
readOnly: true
volumes:
- name: models
persistentVolumeClaim:
claimName: {{ include "server.llama.cpp.fullname" . }}
readOnly: true