File tree 2 files changed +42
-3
lines changed 2 files changed +42
-3
lines changed Original file line number Diff line number Diff line change @@ -6,4 +6,25 @@ metadata:
6
6
app.kubernetes.io/managed-by : kustomize
7
7
name : nimcache-sample
8
8
spec :
9
- # TODO(user): Add fields here
9
+ source :
10
+ ngc :
11
+ modelPuller : nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
12
+ pullSecret : ngc-secret
13
+ authSecret : ngc-api-secret
14
+ model :
15
+ profiles : []
16
+ autoDetect : true
17
+ precision : "fp8"
18
+ engine : "tensorrt_llm"
19
+ qosProfile : "throughput"
20
+ gpus :
21
+ product : "l40s"
22
+ ids :
23
+ - "26b5"
24
+ tensorParallelism : "1"
25
+ storage :
26
+ pvc :
27
+ create : true
28
+ storageClass : "local-path"
29
+ size : "50Gi"
30
+ volumeAccessMode : ReadWriteOnce
Original file line number Diff line number Diff line change @@ -4,6 +4,24 @@ metadata:
4
4
labels :
5
5
app.kubernetes.io/name : k8s-nim-operator
6
6
app.kubernetes.io/managed-by : kustomize
7
- name : nimservice-sample
7
+ name : meta-llama3-8b-instruct
8
8
spec :
9
- # TODO(user): Add fields here
9
+ image :
10
+ repository : nvcr.io/nim/meta/llama3-8b-instruct
11
+ tag : 1.0.0
12
+ pullPolicy : IfNotPresent
13
+ pullSecrets :
14
+ - ngc-secret
15
+ authSecret : ngc-api-secret
16
+ storage :
17
+ nimCache :
18
+ name : meta-llama3-8b-instruct
19
+ profile : ''
20
+ replicas : 1
21
+ resources :
22
+ limits :
23
+ nvidia.com/gpu : 1
24
+ expose :
25
+ service :
26
+ type : ClusterIP
27
+ openaiPort : 8000
You can’t perform that action at this time.
0 commit comments