
Commit 7c8c460

changes for mistral model
1 parent 03a2034 commit 7c8c460

File tree

12 files changed, +777 -1 lines changed

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
apiVersion: v1
kind: Namespace
metadata:
  name: gradio-mistral-tran1
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gradio-deployment
  namespace: gradio-mistral-tran1
  labels:
    app: gradio
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gradio
  template:
    metadata:
      labels:
        app: gradio
    spec:
      containers:
        - name: gradio
          image: public.ecr.aws/data-on-eks/gradio-web-app-base:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 7860
          resources:
            requests:
              cpu: "512m"
              memory: "2048Mi"
            limits:
              cpu: "1"
              memory: "4096Mi"
          env:
            - name: MODEL_ENDPOINT
              value: "/infer"
            - name: SERVICE_NAME
              value: "http://mistral-serve-svc.mistral.svc.cluster.local:8000"
          volumeMounts:
            - name: gradio-app-script
              mountPath: /app/gradio-app.py
              subPath: gradio-app-mistral-tran1.py
      volumes:
        - name: gradio-app-script
          configMap:
            name: gradio-app-script
---
apiVersion: v1
kind: Service
metadata:
  name: gradio-service
  namespace: gradio-mistral-tran1
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
spec:
  selector:
    app: gradio
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 7860
  type: LoadBalancer
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: gradio-app-script
  namespace: gradio-mistral-tran1
data:
  gradio-app-mistral-tran1.py: |
    import gradio as gr
    import requests
    import os

    # Constants for model endpoint and service name
    model_endpoint = "/infer"
    service_name = os.environ.get("SERVICE_NAME", "http://localhost:8000")

    # Function to generate text
    def text_generation(message, history):
        prompt = message

        # Create the URL for the inference
        url = f"{service_name}{model_endpoint}"

        try:
            # Send the request to the model service
            response = requests.get(url, params={"sentence": prompt}, timeout=180)
            response.raise_for_status()  # Raise an exception for HTTP errors

            full_output = response.json()[0]
            # Removing the original question from the output
            answer_only = full_output.replace(prompt, "", 1).strip('["]?\n')

            # Safety filter to remove harmful or inappropriate content
            answer_only = filter_harmful_content(answer_only)
            return answer_only
        except requests.exceptions.RequestException as e:
            # Handle any request exceptions (e.g., connection errors)
            return f"AI: Error: {str(e)}"

    # Define the safety filter function (you can implement this as needed)
    def filter_harmful_content(text):
        # TODO: Implement a safety filter to remove any harmful or inappropriate content from the text

        # For now, simply return the text as-is
        return text

    # Define the Gradio ChatInterface
    chat_interface = gr.ChatInterface(
        text_generation,
        chatbot=gr.Chatbot(line_breaks=True),
        textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
        title="neuron-mistral7bv0.3 AI Chat",
        description="Ask me any question",
        theme="soft",
        examples=["How many languages are in India", "What is Generative AI?"],
        cache_examples=False,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )

    # Launch the ChatInterface
    chat_interface.launch(server_name="0.0.0.0")
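
The ConfigMap script above simply forwards the chat prompt to the Ray Serve backend as a GET request on /infer and returns the first element of the JSON response. As a quick smoke test of that backend without the Gradio UI, something along these lines should work; the local port-forward (and therefore the localhost URL) is an assumption, not part of this commit:

    import requests

    # Assumption: `kubectl port-forward -n mistral svc/mistral-serve-svc 8000:8000`
    # is running locally. From inside the cluster, the service DNS name used by
    # the Deployment above can be used directly:
    # http://mistral-serve-svc.mistral.svc.cluster.local:8000
    base_url = "http://localhost:8000"

    response = requests.get(
        f"{base_url}/infer",
        params={"sentence": "What is Generative AI?"},
        timeout=180,
    )
    response.raise_for_status()
    # The service returns a JSON list; the first element holds the generated text.
    print(response.json()[0])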
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - gradio-ui.yaml

manifests/modules/aiml/chatbot/nodepool/kustomization.yaml

Lines changed: 1 addition & 0 deletions
@@ -3,3 +3,4 @@ kind: Kustomization
 resources:
   - nodepool-inf2.yaml
   - nodepool-x86.yaml
+  - nodepool-tran1.yaml
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  name: trainium-tran1
spec:
  template:
    metadata:
      labels:
        instanceType: trainium
        provisionerType: Karpenter
        neuron.amazonaws.com/neuron-device: "true"
    spec:
      startupTaints:
        - key: node.kubernetes.io/not-ready
          effect: "NoExecute"
      taints:
        - key: aws.amazon.com/neuron
          effect: "NoSchedule"
      requirements:
        - key: node.kubernetes.io/instance-type
          operator: In
          values: ["trn1.2xlarge"]
        - key: "kubernetes.io/arch"
          operator: In
          values: ["amd64"]
        - key: "karpenter.sh/capacity-type"
          operator: In
          values: ["on-demand"]
      expireAfter: 720h
      terminationGracePeriod: 24h
      nodeClassRef:
        group: karpenter.k8s.aws
        kind: EC2NodeClass
        name: trainium-tran1
  limits:
    cpu: 100
    memory: 400Gi
    aws.amazon.com/neuron: 10
  disruption:
    consolidateAfter: 300s
    consolidationPolicy: WhenEmptyOrUnderutilized

---
apiVersion: karpenter.k8s.aws/v1
kind: EC2NodeClass
metadata:
  name: trainium-tran1
spec:
  amiFamily: AL2
  amiSelectorTerms:
    - alias: al2@latest
  blockDeviceMappings:
    - deviceName: /dev/xvda
      ebs:
        deleteOnTermination: true
        encrypted: true
        volumeSize: 500Gi
        volumeType: gp3
  role: ${KARPENTER_NODE_ROLE}
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
  tags:
    app.kubernetes.io/created-by: eks-workshop
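
The NodePool above labels Trainium nodes with instanceType: trainium and taints them with aws.amazon.com/neuron, so only pods that tolerate that taint (and typically request a Neuron device) are scheduled there. A minimal sketch of such a pod follows; the pod name, namespace, and container image are placeholders, not part of this commit:

    apiVersion: v1
    kind: Pod
    metadata:
      name: neuron-smoke-test        # placeholder name
      namespace: default             # placeholder namespace
    spec:
      nodeSelector:
        instanceType: trainium       # matches the NodePool label above
      tolerations:
        - key: aws.amazon.com/neuron # tolerates the NodePool taint above
          operator: Exists
          effect: NoSchedule
      containers:
        - name: app
          image: public.ecr.aws/docker/library/busybox:latest   # placeholder image
          command: ["sleep", "3600"]
          resources:
            limits:
              aws.amazon.com/neuron: 1   # one Neuron device, exposed by the Neuron device plugin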

manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml

Lines changed: 4 additions & 1 deletion
@@ -11,6 +11,9 @@ spec:
         provisionerType: Karpenter
         workload: rayhead
     spec:
+      startupTaints:
+        - key: node.kubernetes.io/not-ready
+          effect: "NoExecute"
       requirements:
         - key: "karpenter.k8s.aws/instance-family"
           operator: In
@@ -20,7 +23,7 @@ spec:
           values: ["amd64"]
         - key: "karpenter.sh/capacity-type"
           operator: In
-          values: ["on-demand", "spot"]
+          values: ["on-demand"]
       expireAfter: 720h
       terminationGracePeriod: 24h
       nodeClassRef:
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
# https://hub.docker.com/layers/rayproject/ray/2.11.0-py310/images/sha256-de798e487b76a8f2412c718c43c5f342b3eb05e0705a71325102904cd27c3613?context=explore
FROM rayproject/ray:2.32.0-py310

# Maintainer label
LABEL maintainer="DoEKS"

# Set the debconf frontend to noninteractive (this prevents some prompts)
ENV DEBIAN_FRONTEND=noninteractive

# Switch to root to add the Neuron repo and install necessary packages
USER root

# Set up the Neuron repository and install Neuron packages
RUN . /etc/os-release && \
    sudo echo "deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main" > /etc/apt/sources.list.d/neuron.list && \
    sudo wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - && \
    sudo apt-get update -y && \
    sudo apt-get install aws-neuronx-dkms aws-neuronx-collectives=2.* aws-neuronx-runtime-lib=2.* aws-neuronx-tools=2.* -y && \
    sudo apt-get clean && \
    sudo rm -rf /var/lib/apt/lists/*

# Switch back to a non-root user for the subsequent commands
USER $USER

# Set the pip extra index to the Neuron repository and install required Python packages
RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com && \
    pip install wget awscli regex neuronx-cc==2.* torch-neuronx torchvision transformers-neuronx sentencepiece transformers huggingface_hub tenacity psutil fastapi uvicorn mistral-inference mistral-common

# Add the Neuron tools path to PATH
ENV PATH /opt/aws/neuron/bin:$PATH

# Set LD_LIBRARY_PATH to include the directory with libpython3.10.so.1.0
ENV LD_LIBRARY_PATH /home/ray/anaconda3/lib:$LD_LIBRARY_PATH

# Create the serve app directory
RUN mkdir -p /serve_app

# Set working directory
WORKDIR /serve_app

COPY mistral1.py /serve_app/mistral1.py
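
The pip line above pulls the Neuron Python stack from the Neuron pip repository. A small sanity check, assuming it is run inside the built image (that assumption, and the package list, are the only inputs), confirms the key distributions resolved:

    from importlib.metadata import version

    # Package names taken from the pip install line in the Dockerfile above.
    for pkg in ("neuronx-cc", "torch-neuronx", "transformers-neuronx", "transformers"):
        print(pkg, version(pkg))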
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ray_service_mistral.yaml
