# serve.yaml (39 lines, 1.2 KB)
# Source: a fork of skypilot-org/skypilot.
# Hardware requested for the serving node: one A100 accelerator and a
# disk of size 1024 on the high-performance disk tier.
resources:
  accelerators: A100:1
  disk_size: 1024
  # Note: the disk_tier option is not available in skypilot<=0.2.5;
  # SkyPilot must be installed from source to use it.
  disk_tier: high
# Setup script: activates (creating it if needed) the `chatbot` conda
# environment, installs the Python dependencies, and produces the Vicuna
# weights in ~/vicuna-${MODEL_SIZE}b by applying the published delta to the
# base LLaMA checkpoint. MODEL_SIZE is supplied by the `envs` section.
setup: |
  # Reuse the environment if it already exists; otherwise create it first.
  conda activate chatbot
  if [ $? -ne 0 ]; then
    conda create -n chatbot python=3.9 -y
    conda activate chatbot
  fi

  # Install dependencies
  pip install torch==1.12.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
  pip install git+https://github.com/lm-sys/FastChat.git
  pip install git+https://github.com/huggingface/transformers.git@41a2f3529c6b56866c317031375ffd3e7b8bea01

  echo "Downloading model..."
  python3 -m fastchat.model.apply_delta \
    --base huggyllama/llama-${MODEL_SIZE}b \
    --target ~/vicuna-${MODEL_SIZE}b \
    --delta lmsys/vicuna-${MODEL_SIZE}b-delta-v1.1
# Run script: starts the FastChat controller and model worker in the
# background (output redirected to ~/controller.log and ~/model_worker.log),
# then runs the Gradio web server in the foreground with its stdout mirrored
# to ~/gradio.log.
run: |
  conda activate chatbot

  echo 'Starting controller...'
  python3 -m fastchat.serve.controller > ~/controller.log 2>&1 &
  # Fixed pause before launching the next component (presumably to give the
  # controller time to finish starting).
  sleep 10

  echo 'Starting model worker...'
  python3 -m fastchat.serve.model_worker --model-path ~/vicuna-${MODEL_SIZE}b > ~/model_worker.log 2>&1 &
  # Fixed pause before launching the web server (presumably to give the
  # worker time to load the model).
  sleep 10

  echo 'Starting gradio server...'
  python3 -m fastchat.serve.gradio_web_server --share | tee ~/gradio.log
# Environment variables made available to the setup and run scripts.
envs:
  # Model size tag substituted into the llama-${MODEL_SIZE}b and
  # vicuna-${MODEL_SIZE}b names used by the scripts. Quoted because
  # environment variable values are strings.
  MODEL_SIZE: "7"