Skip to content

Commit

Permalink
Merge pull request #1 from stackhpc/feature/pydantic
Browse files Browse the repository at this point in the history
Refactor to use Pydantic-validated config file for frontend web app
  • Loading branch information
sd109 authored Dec 21, 2023
2 parents c9e65c9 + 9dc5974 commit de4324c
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 191 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/build-push-artifacts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,15 @@ jobs:
type=sha,prefix=
- name: Build and push image
uses: stackhpc/github-actions/docker-multiarch-build-push@master
uses: stackhpc/github-actions/docker-multiarch-build-push@allow-continue-after-scan
with:
cache-key: ${{ matrix.component }}-base
context: ./images/${{ matrix.component }}-base
platforms: linux/amd64 #,linux/arm64
push: true
tags: ${{ steps.image-meta.outputs.tags }}
labels: ${{ steps.image-meta.outputs.labels }}
fail_on_high_severity_cve: false

build_push_chart:
name: Build and push Helm chart
Expand Down
6 changes: 4 additions & 2 deletions .helmignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
# Others
README.md
Dockerfile
gradio-app.py
kubeconfig.yml
venv/
__pycache__/
images/
images/
.hf-token.secret
hu-poc/
test-values.yaml
59 changes: 0 additions & 59 deletions app-dev.py

This file was deleted.

2 changes: 1 addition & 1 deletion images/ui-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ FROM python:3.11-slim

ENV GRADIO_SERVER_PORT=7680

RUN pip install --no-cache-dir gradio==3.50.2 huggingface-hub==0.18.0
RUN pip install --no-cache-dir gradio==4.10.0 huggingface-hub==0.19.4 pydantic-settings==2.1.0
4 changes: 3 additions & 1 deletion templates/ui/app-config-map.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ metadata:
labels:
{{- include "azimuth-llm.labels" . | nindent 4 }}
data:
{{ (.Files.Glob "web-app-utils/*").AsConfig | nindent 2 }}
{{ (.Files.Glob "web-app/*").AsConfig | nindent 2 }}
settings.yml: |
{{- .Values.ui.appSettings | toYaml | nindent 4 }}
3 changes: 2 additions & 1 deletion templates/ui/deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ spec:
ports:
- name: ui
containerPort: 7680
workingDir: /etc/web-app
volumeMounts:
- name: app
mountPath: /etc/web-app
command:
- python
args:
- {{ printf "/etc/web-app/%s" .Values.ui.entrypoint }}
- {{ .Values.ui.entrypoint }}
env:
- name: PYTHONUNBUFFERED
value: "1"
Expand Down
12 changes: 8 additions & 4 deletions values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ huggingface:
api:
# Container image config
image:
repository: ghcr.io/stackhpc/azimuth-llm-api-base
version: "6876068"
repository: vllm/vllm-openai
version: v0.2.4
# Service config
service:
name: llm-backend
Expand Down Expand Up @@ -65,8 +65,12 @@ api:

# Configuration for the frontend web interface
ui:
# The file from the UI config map to execute as the frontend app
entrypoint: example_app_vanilla.py
# The file from the UI config map to execute as the entrypoint to the frontend app
entrypoint: app.py
# The values to be written to settings.yml for parsing as frontend app settings
# (see app.py and config.py for an example of using pydantic-settings to configure the app)
appSettings:
prompt_template: ""
# Container image config
image:
repository: ghcr.io/stackhpc/azimuth-llm-ui-base
Expand Down
58 changes: 0 additions & 58 deletions web-app-utils/example_app_playful.py

This file was deleted.

62 changes: 0 additions & 62 deletions web-app-utils/example_app_vanilla.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import requests, time
from urllib.parse import urljoin


def wait_for_backend(url):
Expand All @@ -7,10 +8,11 @@ def wait_for_backend(url):
accept requests until the backend API is up and running.
"""
ready = False
endpoint = urljoin(url, "/health")
while not ready:
try:
ready = requests.get(f"{url}/health").status_code == 200
print("Waiting for backend API to start")
ready = requests.get(endpoint).status_code == 200
print(f"Waiting for 200 status from backend API at {endpoint}")
time.sleep(1)
except requests.exceptions.ConnectionError as e:
pass
Expand Down
60 changes: 60 additions & 0 deletions web-app/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Entrypoint for the Gradio-based LLM chat frontend.
# Loads Pydantic-validated settings from settings.yml, then blocks until the
# backend API is reachable before constructing the UI.
import requests, json
from urllib.parse import urljoin
import gradio as gr
from api_startup_check import wait_for_backend
from config import AppSettings

# AppSettings is a pydantic-settings model; load() reads ./settings.yml
# (written into the config map by the Helm chart from .Values.ui.appSettings).
settings = AppSettings.load("./settings.yml")

# backend_url is presumably a pydantic URL type — convert to str once so it
# can be passed to requests/urljoin below. TODO confirm against config.py.
backend_url = str(settings.backend_url)
# Block at startup until the backend health endpoint responds, so the UI
# never serves requests against a dead backend.
wait_for_backend(backend_url)


def inference(message, history):
    """Streaming chat handler for gr.ChatInterface.

    Args:
        message: The latest user message.
        history: Gradio chat history as a list of
            (user_input, system_response) pairs.

    Yields:
        The generated text received so far from the backend, with the
        prompt context trimmed off.
    """
    # Rebuild the conversation context from the chat history using the
    # configurable templates from settings.
    context = ""
    for user_input, system_response in history:
        # NOTE(review): both branches below test the same flag
        # (include_past_system_responses_in_context). This first one looks
        # like a copy-paste slip and probably should gate on a separate
        # "include past user inputs" setting — confirm against config.py.
        if settings.include_past_system_responses_in_context:
            context += settings.user_context_template.format(user_input=user_input)
        if settings.include_past_system_responses_in_context:
            context += settings.system_context_template.format(
                system_response=system_response
            )
    # The current message is always appended, regardless of the flags above.
    context += settings.user_context_template.format(user_input=message)

    headers = {"User-Agent": "vLLM Client"}
    # Request body for the backend /generate endpoint; llm_params is merged
    # in last so it can override stream/max_tokens — TODO confirm intended.
    payload = {
        "prompt": settings.prompt_template.format(context=context),
        "stream": True,
        "max_tokens": settings.llm_max_tokens,
        **settings.llm_params,
    }
    response = requests.post(
        urljoin(backend_url, "/generate"), headers=headers, json=payload, stream=True
    )

    # The backend streams NUL-delimited JSON chunks; each chunk carries the
    # full text generated so far in data["text"][0].
    for chunk in response.iter_lines(
        chunk_size=8192, decode_unicode=False, delimiter=b"\0"
    ):
        if chunk:
            data = json.loads(chunk.decode("utf-8"))
            output = data["text"][0]
            # Manually trim the context from output
            # (the last line of the prompt template is used as the marker
            # separating the echoed prompt from the model's reply).
            delimiter = settings.prompt_template.splitlines()[-1]
            if delimiter in output:
                output = output.split(delimiter)[-1]
            yield output


# Build the chat UI around the streaming inference() generator and start the
# Gradio server. server_name="0.0.0.0" binds all interfaces so the app is
# reachable from outside the container (port comes from GRADIO_SERVER_PORT,
# set in the image).
gr.ChatInterface(
    inference,
    chatbot=gr.Chatbot(
        height=500,
        show_copy_button=True,
        # layout='panel',
    ),
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    # Page title is configurable via the Pydantic settings file.
    title=settings.page_title,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
).queue().launch(server_name="0.0.0.0")
Loading

0 comments on commit de4324c

Please sign in to comment.