Skip to content

Commit 478bf5b

Browse files
jeremyfowers, amd-pworfolk, danielholanda, ramkrishna2910
committed
Lemonade release v6.0.0: new OpenAI server, improvements, fixes (#291)
Co-authored-by: amd-pworfolk <[email protected]> Co-authored-by: Daniel Holanda <[email protected]> Co-authored-by: Ramakrishnan Sivakumar <[email protected]>
1 parent 633913c commit 478bf5b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+2110
-1272
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
name: Server Installer Windows-Latest Build and Test
2+
3+
on:
4+
push:
5+
branches: ["main"]
6+
tags:
7+
- v*
8+
pull_request:
9+
branches: ["main"]
10+
workflow_dispatch:
11+
12+
jobs:
13+
make-server-installer:
14+
runs-on: windows-latest
15+
steps:
16+
- uses: actions/checkout@v4
17+
18+
- name: Install NSIS
19+
shell: PowerShell
20+
run: |
21+
# Download NSIS installer
22+
Invoke-WebRequest -UserAgent "Wget" -Uri "https://sourceforge.net/projects/nsis/files/NSIS%203/3.10/nsis-3.10-setup.exe" -OutFile "nsis.exe"
23+
24+
# Install NSIS
25+
Start-Process nsis.exe -ArgumentList '/S' -Wait
26+
27+
- name: Verify NSIS installation
28+
shell: PowerShell
29+
run: |
30+
# Check if NSIS is installed
31+
& 'C:\Program Files (x86)\NSIS\makensis.exe' /VERSION
32+
33+
- name: Build the Lemonade Server installer
34+
shell: PowerShell
35+
run: |
36+
cd installer
37+
& 'C:\Program Files (x86)\NSIS\makensis.exe' 'Installer.nsi'
38+
39+
if (Test-Path "Lemonade_Server_Installer.exe") {
40+
Write-Host "Lemonade_Server_Installer.exe has been created successfully."
41+
} else {
42+
Write-Host "Lemonade_Server_Installer.exe was not found."
43+
exit 1
44+
}
45+
46+
- name: Upload Installer
47+
uses: actions/upload-artifact@v4
48+
if: always()
49+
with:
50+
name: LemonadeServerInstaller
51+
path: |
52+
installer\Lemonade_Server_Installer.exe
53+
54+
- name: Attempt to install Lemonade Server using installer
55+
shell: cmd
56+
run: |
57+
cd installer
58+
Lemonade_Server_Installer.exe /S
59+
60+
- name: Ensure the Lemonade server works properly
61+
shell: pwsh
62+
run: |
63+
Write-Host "Use a function to determine the underlying command from the lemonade server shortcut"
64+
function Get-ShortcutTarget {
65+
param (
66+
[string]$shortcutPath
67+
)
68+
$shell = New-Object -ComObject WScript.Shell
69+
$shortcut = $shell.CreateShortcut($shortcutPath)
70+
$targetPath = $shortcut.TargetPath
71+
$arguments = $shortcut.Arguments
72+
return "$targetPath $arguments"
73+
}
74+
75+
Write-Host "ls of install directory to make sure the server is there"
76+
ls "$HOME\AppData\Local\lemonade_server"
77+
78+
$shortcutPath = "$HOME\AppData\Local\lemonade_server\lemonade-server.lnk"
79+
$fullCommand = Get-ShortcutTarget -shortcutPath $shortcutPath
80+
81+
Write-Host "Server shortcut full command: $fullCommand"
82+
83+
$quotedCommand = "`"$fullCommand`""
84+
85+
$outputFile = "output.log"
86+
$errorFile = "error.log"
87+
$serverProcess = Start-Process -FilePath "cmd.exe" -ArgumentList "/C $quotedCommand" -RedirectStandardOutput $outputFile -RedirectStandardError $errorFile -PassThru -NoNewWindow
88+
89+
Write-Host "Wait for 30 seconds to let the server come up"
90+
Start-Sleep -Seconds 30
91+
92+
Write-Host "Check if server process successfully launched"
93+
$serverRunning = Get-Process -Id $serverProcess.Id -ErrorAction SilentlyContinue
94+
if (-not $serverRunning) {
95+
Write-Host "Error: Server process isn't running, even though we just tried to start it!"
96+
Write-Host "Standard Output:"
97+
Get-Content $outputFile
98+
99+
Write-Host "Standard Error:"
100+
Get-Content $errorFile
101+
exit 1
102+
} else {
103+
Write-Host "Server process is alive."
104+
}
105+
106+
Write-Host "Wait for the server port to come up"
107+
while ($true) {
108+
109+
$llmPortCheck = Test-NetConnection -ComputerName 127.0.0.1 -Port 8000
110+
if (-not $llmPortCheck.TcpTestSucceeded) {
111+
Write-Host "LLM server is not yet running on port 8000!"
112+
Write-Host "Standard Output:"
113+
Get-Content $outputFile
114+
115+
Write-Host "Standard Error:"
116+
Get-Content $errorFile
117+
} else {
118+
Write-Host "LLM server is running on port 8000."
119+
break
120+
}
121+
122+
Start-Sleep -Seconds 30
123+
}
124+
125+
Write-Host "Checking the /health endpoint"
126+
$response = Invoke-WebRequest -Uri http://localhost:8000/api/v0/health -UseBasicParsing
127+
128+
if ($response.StatusCode -eq 200) {
129+
Write-Output "Good: /health status code is 200"
130+
} else {
131+
Write-Output "Error: /health status code is not 200"
132+
Write-Host "Standard Output:"
133+
Get-Content $outputFile
134+
135+
Write-Host "Standard Error:"
136+
Get-Content $errorFile
137+
exit 1
138+
}
139+
140+
$jsonContent = $response.Content | ConvertFrom-Json
141+
if ($jsonContent) {
142+
Write-Output "Good: /health JSON content is not empty: $jsonContent"
143+
} else {
144+
Write-Output "Error: /health JSON content is empty"
145+
Write-Host "Standard Output:"
146+
Get-Content $outputFile
147+
148+
Write-Host "Standard Error:"
149+
Get-Content $errorFile
150+
exit 1
151+
}
152+
153+
Write-Host "Close the server process"
154+
155+
function Kill-Tree {
156+
Param([int]$ppid)
157+
Get-CimInstance Win32_Process | Where-Object { $_.ParentProcessId -eq $ppid } | ForEach-Object { Kill-Tree $_.ProcessId }
158+
Stop-Process -Id $ppid
159+
}
160+
Kill-Tree $serverProcess.Id
161+
162+
- name: Release
163+
uses: softprops/action-gh-release@v2
164+
if: startsWith(github.ref, 'refs/tags/v')
165+
with:
166+
files: installer/Lemonade_Server_Installer.exe
167+
168+
169+
170+

.github/workflows/test_lemonade.yml

+7-10
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,6 @@ jobs:
4646
run: |
4747
pylint src/lemonade --rcfile .pylintrc --disable E0401
4848
pylint examples --rcfile .pylintrc --disable E0401,E0611 --jobs=1
49-
- name: Test HF+CPU server
50-
if: runner.os == 'Windows'
51-
timeout-minutes: 10
52-
uses: ./.github/actions/server-testing
53-
with:
54-
conda_env: -n lemon
55-
load_command: -i facebook/opt-125m huggingface-load
5649
- name: Run lemonade tests
5750
shell: bash -el {0}
5851
run: |
@@ -63,7 +56,11 @@ jobs:
6356
python test/lemonade/llm_api.py
6457
6558
66-
# Test high-level LEAP APIs
67-
python examples/lemonade/leap_basic.py
68-
python examples/lemonade/leap_streaming.py
59+
# Test high-level APIs
60+
python examples/lemonade/api_basic.py
61+
python examples/lemonade/api_streaming.py
62+
63+
# Test server
64+
python test/lemonade/server.py
65+
6966

.github/workflows/test_lemonade_oga_cpu.yml

+3-11
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,7 @@ jobs:
5353
# Test low-level APIs
5454
python test/lemonade/oga_cpu_api.py
5555
56-
# Test high-level LEAP APIs
57-
python examples/lemonade/leap_oga_cpu.py
58-
python examples/lemonade/leap_oga_cpu_streaming.py
59-
- name: Test OGA+CPU server
60-
if: runner.os == 'Windows'
61-
timeout-minutes: 10
62-
uses: ./.github/actions/server-testing
63-
with:
64-
conda_env: -n lemon
65-
load_command: -i TinyPixel/small-llama2 oga-load --device cpu --dtype int4
66-
hf_token: "${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" # Required by OGA model_builder in OGA 0.4.0 but not future versions
56+
# Test high-level APIs
57+
python examples/lemonade/api_oga_cpu.py
58+
python examples/lemonade/api_oga_cpu_streaming.py
6759

NOTICE.md

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ PORTIONS LICENSED AS FOLLOWS
22

33
\> TurnkeyML used code from the [MLAgility](https://github.com/groq/mlagility) and [GroqFlow](https://github.com/groq/groqflow) projects as a starting point. Much of that code was refactored, improved, or replaced by the time TurnkeyML was published.
44

5+
\> TurnkeyML uses the [Microsoft lemon emoji](https://github.com/microsoft/fluentui-emoji) as an icon for the lemonade tool.
6+
57
>The MIT License
68
>
79
>Copyright 2023 Groq Inc.

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
We are on a mission to make it easy to use the most important tools in the ONNX ecosystem. TurnkeyML accomplishes this by providing no-code CLIs and low-code APIs for both general ONNX workflows with `turnkey` as well as LLMs with `lemonade`.
99

10-
| [**Lemonade**](https://github.com/onnx/turnkeyml/blob/main/docs/lemonade/getting_started.md) | [**Turnkey**](https://github.com/onnx/turnkeyml/blob/main/docs/turnkey/getting_started.md) |
10+
| [**Lemonade SDK**](https://github.com/onnx/turnkeyml/blob/main/docs/lemonade/getting_started.md) | [**Turnkey**](https://github.com/onnx/turnkeyml/blob/main/docs/turnkey/getting_started.md) |
1111
|:----------------------------------------------: |:-----------------------------------------------------------------: |
1212
| Serve and benchmark LLMs on CPU, GPU, and NPU. <br/> [Click here to get started with `lemonade`.](https://github.com/onnx/turnkeyml/blob/main/docs/lemonade/getting_started.md) | Export and optimize ONNX models for CNNs and Transformers. <br/> [Click here to get started with `turnkey`.](https://github.com/onnx/turnkeyml/blob/main/docs/turnkey/getting_started.md) |
1313
| <img src="https://github.com/onnx/turnkeyml/blob/main/img/llm_demo.png?raw=true"/> | <img src="https://github.com/onnx/turnkeyml/blob/main/img/classic_demo.png?raw=true"/> |

docs/lemonade/getting_started.md

+18-32
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,11 @@
1-
# Lemonade
1+
# Lemonade SDK
22

3-
Welcome to the project page for `lemonade` the Turnkey LLM Aide!
4-
5-
1. [Install](#install)
6-
1. [CLI Commands](#cli-commands)
7-
- [Syntax](#syntax)
8-
- [Chatting](#chatting)
9-
- [Accuracy](#accuracy)
10-
- [Benchmarking](#benchmarking)
11-
- [Memory Usage](#memory-usage)
12-
- [Serving](#serving)
13-
1. [API Overview](#api)
14-
1. [Code Organization](#code-organization)
15-
1. [Contributing](#contributing)
3+
The `lemonade` SDK provides everything needed to get up and running quickly with LLMs on OnnxRuntime GenAI (OGA).
164

5+
- [Quick installation from PyPI](#install).
6+
- [CLI with tools for prompting, benchmarking, and accuracy tests](#cli-commands).
7+
- [REST API with OpenAI compatibility](#serving).
8+
- [Python API based on `from_pretrained()` for easy integration with Python apps](#api).
179

1810
# Install
1911

@@ -85,9 +77,9 @@ Can be read like this:
8577
The `lemonade -h` command will show you which options and Tools are available, and `lemonade TOOL -h` will tell you more about that specific Tool.
8678

8779

88-
## Chatting
80+
## Prompting
8981

90-
To chat with your LLM try:
82+
To prompt your LLM try:
9183

9284
OGA iGPU:
9385
```bash
@@ -163,41 +155,35 @@ contains a figure plotting the memory usage over the build time. Learn more by
163155
164156
## Serving
165157
166-
You can launch a WebSocket server for your LLM with:
167-
168-
OGA iGPU:
169-
```bash
170-
lemonade -i microsoft/Phi-3-mini-4k-instruct oga-load --device igpu --dtype int4 serve
171-
```
158+
You can launch an OpenAI-compatible server with:
172159
173-
Hugging Face:
174160
```bash
175-
lemonade -i facebook/opt-125m huggingface-load serve
161+
lemonade serve
176162
```
177163
178-
Once the server has launched, you can connect to it from your own application, or interact directly by following the on-screen instructions to open a basic web app.
164+
Visit the [server spec](https://github.com/onnx/turnkeyml/blob/main/docs/lemonade/server_spec.md) to learn more about the endpoints provided.
179165
180166
# API
181167
182168
Lemonade is also available via API.
183169
184-
## LEAP APIs
170+
## High-Level APIs
185171
186-
The lemonade enablement platform (LEAP) API abstracts loading models from any supported framework (e.g., Hugging Face, OGA) and backend (e.g., CPU, iGPU, Hybrid). This makes it easy to integrate lemonade LLMs into Python applications.
172+
The high-level lemonade API abstracts loading models from any supported framework (e.g., Hugging Face, OGA) and backend (e.g., CPU, iGPU, Hybrid) using the popular `from_pretrained()` function. This makes it easy to integrate lemonade LLMs into Python applications.
187173
188174
OGA iGPU:
189175
```python
190-
from lemonade import leap
176+
from lemonade.api import from_pretrained
191177
192-
model, tokenizer = leap.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct", recipe="oga-igpu")
178+
model, tokenizer = from_pretrained("Qwen/Qwen2.5-0.5B-Instruct", recipe="oga-igpu")
193179
194180
input_ids = tokenizer("This is my prompt", return_tensors="pt").input_ids
195181
response = model.generate(input_ids, max_new_tokens=30)
196182
197183
print(tokenizer.decode(response[0]))
198184
```
199185
200-
You can learn more about the LEAP APIs [here](https://github.com/onnx/turnkeyml/tree/main/examples/lemonade).
186+
You can learn more about the high-level APIs [here](https://github.com/onnx/turnkeyml/tree/main/examples/lemonade).
201187
202188
## Low-Level API
203189
@@ -207,13 +193,13 @@ Here's a quick example of how to prompt a Hugging Face LLM using the low-level A
207193

208194
```python
209195
import lemonade.tools.torch_llm as tl
210-
import lemonade.tools.chat as cl
196+
import lemonade.tools.prompt as pt
211197
from turnkeyml.state import State
212198
213199
state = State(cache_dir="cache", build_name="test")
214200
215201
state = tl.HuggingfaceLoad().run(state, input="facebook/opt-125m")
216-
state = cl.Prompt().run(state, prompt="hi", max_new_tokens=15)
202+
state = pt.Prompt().run(state, prompt="hi", max_new_tokens=15)
217203
218204
print("Response:", state.response)
219205
```

0 commit comments

Comments
 (0)