Skip to content

Commit 0989a7d

Browse files
authored
5.1.0 Update. Avoid build collisions in the lemonade cache. Improve memory tracking and OGA testing. (#288)
- Avoid lemonade build cache collisions (@jeremyfowers). - All builds are now placed under `<cache_dir>/builds/<build_name>` instead of `<cache_dir>/<build_name>` - This creates a more hierarchical cache structure, where builds are peer to models and data. - All build names now include a timestamp - This ensures that build stats and logs will not collide with each other if we build the same model in the same cache, but with different parameters. - Revs the minor version number because all previous caches are invalidated. - Enable ONNX model download for cpu and igpu in oga-load (@jeremyfowers) - Improvements to memory tracking (@amd-pworfolk) - Improve OGA testing (@jeremyfowers). - Run the server test last, since it is the most complex and has the worst telemetry - Stop deleting the entire cache directory between every test, since that deletes the model builder cache. Instead, just delete the cache/builds directory.
1 parent 638c20f commit 0989a7d

File tree

22 files changed

+327
-176
lines changed

22 files changed

+327
-176
lines changed

.github/workflows/test_lemonade_oga_cpu.yml

+8-8
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,6 @@ jobs:
4242
shell: bash -el {0}
4343
run: |
4444
pylint src/lemonade --rcfile .pylintrc --disable E0401
45-
- name: Test OGA+CPU server
46-
if: runner.os == 'Windows'
47-
timeout-minutes: 10
48-
uses: ./.github/actions/server-testing
49-
with:
50-
conda_env: -n lemon
51-
load_command: -i TinyPixel/small-llama2 oga-load --device cpu --dtype int4
52-
hf_token: "${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" # Required by OGA model_builder in OGA 0.4.0 but not future versions
5345
- name: Run lemonade tests
5446
shell: bash -el {0}
5547
env:
@@ -64,4 +56,12 @@ jobs:
6456
# Test high-level LEAP APIs
6557
python examples/lemonade/leap_oga_cpu.py
6658
python examples/lemonade/leap_oga_cpu_streaming.py
59+
- name: Test OGA+CPU server
60+
if: runner.os == 'Windows'
61+
timeout-minutes: 10
62+
uses: ./.github/actions/server-testing
63+
with:
64+
conda_env: -n lemon
65+
load_command: -i TinyPixel/small-llama2 oga-load --device cpu --dtype int4
66+
hf_token: "${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" # Required by OGA model_builder in OGA 0.4.0 but not future versions
6767

docs/lemonade/getting_started.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ To measure the accuracy of an LLM using MMLU, try this:
5252

5353
That command will run just the management test from MMLU on your LLM and save the score to the lemonade cache at `~/.cache/lemonade`.
5454

55-
You can run the full suite of MMLU subjects by omitting the `--test` argument. You can learn more about this with `lemonade accuracy-mmlu -h.
55+
You can run the full suite of MMLU subjects by omitting the `--test` argument. You can learn more about this with `lemonade accuracy-mmlu -h`.
5656

5757
## Benchmarking
5858

docs/turnkey/tools_user_guide.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,9 @@ Each build directory contains:
153153
- The stats file, `turnkey_stats.yaml`, which collects all of the statistics collected by the tools.
154154
- This is what forms the content of the CSV reports generated by the `turnkey report` tool.
155155
- One log file per tool that was executed, which may contain additional information about what happened during the tool run.
156-
- For example, `cache_dir/build_dir/log_discover.txt`.
156+
- For example, `cache_dir/builds/build_dir/log_discover.txt`.
157157
- All of the artifacts produced by the tools.
158-
- For example, `cache_dir/build_dir/onnx/my_model.onnx`.
158+
- For example, `cache_dir/builds/build_dir/onnx/my_model.onnx`.
159159

160160
The `--lean-cache` global argument ensures that all build artifacts are removed at the end of the sequence. This is useful for saving disk space when gathering statistics over a large amount of models. Log files (.txt), json files (.json), and yaml files (.yaml, such as state.yaml and stats.yaml) are not removed.
161161

examples/turnkey/api/loading_a_build.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import onnxruntime as ort
1212
from turnkeyml.common.filesystem import get_available_builds, DEFAULT_CACHE_DIR
1313
from turnkeyml.state import State
14+
from turnkeyml.common.build import output_dir
1415
from turnkeyml.tools.load_build import LoadBuild
1516
from turnkeyml.tools.onnx import ConvertOnnxToFp16
1617

@@ -21,8 +22,7 @@ def main():
2122

2223
# We use the _state.yaml file in the build directory when loading a build
2324
prior_state_file = os.path.join(
24-
DEFAULT_CACHE_DIR,
25-
prerequisite_build,
25+
output_dir(DEFAULT_CACHE_DIR, prerequisite_build),
2626
f"{prerequisite_build}_state.yaml",
2727
)
2828

plugins/devices/test/benchmark.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ def test_010_cli_cache_benchmark(self):
497497
"--cache-dir",
498498
cache_dir,
499499
"-i",
500-
os.path.join(cache_dir, "*", "*_state.yaml"),
500+
os.path.join(cache_dir, "builds", "*", "*_state.yaml"),
501501
"load-build",
502502
"benchmark",
503503
]

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
"pytz",
5050
"tqdm",
5151
"matplotlib",
52+
"tabulate",
5253
# Conditional dependencies for ONNXRuntime backends
5354
"onnxruntime >=1.10.1;platform_system=='Linux' and extra != 'llm-oga-cuda'",
5455
"onnxruntime-directml >=1.19.0;platform_system=='Windows' and extra != 'llm-oga-cuda'",

src/lemonade/cache.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import os
2+
from datetime import datetime, timezone
23

34
# Allow an environment variable to override the default
45
# location for the build cache
56
if os.environ.get("LEMONADE_CACHE_DIR"):
67
DEFAULT_CACHE_DIR = os.path.expanduser(os.environ.get("LEMONADE_CACHE_DIR"))
78
else:
8-
DEFAULT_CACHE_DIR = os.path.expanduser("~/.cache/lemonade")
9+
DEFAULT_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "lemonade")
910

1011

1112
def checkpoint_to_model_name(checkpoint_name: str) -> str:
@@ -16,6 +17,29 @@ def checkpoint_to_model_name(checkpoint_name: str) -> str:
1617
return checkpoint_name.split("/")[1]
1718

1819

20+
def build_name(input_name):
21+
"""
22+
Name the lemonade build by concatenating these two factors:
23+
1. Sanitize the input name (typically a model checkpoint name) by
24+
replacing any `/` characters with `_`.
25+
2. Timestamp in the format:
26+
<year>y_<month>m_<day>d_<hour>h_<minute>m_<second>s
27+
This timestamp ensures that builds in the same cache will not
28+
collide in the same build directory.
29+
"""
30+
31+
# Sanitize the input name
32+
input_name_sanitized = input_name.replace("/", "_")
33+
34+
# Get the current time in UTC
35+
current_time = datetime.now(timezone.utc)
36+
37+
# Format the timestamp string
38+
timestamp = current_time.strftime("%Yy_%mm_%dd_%Hh_%Mm_%Ss")
39+
40+
return f"{input_name_sanitized}_{timestamp}"
41+
42+
1943
class Keys:
2044
MODEL = "model"
2145
PER_ITERATION_LATENCY = "per_iteration_latency"
@@ -37,3 +61,4 @@ class Keys:
3761
DEVICE = "device"
3862
OGA_MODELS_SUBFOLDER = "oga_models_subfolder"
3963
MEMORY_USAGE_PLOT = "memory_usage_plot"
64+
MAX_MEMORY_USED_GB = "max_memory_used_GB"

src/lemonade/cli.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def main():
128128

129129
state = State(
130130
cache_dir=os.path.abspath(global_args["cache_dir"]),
131-
build_name=global_args["input"].replace("/", "_"),
131+
build_name=cache.build_name(global_args["input"]),
132132
sequence_info=sequence.info,
133133
)
134134
sequence.launch(

src/lemonade/tools/chat.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
}
2626

2727
DEFAULT_SERVER_PORT = 8000
28+
DEFAULT_MAX_NEW_TOKENS = 512
29+
DEFAULT_N_TRIALS = 1
2830

2931
END_OF_STREAM = "</s>"
3032

@@ -95,17 +97,19 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
9597
parser.add_argument(
9698
"--max-new-tokens",
9799
"-m",
98-
default=512,
100+
default=DEFAULT_MAX_NEW_TOKENS,
99101
type=int,
100-
help="Maximum number of new tokens in the response",
102+
help=f"Maximum number of new tokens in the response "
103+
f"(default is {DEFAULT_MAX_NEW_TOKENS})",
101104
)
102105

103106
parser.add_argument(
104107
"--n-trials",
105108
"-n",
106-
default=1,
109+
default=DEFAULT_N_TRIALS,
107110
type=positive_int,
108-
help="Number of responses the LLM will generate for the prompt (useful for testing)",
111+
help=f"Number of responses the LLM will generate for the prompt "
112+
f"(useful for testing, default is {DEFAULT_N_TRIALS})",
109113
)
110114

111115
return parser
@@ -126,17 +130,14 @@ def parse(self, state: State, args, known_only=True) -> argparse.Namespace:
126130
# No change to the prompt
127131
pass
128132

129-
if parsed_args.n_trials < 1:
130-
raise ValueError("N_TRIALS should be a positive number")
131-
132133
return parsed_args
133134

134135
def run(
135136
self,
136137
state: State,
137138
prompt: str = "Hello",
138-
max_new_tokens: int = 512,
139-
n_trials: int = 1,
139+
max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
140+
n_trials: int = DEFAULT_N_TRIALS,
140141
) -> State:
141142

142143
model: ModelAdapter = state.model

src/lemonade/tools/ort_genai/oga.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from fnmatch import fnmatch
1717
from queue import Queue
1818
from packaging.version import Version
19-
from huggingface_hub import snapshot_download
19+
from huggingface_hub import snapshot_download, list_repo_files
2020
import onnxruntime_genai as og
2121
import onnxruntime_genai.models.builder as model_builder
2222
from turnkeyml.state import State
@@ -245,7 +245,7 @@ class OgaLoad(FirstTool):
245245
Models on Hugging Face that follow the "amd/**-onnx-ryzen-strix" pattern
246246
Local models for cpu, igpu, or npu:
247247
The specified checkpoint is converted to a local path, via mapping to lower case
248-
and replacing '/' with '_'. If this model already exists in the 'models' folderr
248+
and replacing '/' with '_'. If this model already exists in the 'models' folder
249249
of the lemonade cache and if it has a subfolder <device>-<dtype>, then this model
250250
will be used. If the --force flag is used and the model is built with model_builder,
251251
then it will be rebuilt.
@@ -398,8 +398,16 @@ def run(
398398
+ "."
399399
)
400400

401+
# Check whether the model is a safetensors checkpoint or a pre-exported
402+
# ONNX model
403+
# Note: This approach only supports ONNX models where the ONNX files are in the
404+
# Huggingface repo root. This does not support the case where the ONNX files
405+
# are in a nested directory within the repo.
406+
model_files = list_repo_files(repo_id=checkpoint)
407+
onnx_model = any([filename.endswith(".onnx") for filename in model_files])
408+
401409
# Download the model from HF
402-
if device == "npu" or device == "hybrid":
410+
if onnx_model:
403411

404412
# NPU models on HF are ready to go and HF does its own caching
405413
full_model_path = snapshot_download(
@@ -474,7 +482,7 @@ def run(
474482
os.makedirs(os.path.dirname(dst_dll), exist_ok=True)
475483
shutil.copy2(src_dll, dst_dll)
476484
else:
477-
# device is 'cpu' or 'igpu'
485+
# checkpoint is safetensors, so we need to run it through model_builder
478486

479487
# Use model_builder to download model and convert to ONNX
480488
printing.log_info(f"Building {checkpoint} for {device} using {dtype}")

src/turnkeyml/common/build.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,22 @@ def load_yaml(file_path) -> Dict:
3939
)
4040

4141

42+
def builds_dir(cache_dir):
43+
"""
44+
Each build stores stats, logs, and other files in a build directory.
45+
All build directories are located at:
46+
<cache_dir>/builds
47+
"""
48+
return os.path.join(cache_dir, "builds")
49+
50+
4251
def output_dir(cache_dir, build_name):
43-
path = os.path.join(cache_dir, build_name)
52+
"""
53+
Each build stores stats, logs, and other files in an output directory.
54+
Each build's output directory is located at:
55+
<builds_dir>/<build_name>
56+
"""
57+
path = os.path.join(builds_dir(cache_dir), build_name)
4458
return path
4559

4660

src/turnkeyml/common/filesystem.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def clean_output_dir(cache_dir: str, build_name: str) -> None:
183183
"""
184184
Delete all elements of the output directory that are not human readable
185185
"""
186-
output_dir = os.path.join(cache_dir, build_name)
186+
output_dir = build.output_dir(cache_dir, build_name)
187187
if os.path.isdir(output_dir) and is_build_dir(cache_dir, build_name):
188188
output_dir = os.path.expanduser(output_dir)
189189
else:
@@ -244,10 +244,10 @@ def get_available_builds(cache_dir):
244244
check_cache_dir(cache_dir)
245245

246246
builds = [
247-
pathlib.PurePath(build).name
248-
for build in os.listdir(os.path.abspath(cache_dir))
249-
if os.path.isdir(os.path.join(cache_dir, build))
250-
and is_build_dir(cache_dir, build)
247+
pathlib.PurePath(build_name).name
248+
for build_name in os.listdir(os.path.abspath(build.builds_dir(cache_dir)))
249+
if os.path.isdir(build.output_dir(cache_dir, build_name))
250+
and is_build_dir(cache_dir, build_name)
251251
]
252252
builds.sort()
253253

@@ -517,7 +517,9 @@ def rebase_cache_dir(input_path: str, build_name: str, new_cache_dir: str):
517517
"""
518518

519519
relative_input_path = input_path.split(build_name, 1)[1][1:]
520-
return os.path.join(new_cache_dir, build_name, relative_input_path)
520+
return os.path.join(
521+
build.output_dir(new_cache_dir, build_name), relative_input_path
522+
)
521523

522524

523525
def check_extension(choices, file_name, error_func):

0 commit comments

Comments
 (0)