diff --git a/.github/workflows/test_lemonade_oga_cpu.yml b/.github/workflows/test_lemonade_oga_cpu.yml index 33edd22a..52474ee8 100644 --- a/.github/workflows/test_lemonade_oga_cpu.yml +++ b/.github/workflows/test_lemonade_oga_cpu.yml @@ -42,14 +42,6 @@ jobs: shell: bash -el {0} run: | pylint src/lemonade --rcfile .pylintrc --disable E0401 - - name: Test OGA+CPU server - if: runner.os == 'Windows' - timeout-minutes: 10 - uses: ./.github/actions/server-testing - with: - conda_env: -n lemon - load_command: -i TinyPixel/small-llama2 oga-load --device cpu --dtype int4 - hf_token: "${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" # Required by OGA model_builder in OGA 0.4.0 but not future versions - name: Run lemonade tests shell: bash -el {0} env: @@ -64,4 +56,12 @@ jobs: # Test high-level LEAP APIs python examples/lemonade/leap_oga_cpu.py python examples/lemonade/leap_oga_cpu_streaming.py + - name: Test OGA+CPU server + if: runner.os == 'Windows' + timeout-minutes: 10 + uses: ./.github/actions/server-testing + with: + conda_env: -n lemon + load_command: -i TinyPixel/small-llama2 oga-load --device cpu --dtype int4 + hf_token: "${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" # Required by OGA model_builder in OGA 0.4.0 but not future versions diff --git a/docs/lemonade/getting_started.md b/docs/lemonade/getting_started.md index d62f648b..5506329d 100644 --- a/docs/lemonade/getting_started.md +++ b/docs/lemonade/getting_started.md @@ -52,7 +52,7 @@ To measure the accuracy of an LLM using MMLU, try this: That command will run just the management test from MMLU on your LLM and save the score to the lemonade cache at `~/.cache/lemonade`. -You can run the full suite of MMLU subjects by omitting the `--test` argument. You can learn more about this with `lemonade accuracy-mmlu -h. +You can run the full suite of MMLU subjects by omitting the `--test` argument. You can learn more about this with `lemonade accuracy-mmlu -h`. ## Benchmarking diff --git a/docs/turnkey/tools_user_guide.md b/docs/turnkey/tools_user_guide.md index a2191bdd..b582cbac 100644 --- a/docs/turnkey/tools_user_guide.md +++ b/docs/turnkey/tools_user_guide.md @@ -153,9 +153,9 @@ Each build directory contains: - The stats file, `turnkey_stats.yaml`, which collects all of the statistics collected by the tools. - This is what forms the content of the CSV reports generated by the `turnkey report` tool. - One log file per tool that was executed, which may contain additional information about what happened during the tool run. - - For example, `cache_dir/build_dir/log_discover.txt`. + - For example, `cache_dir/builds/build_dir/log_discover.txt`. - All of the artifacts produced by the tools. - - For example, `cache_dir/build_dir/onnx/my_model.onnx`. + - For example, `cache_dir/builds/build_dir/onnx/my_model.onnx`. The `--lean-cache` global argument ensures that all build artifacts are removed at the end of the sequence. This is useful for saving disk space when gathering statistics over a large amount of models. Log files (.txt), json files (.json), and yaml files (.yaml, such as state.yaml and stats.yaml) are not removed. diff --git a/examples/turnkey/api/loading_a_build.py b/examples/turnkey/api/loading_a_build.py index 234f5959..172fc62f 100644 --- a/examples/turnkey/api/loading_a_build.py +++ b/examples/turnkey/api/loading_a_build.py @@ -11,6 +11,7 @@ import onnxruntime as ort from turnkeyml.common.filesystem import get_available_builds, DEFAULT_CACHE_DIR from turnkeyml.state import State +from turnkeyml.common.build import output_dir from turnkeyml.tools.load_build import LoadBuild from turnkeyml.tools.onnx import ConvertOnnxToFp16 @@ -21,8 +22,7 @@ def main(): # We use the _state.yaml file in the build directory when loading a build prior_state_file = os.path.join( - DEFAULT_CACHE_DIR, - prerequisite_build, + output_dir(DEFAULT_CACHE_DIR, prerequisite_build), f"{prerequisite_build}_state.yaml", ) diff --git a/plugins/devices/test/benchmark.py b/plugins/devices/test/benchmark.py index f54b265f..fc0111aa 100644 --- a/plugins/devices/test/benchmark.py +++ b/plugins/devices/test/benchmark.py @@ -497,7 +497,7 @@ def test_010_cli_cache_benchmark(self): "--cache-dir", cache_dir, "-i", - os.path.join(cache_dir, "*", "*_state.yaml"), + os.path.join(cache_dir, "builds", "*", "*_state.yaml"), "load-build", "benchmark", ] diff --git a/setup.py b/setup.py index 346acb4e..73c40bbe 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,7 @@ "pytz", "tqdm", "matplotlib", + "tabulate", # Conditional dependencies for ONNXRuntime backends "onnxruntime >=1.10.1;platform_system=='Linux' and extra != 'llm-oga-cuda'", "onnxruntime-directml >=1.19.0;platform_system=='Windows' and extra != 'llm-oga-cuda'", diff --git a/src/lemonade/cache.py b/src/lemonade/cache.py index cab73999..dcd6233c 100644 --- a/src/lemonade/cache.py +++ b/src/lemonade/cache.py @@ -1,11 +1,12 @@ import os +from datetime import datetime, timezone # Allow an environment variable to override the default # location for the build cache if os.environ.get("LEMONADE_CACHE_DIR"): DEFAULT_CACHE_DIR = os.path.expanduser(os.environ.get("LEMONADE_CACHE_DIR")) else: - DEFAULT_CACHE_DIR = os.path.expanduser("~/.cache/lemonade") + DEFAULT_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "lemonade") def checkpoint_to_model_name(checkpoint_name: str) -> str: @@ -16,6 +17,29 @@ def checkpoint_to_model_name(checkpoint_name: str) -> str: return checkpoint_name.split("/")[1] +def build_name(input_name): + """ + Name the lemonade build by concatenating these two factors: + 1. Sanitize the input name (typically a model checkpoint name) by + replacing any `/` characters with `_`. + 2. Timestamp in the format: + m_d_y_h_m_s + This timestamp ensures that builds in the same cache will not + collide in the same build directory. + """ + + # Sanitize the input name + input_name_sanitized = input_name.replace("/", "_") + + # Get the current time in GMT + current_time = datetime.now(timezone.utc) + + # Format the timestamp string + timestamp = current_time.strftime("%Yy_%mm_%dd_%Hh_%Mm_%Ss") + + return f"{input_name_sanitized}_{timestamp}" + + class Keys: MODEL = "model" PER_ITERATION_LATENCY = "per_iteration_latency" @@ -37,3 +61,4 @@ class Keys: DEVICE = "device" OGA_MODELS_SUBFOLDER = "oga_models_subfolder" MEMORY_USAGE_PLOT = "memory_usage_plot" + MAX_MEMORY_USED_GB = "max_memory_used_GB" diff --git a/src/lemonade/cli.py b/src/lemonade/cli.py index 6a0ef452..890dcae8 100644 --- a/src/lemonade/cli.py +++ b/src/lemonade/cli.py @@ -128,7 +128,7 @@ def main(): state = State( cache_dir=os.path.abspath(global_args["cache_dir"]), - build_name=global_args["input"].replace("/", "_"), + build_name=cache.build_name(global_args["input"]), sequence_info=sequence.info, ) sequence.launch( diff --git a/src/lemonade/tools/chat.py b/src/lemonade/tools/chat.py index 9519c783..3c093ee4 100644 --- a/src/lemonade/tools/chat.py +++ b/src/lemonade/tools/chat.py @@ -25,6 +25,8 @@ } DEFAULT_SERVER_PORT = 8000 +DEFAULT_MAX_NEW_TOKENS = 512 +DEFAULT_N_TRIALS = 1 END_OF_STREAM = "" @@ -95,17 +97,19 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: parser.add_argument( "--max-new-tokens", "-m", - default=512, + default=DEFAULT_MAX_NEW_TOKENS, type=int, - help="Maximum number of new tokens in the response", + help=f"Maximum number of new tokens in the response " + f"(default is {DEFAULT_MAX_NEW_TOKENS})", ) parser.add_argument( "--n-trials", "-n", - default=1, + default=DEFAULT_N_TRIALS, type=positive_int, - help="Number of responses the LLM will generate for the prompt (useful for testing)", + help=f"Number of responses the LLM will generate for the prompt " + f"(useful for testing, default is {DEFAULT_N_TRIALS})", ) return parser @@ -126,17 +130,14 @@ def parse(self, state: State, args, known_only=True) -> argparse.Namespace: # No change to the prompt pass - if parsed_args.n_trials < 1: - raise ValueError("N_TRIALS should be a positive number") - return parsed_args def run( self, state: State, prompt: str = "Hello", - max_new_tokens: int = 512, - n_trials: int = 1, + max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS, + n_trials: int = DEFAULT_N_TRIALS, ) -> State: model: ModelAdapter = state.model diff --git a/src/lemonade/tools/ort_genai/oga.py b/src/lemonade/tools/ort_genai/oga.py index e487ae8a..e38a1dad 100644 --- a/src/lemonade/tools/ort_genai/oga.py +++ b/src/lemonade/tools/ort_genai/oga.py @@ -16,7 +16,7 @@ from fnmatch import fnmatch from queue import Queue from packaging.version import Version -from huggingface_hub import snapshot_download +from huggingface_hub import snapshot_download, list_repo_files import onnxruntime_genai as og import onnxruntime_genai.models.builder as model_builder from turnkeyml.state import State @@ -245,7 +245,7 @@ class OgaLoad(FirstTool): Models on Hugging Face that follow the "amd/**-onnx-ryzen-strix" pattern Local models for cpu, igpu, or npu: The specified checkpoint is converted to a local path, via mapping to lower case - and replacing '/' with '_'. If this model already exists in the 'models' folderr + and replacing '/' with '_'. If this model already exists in the 'models' folder of the lemonade cache and if it has a subfolder -, then this model will be used. If the --force flag is used and the model is built with model_builder, then it will be rebuilt. @@ -398,8 +398,16 @@ def run( + "." ) + # Check whether the model is a safetensors checkpoint or a pre-exported + # ONNX model + # Note: This approach only supports ONNX models where the ONNX files are in the + # Huggingface repo root. This does not support the case where the ONNX files + # are in a nested directory within the repo. + model_files = list_repo_files(repo_id=checkpoint) + onnx_model = any([filename.endswith(".onnx") for filename in model_files]) + # Download the model from HF - if device == "npu" or device == "hybrid": + if onnx_model: # NPU models on HF are ready to go and HF does its own caching full_model_path = snapshot_download( @@ -474,7 +482,7 @@ def run( os.makedirs(os.path.dirname(dst_dll), exist_ok=True) shutil.copy2(src_dll, dst_dll) else: - # device is 'cpu' or 'igpu' + # checkpoint is safetensors, so we need to run it through model_builder # Use model_builder to download model and convert to ONNX printing.log_info(f"Building {checkpoint} for {device} using {dtype}") diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index 5637836b..6ef6260f 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -39,8 +39,22 @@ def load_yaml(file_path) -> Dict: ) +def builds_dir(cache_dir): + """ + Each build stores stats, logs, and other files in a build directory. + All build directories are located at: + /builds + """ + return os.path.join(cache_dir, "builds") + + def output_dir(cache_dir, build_name): - path = os.path.join(cache_dir, build_name) + """ + Each build stores stats, logs, and other files in an output directory at: + All build directories are located at: + / + """ + path = os.path.join(builds_dir(cache_dir), build_name) return path diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index df4c3a60..06e1bfbf 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -183,7 +183,7 @@ def clean_output_dir(cache_dir: str, build_name: str) -> None: """ Delete all elements of the output directory that are not human readable """ - output_dir = os.path.join(cache_dir, build_name) + output_dir = build.output_dir(cache_dir, build_name) if os.path.isdir(output_dir) and is_build_dir(cache_dir, build_name): output_dir = os.path.expanduser(output_dir) else: @@ -244,10 +244,10 @@ def get_available_builds(cache_dir): check_cache_dir(cache_dir) builds = [ - pathlib.PurePath(build).name - for build in os.listdir(os.path.abspath(cache_dir)) - if os.path.isdir(os.path.join(cache_dir, build)) - and is_build_dir(cache_dir, build) + pathlib.PurePath(build_name).name + for build_name in os.listdir(os.path.abspath(build.builds_dir(cache_dir))) + if os.path.isdir(build.output_dir(cache_dir, build_name)) + and is_build_dir(cache_dir, build_name) ] builds.sort() @@ -517,7 +517,9 @@ def rebase_cache_dir(input_path: str, build_name: str, new_cache_dir: str): """ relative_input_path = input_path.split(build_name, 1)[1][1:] - return os.path.join(new_cache_dir, build_name, relative_input_path) + return os.path.join( + build.output_dir(new_cache_dir, build_name), relative_input_path + ) def check_extension(choices, file_name, error_func): diff --git a/src/turnkeyml/memory_tracker.py b/src/turnkeyml/memory_tracker.py new file mode 100644 index 00000000..52258a9c --- /dev/null +++ b/src/turnkeyml/memory_tracker.py @@ -0,0 +1,205 @@ +import os +import time +from multiprocessing import Process, Queue +import matplotlib.pyplot as plt +import psutil +import yaml + + +DEFAULT_TRACK_MEMORY_INTERVAL = 0.25 +MEMORY_USAGE_YAML_FILENAME = "memory_usage.yaml" +MEMORY_USAGE_PNGL_FILENAME = "memory_usage.png" + + +class MemoryTracker: + + @staticmethod + def get_time_mem_list(process): + return [time.time(), process.memory_info().rss] + + def __init__(self): + self.process_being_tracked = None + self.build_dir = None + self.queue = None + self.tracker_process = None + self.tracking_active = False + self.yaml_path = None + + def start( + self, track_pid, build_dir, track_memory_interval=DEFAULT_TRACK_MEMORY_INTERVAL + ): + if self.tracking_active: + raise RuntimeError("Cannot start tracking while already tracking") + + # Get the process being tracked + self.process_being_tracked = psutil.Process(track_pid) + + # Save the folder where data and plot will be stored + self.build_dir = build_dir + + # Create queue for passing messages to the tracker + self.queue = Queue() + + # The yaml file where the memory usage data will be saved + self.yaml_path = os.path.join(build_dir, MEMORY_USAGE_YAML_FILENAME) + + # Create process to continuously sample memory usage + self.tracker_process = Process( + target=self._memory_tracker_, + args=( + track_pid, + self.queue, + self.yaml_path, + track_memory_interval, + ), + ) + self.tracker_process.start() + self.tracking_active = True + self.set_label("start") + self.sample() + + def set_label(self, label): + if self.tracking_active: + self.queue.put(label) + + def sample(self): + if self.tracking_active: + self.queue.put(MemoryTracker.get_time_mem_list(self.process_being_tracked)) + + def stop(self): + if self.tracking_active: + self.queue.put(None) + self.tracking_active = False + + def create_plot(self, build_name: None): + if self.tracker_process is None: + return None + + if self.tracking_active: + self.stop() + + # Wait for memory tracker to finish writing yaml data file + while self.tracker_process.is_alive(): + self.tracker_process.join(timeout=0.5) + + try: + with open(self.yaml_path, "r", encoding="utf-8") as f: + memory_tracks = yaml.safe_load(f) + except FileNotFoundError as e: + print(f"Memory tracker file not found: {e.filename}") + return None + + # Find final time in the startup track (first track) to subtract from all other times + _, track = memory_tracks[0] + t0 = track[-1][0] + + # last_t and last_y are used to draw a line between the last point of the prior + # track and the first point of the current track + last_t = None + last_y = None + + plt.figure() + for k, v in memory_tracks[1:]: + if len(v) > 0: + t = [x[0] - t0 for x in v] + y = [float(x[1]) / 1024**3 for x in v] + # draw new memory usage track + if last_t is not None: + plt.plot([last_t] + t, [last_y] + y, label=k, marker=".") + else: + plt.plot(t, y, label=k, marker=".") + last_t = t[-1] + last_y = y[-1] + plt.xlabel("Time (sec)") + plt.ylabel("GB") + title_str = "Physical Memory Usage" + if build_name is not None: + title_str += "\n" + build_name + plt.title(title_str) + plt.legend() + plt.grid() + plot_path = os.path.join(self.build_dir, MEMORY_USAGE_PNGL_FILENAME) + plt.savefig(plot_path) + + return plot_path + + @staticmethod + def _memory_tracker_( + tracked_pid, + input_queue: Queue, + yaml_path: str, + track_memory_interval: float, + ): + """ + Tracks memory usage during build and saves to yaml file + The build communicates with the tracker though the input_queue. It may pass: + 1) string - This is to indicate that a new track is starting and the string is the label + for the next segment. The tracker will automatically track memory usage at + the track_memory_interval once a first track_name is given to it. + 2) list - A time and a current memory usage value that is added to the current track + (typically used at the end of a segment to make sure that each segment is + sampled at least once + 3) None - This indicates that the tracker should stop tracking, save its data to a file + and end + """ + memory_tracks = [] + current_track = [] + track_name = None + tracker_exit = False + + try: + tracked_process = psutil.Process(tracked_pid) + while ( + not tracker_exit and tracked_process.status() == psutil.STATUS_RUNNING + ): + + time.sleep(track_memory_interval) + + # Read any messages from the parent process + while not input_queue.empty(): + try: + message = input_queue.get(timeout=0.001) + if message is None or isinstance(message, str): + # Save current track. + if track_name is not None: + memory_tracks.append([track_name, current_track]) + track_name = message + current_track = [] + if message is None: + # Wrap up + tracker_exit = True + break + elif isinstance(message, list): + # Add time and memory data to current track + if track_name is not None: + current_track.append(message) + else: + raise TypeError( + "Track name must be passed to memory tracker prior to " + "sending data" + ) + else: + raise TypeError( + "Unrecognized message type in memory_tracker input queue: " + f"{message}" + ) + + except input_queue.Empty: + # input_queue.empty had not been updated + pass + + if not tracker_exit and track_name is not None: + # Save current time and memory usage + current_track.append( + MemoryTracker.get_time_mem_list(tracked_process) + ) + + # Save the collected memory tracks + with open(yaml_path, "w", encoding="utf-8") as f: + yaml.dump(memory_tracks, f) + + except psutil.NoSuchProcess: + # If the parent process stopped existing, we can + # safely assume that tracking is no longer needed + # NOTE: this only seems to be needed on Windows + pass diff --git a/src/turnkeyml/sequence/sequence.py b/src/turnkeyml/sequence/sequence.py index 76964361..b388bcc9 100644 --- a/src/turnkeyml/sequence/sequence.py +++ b/src/turnkeyml/sequence/sequence.py @@ -1,15 +1,11 @@ -import queue import sys import time import os -from multiprocessing import Process, Queue import platform import copy from datetime import datetime from typing import List, Dict, Optional -import yaml import pytz -import matplotlib.pyplot as plt import psutil import turnkeyml.common.printing as printing import turnkeyml.common.exceptions as exp @@ -34,95 +30,6 @@ def _rewind_stdout(lines: int = 1): sys.stdout.flush() -def _get_time_mem_list(process): - """Returns a list containing current time and current process memory usage""" - return [time.time(), process.memory_info().rss] - - -def _memory_tracker(input_queue: Queue, yaml_path, track_memory_interval, track_names): - """ - Tracks memory usage during build and saves to yaml file - """ - memory_tracks = [] - current_track = [] - track_counter = 0 - - try: - parent_process = psutil.Process(pid=os.getppid()) - while ( - track_counter < len(track_names) - and parent_process.status() == psutil.STATUS_RUNNING - ): - - time.sleep(track_memory_interval) - - # Read any messages from the parent process - while track_counter < len(track_names) and not input_queue.empty(): - try: - message = input_queue.get(timeout=0.001) - if message is None: - # Current track is complete - memory_tracks.append( - [track_names[track_counter], current_track] - ) - current_track = [] - track_counter += 1 - else: - # Message is the output of _get_time_mem_list, so add to current track - current_track.append(message) - except queue.Empty: - # input_queue.empty had not been updated - pass - - # Save current time and memory usage - current_track.append(_get_time_mem_list(parent_process)) - - # Save the collected memory tracks - with open(yaml_path, "w", encoding="utf-8") as f: - yaml.dump(memory_tracks, f) - - except psutil.NoSuchProcess: - # If the parent process stopped existing, we can - # safely assume that tracking is no longer needed - # NOTE: this only seems to be needed on Windows - pass - - -def _plot_memory_usage(state: State, memory_tracks): - - # Find final time in the startup track (before first tool) to subtract from all other times - _, track = memory_tracks[0] - t0 = track[-1][0] - - # last_t and last_y are used to draw a line between the last point of the prior - # track and the first point of the current track - last_t = None - last_y = None - - plt.figure() - for k, v in memory_tracks[1:]: - t = [x[0] - t0 for x in v] - y = [float(x[1]) / 1024**3 for x in v] - # draw new memory usage track - if last_t is not None: - plt.plot([last_t] + t, [last_y] + y, label=k, marker=".") - else: - plt.plot(t, y, label=k, marker=".") - last_t = t[-1] - last_y = y[-1] - plt.xlabel("Time (sec)") - plt.ylabel("GB") - plt.title(f"Physical Memory Usage\n{state.build_name}") - plt.legend() - plt.grid() - figure_path = os.path.join( - build.output_dir(state.cache_dir, state.build_name), "memory_usage.png" - ) - plt.savefig(figure_path) - printing.log_info(f"Saved plot of memory usage to {figure_path}") - state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, figure_path) - - class Sequence: """ Helper class to launch and manage build tools. @@ -216,26 +123,8 @@ def launch( # Start tracking memory usage if track_memory_interval is not None: - # Create queue for passing messages to the tracker - memory_tracker_queue = Queue() - # The yaml file where the memory usage data will be saved - yaml_path = os.path.join( - build.output_dir(state.cache_dir, state.build_name), "memory_usage.yaml" - ) - # The names of each memory track segment - track_names = ["start-up"] + [tool.unique_name for tool in self.tools] - # Create process to continuously update queue - memory_tracker_process = Process( - target=_memory_tracker, - args=( - memory_tracker_queue, - yaml_path, - track_memory_interval, - track_names, - ), - ) - memory_tracker_process.start() - memory_tracker_queue.put(_get_time_mem_list(self.process)) + build_dir = build.output_dir(state.cache_dir, state.build_name) + state.memory_tracker.start(os.getpid(), build_dir, track_memory_interval) # Create a build directory in the cache fs.make_build_dir(state.cache_dir, state.build_name) @@ -295,7 +184,7 @@ def launch( for stat_key, stat_value in stats_to_save.items(): state.save_stat(stat_key, stat_value) - # Save initial memory and create dict for tracking memory usage + # Save initial memory as a build statistic state.save_stat(f"{fs.Keys.TOOL_MEMORY}:__init__", self._get_mem_usage_str()) # Run the build @@ -304,9 +193,8 @@ def launch( start_time = time.time() - # Insert None into memory tracker queue before new tool starts - if track_memory_interval is not None: - memory_tracker_queue.put(None) + # Insert tool name into memory tracker queue before new tool starts + state.memory_tracker.set_label(tool.unique_name) try: @@ -387,13 +275,12 @@ def launch( # Store current memory and peak working memory state.save_stat(tool.memory_key, self._get_mem_usage_str()) - if track_memory_interval is not None: - # sample each tool at least once - memory_tracker_queue.put(_get_time_mem_list(self.process)) - # Send final None to memory_tracker so that is stops ands saves data to file - if track_memory_interval is not None: - memory_tracker_queue.put(None) + # sample each tool at least once + state.memory_tracker.sample() + + # Stop tracking memory + state.memory_tracker.stop() if not saved_exception: state.build_status = build.FunctionStatus.SUCCESSFUL @@ -404,14 +291,13 @@ def launch( ) state.invocation_info.status_message_color = printing.Colors.OKGREEN - if track_memory_interval is not None: - # Wait for memory tracker to finish writing yaml data file - while memory_tracker_process.is_alive(): - memory_tracker_process.join(timeout=1.0) - if os.path.exists(yaml_path): - with open(yaml_path, "r", encoding="utf-8") as f: - memory_tracks = yaml.safe_load(f) - _plot_memory_usage(state, memory_tracks) + plot_path = state.memory_tracker.create_plot(state.build_name) + if plot_path is not None: + printing.log_info(f"Saved plot of memory usage to {plot_path}") + state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, plot_path) + else: + printing.log_info("Error in memory usage tracking, no plot generated") + state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, "NONE") if vars(state).get("models_found") and vars(state).get("invocation_info"): diff --git a/src/turnkeyml/state.py b/src/turnkeyml/state.py index d46e9eab..82021a06 100644 --- a/src/turnkeyml/state.py +++ b/src/turnkeyml/state.py @@ -5,6 +5,7 @@ import turnkeyml.common.build as build import turnkeyml.common.filesystem as fs from turnkeyml.version import __version__ as turnkey_version +from turnkeyml.memory_tracker import MemoryTracker def _is_nice_to_write(value): @@ -80,6 +81,7 @@ def __init__( self.downcast_applied = False self.uid = build.unique_id() self.results = None + self.memory_tracker = MemoryTracker() # Store any additional kwargs as members for key, value in kwargs.items(): diff --git a/src/turnkeyml/tools/load_build.py b/src/turnkeyml/tools/load_build.py index 85e8662a..32ae0c5d 100644 --- a/src/turnkeyml/tools/load_build.py +++ b/src/turnkeyml/tools/load_build.py @@ -74,7 +74,7 @@ def run(self, state: State, input: str = "", skip_policy=skip_policy_default): # Extract the cache directory, build directory, and build name from the input source_build_dir = pathlib.Path(input).parent source_build_dir_name = source_build_dir.name - source_cache_dir = source_build_dir.parent + source_cache_dir = source_build_dir.parent.parent # Make sure that the target yaml file is actually the state of a turnkey build if not fs.is_build_dir(source_cache_dir, source_build_dir_name): diff --git a/src/turnkeyml/tools/management_tools.py b/src/turnkeyml/tools/management_tools.py index c92dd52e..8f5ffcd7 100644 --- a/src/turnkeyml/tools/management_tools.py +++ b/src/turnkeyml/tools/management_tools.py @@ -1,6 +1,5 @@ import argparse import abc -import os from typing import List import turnkeyml.common.filesystem as fs import turnkeyml.common.exceptions as exp @@ -8,6 +7,7 @@ from turnkeyml.tools.tool import ToolParser from turnkeyml.version import __version__ as turnkey_version from turnkeyml.common.system_info import get_system_info_dict +from turnkeyml.common.build import output_dir class ManagementTool(abc.ABC): @@ -208,7 +208,7 @@ def run( printing.log_warning("No builds found.") for build in builds: - build_path = os.path.join(cache_dir, build) + build_path = output_dir(cache_dir, build_name=build) if fs.is_build_dir(cache_dir, build): # Run actions on the build # These actions are intended to be mutually exclusive, so we diff --git a/src/turnkeyml/version.py b/src/turnkeyml/version.py index 9fff231c..0d72820f 100644 --- a/src/turnkeyml/version.py +++ b/src/turnkeyml/version.py @@ -1 +1 @@ -__version__ = "5.0.5" +__version__ = "5.1.0" diff --git a/test/lemonade/llm_api.py b/test/lemonade/llm_api.py index a8f4b170..f3b42464 100644 --- a/test/lemonade/llm_api.py +++ b/test/lemonade/llm_api.py @@ -10,6 +10,7 @@ from turnkeyml.state import State import turnkeyml.common.filesystem as fs import turnkeyml.common.test_helpers as common +import turnkeyml.common.build as build from lemonade.tools.huggingface_load import HuggingfaceLoad from lemonade.tools.huggingface_bench import HuggingfaceBench from lemonade.tools.mmlu import AccuracyMMLU @@ -354,7 +355,10 @@ def test_006_multiple_prompt_responses(self): ), stats["response_tokens"] # Check that histogram figure was generated assert os.path.exists( - os.path.join(state.cache_dir, state.build_name, "response_lengths.png") + os.path.join( + build.output_dir(state.cache_dir, state.build_name), + "response_lengths.png", + ) ) diff --git a/test/lemonade/oga_cpu_api.py b/test/lemonade/oga_cpu_api.py index 0cbc6ad4..32608ae8 100644 --- a/test/lemonade/oga_cpu_api.py +++ b/test/lemonade/oga_cpu_api.py @@ -5,6 +5,7 @@ from turnkeyml.state import State import turnkeyml.common.test_helpers as common import turnkeyml.common.filesystem as fs +from turnkeyml.common.build import builds_dir from lemonade.tools.ort_genai.oga import OgaLoad from lemonade.tools.chat import LLMPrompt from lemonade.tools.mmlu import AccuracyMMLU @@ -22,7 +23,7 @@ class Testing(unittest.TestCase): def setUp(self) -> None: - shutil.rmtree(cache_dir, ignore_errors=True) + shutil.rmtree(builds_dir(cache_dir), ignore_errors=True) def test_001_ogaload(self): # Test the OgaLoad and LLMPrompt tools on an NPU model diff --git a/test/turnkey/analysis.py b/test/turnkey/analysis.py index 45ba7520..58e06f7b 100644 --- a/test/turnkey/analysis.py +++ b/test/turnkey/analysis.py @@ -16,6 +16,7 @@ import turnkeyml.common.filesystem as filesystem import turnkeyml.common.test_helpers as common import turnkeyml.common.exceptions as exp +from turnkeyml.common.build import output_dir try: # pylint: disable=unused-import @@ -144,7 +145,7 @@ def __init__(self, **kwargs): def cache_is_lean(cache_dir, build_name): - files = list(glob.glob(f"{cache_dir}/{build_name}/**/*", recursive=True)) + files = list(glob.glob(f"{cache_dir}/builds/{build_name}/**/*", recursive=True)) is_lean = len([x for x in files if ".onnx" in x]) == 0 metadata_found = len([x for x in files if ".txt" in x]) > 0 return is_lean and metadata_found @@ -170,7 +171,7 @@ def run_analysis(args): def check_discover_log(build_name: str, expected_content: str): - log_path = os.path.join(cache_dir, build_name, "log_discover.txt") + log_path = os.path.join(output_dir(cache_dir, build_name), "log_discover.txt") with open(log_path, "r", encoding="utf-8") as log_file: log_content = log_file.read() assert expected_content in log_content, log_content diff --git a/test/turnkey/cli.py b/test/turnkey/cli.py index 969f6ddb..085f8ffb 100644 --- a/test/turnkey/cli.py +++ b/test/turnkey/cli.py @@ -791,7 +791,8 @@ def test_022_cli_cache_move(self): # Get the build state file in its new location selected_build = fs.get_available_builds(new_cache_dir)[-1] state_file_path = os.path.join( - new_cache_dir, selected_build, f"{selected_build}_state.yaml" + build.output_dir(new_cache_dir, selected_build), + f"{selected_build}_state.yaml", ) # Build the cached build in its new location