diff --git a/.github/workflows/test_lemonade_oga_cpu.yml b/.github/workflows/test_lemonade_oga_cpu.yml
index 33edd22a..52474ee8 100644
--- a/.github/workflows/test_lemonade_oga_cpu.yml
+++ b/.github/workflows/test_lemonade_oga_cpu.yml
@@ -42,14 +42,6 @@ jobs:
         shell: bash -el {0}
         run: |
           pylint src/lemonade --rcfile .pylintrc --disable E0401
-      - name: Test OGA+CPU server
-        if: runner.os == 'Windows'
-        timeout-minutes: 10
-        uses: ./.github/actions/server-testing
-        with:
-          conda_env: -n lemon
-          load_command: -i TinyPixel/small-llama2 oga-load --device cpu --dtype int4
-          hf_token: "${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" # Required by OGA model_builder in OGA 0.4.0 but not future versions
       - name: Run lemonade tests
         shell: bash -el {0}
         env:
@@ -64,4 +56,12 @@ jobs:
           # Test high-level LEAP APIs
           python examples/lemonade/leap_oga_cpu.py
           python examples/lemonade/leap_oga_cpu_streaming.py
+      - name: Test OGA+CPU server
+        if: runner.os == 'Windows'
+        timeout-minutes: 10
+        uses: ./.github/actions/server-testing
+        with:
+          conda_env: -n lemon
+          load_command: -i TinyPixel/small-llama2 oga-load --device cpu --dtype int4
+          hf_token: "${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" # Required by OGA model_builder in OGA 0.4.0 but not future versions
 
diff --git a/docs/lemonade/getting_started.md b/docs/lemonade/getting_started.md
index d62f648b..5506329d 100644
--- a/docs/lemonade/getting_started.md
+++ b/docs/lemonade/getting_started.md
@@ -52,7 +52,7 @@ To measure the accuracy of an LLM using MMLU, try this:
 
 That command will run just the management test from MMLU on your LLM and save the score to the lemonade cache at `~/.cache/lemonade`.
 
-You can run the full suite of MMLU subjects by omitting the `--test` argument. You can learn more about this with `lemonade accuracy-mmlu -h.
+You can run the full suite of MMLU subjects by omitting the `--test` argument. You can learn more about this with `lemonade accuracy-mmlu -h`.
 
 ## Benchmarking
 
diff --git a/docs/turnkey/tools_user_guide.md b/docs/turnkey/tools_user_guide.md
index a2191bdd..b582cbac 100644
--- a/docs/turnkey/tools_user_guide.md
+++ b/docs/turnkey/tools_user_guide.md
@@ -153,9 +153,9 @@ Each build directory contains:
 - The stats file, `turnkey_stats.yaml`, which collects all of the statistics collected by the tools.
   - This is what forms the content of the CSV reports generated by the `turnkey report` tool.
 - One log file per tool that was executed, which may contain additional information about what happened during the tool run.
-  - For example, `cache_dir/build_dir/log_discover.txt`.
+  - For example, `cache_dir/builds/build_dir/log_discover.txt`.
 - All of the artifacts produced by the tools.
-  - For example, `cache_dir/build_dir/onnx/my_model.onnx`.
+  - For example, `cache_dir/builds/build_dir/onnx/my_model.onnx`.
 
 The `--lean-cache` global argument ensures that all build artifacts are removed at the end of the sequence. This is useful for saving disk space when gathering statistics over a large amount of models. Log files (.txt), json files (.json), and yaml files (.yaml, such as state.yaml and stats.yaml) are not removed.
 
diff --git a/examples/turnkey/api/loading_a_build.py b/examples/turnkey/api/loading_a_build.py
index 234f5959..172fc62f 100644
--- a/examples/turnkey/api/loading_a_build.py
+++ b/examples/turnkey/api/loading_a_build.py
@@ -11,6 +11,7 @@
 import onnxruntime as ort
 from turnkeyml.common.filesystem import get_available_builds, DEFAULT_CACHE_DIR
 from turnkeyml.state import State
+from turnkeyml.common.build import output_dir
 from turnkeyml.tools.load_build import LoadBuild
 from turnkeyml.tools.onnx import ConvertOnnxToFp16
 
@@ -21,8 +22,7 @@ def main():
 
     # We use the _state.yaml file in the build directory when loading a build
     prior_state_file = os.path.join(
-        DEFAULT_CACHE_DIR,
-        prerequisite_build,
+        output_dir(DEFAULT_CACHE_DIR, prerequisite_build),
         f"{prerequisite_build}_state.yaml",
     )
 
diff --git a/plugins/devices/test/benchmark.py b/plugins/devices/test/benchmark.py
index f54b265f..fc0111aa 100644
--- a/plugins/devices/test/benchmark.py
+++ b/plugins/devices/test/benchmark.py
@@ -497,7 +497,7 @@ def test_010_cli_cache_benchmark(self):
             "--cache-dir",
             cache_dir,
             "-i",
-            os.path.join(cache_dir, "*", "*_state.yaml"),
+            os.path.join(cache_dir, "builds", "*", "*_state.yaml"),
             "load-build",
             "benchmark",
         ]
diff --git a/setup.py b/setup.py
index 346acb4e..73c40bbe 100644
--- a/setup.py
+++ b/setup.py
@@ -49,6 +49,7 @@
         "pytz",
         "tqdm",
         "matplotlib",
+        "tabulate",
         # Conditional dependencies for ONNXRuntime backends
         "onnxruntime >=1.10.1;platform_system=='Linux' and extra != 'llm-oga-cuda'",
         "onnxruntime-directml >=1.19.0;platform_system=='Windows' and extra != 'llm-oga-cuda'",
diff --git a/src/lemonade/cache.py b/src/lemonade/cache.py
index cab73999..dcd6233c 100644
--- a/src/lemonade/cache.py
+++ b/src/lemonade/cache.py
@@ -1,11 +1,12 @@
 import os
+from datetime import datetime, timezone
 
 # Allow an environment variable to override the default
 # location for the build cache
 if os.environ.get("LEMONADE_CACHE_DIR"):
     DEFAULT_CACHE_DIR = os.path.expanduser(os.environ.get("LEMONADE_CACHE_DIR"))
 else:
-    DEFAULT_CACHE_DIR = os.path.expanduser("~/.cache/lemonade")
+    DEFAULT_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "lemonade")
 
 
 def checkpoint_to_model_name(checkpoint_name: str) -> str:
@@ -16,6 +17,29 @@ def checkpoint_to_model_name(checkpoint_name: str) -> str:
     return checkpoint_name.split("/")[1]
 
 
+def build_name(input_name):
+    """
+    Name the lemonade build by concatenating these two factors:
+        1. Sanitize the input name (typically a model checkpoint name) by
+            replacing any `/` characters with `_`.
+        2. UTC timestamp in the format:
+                <year>y_<month>m_<day>d_<hour>h_<minute>m_<second>s
+            The timestamp has one-second resolution, so builds of the same
+            input started at least a second apart get distinct directories.
+    """
+
+    # Sanitize the input name
+    input_name_sanitized = input_name.replace("/", "_")
+
+    # Get the current time in UTC
+    current_time = datetime.now(timezone.utc)
+
+    # Format the timestamp string, year first
+    timestamp = current_time.strftime("%Yy_%mm_%dd_%Hh_%Mm_%Ss")
+
+    return f"{input_name_sanitized}_{timestamp}"
+
+
 class Keys:
     MODEL = "model"
     PER_ITERATION_LATENCY = "per_iteration_latency"
@@ -37,3 +61,4 @@ class Keys:
     DEVICE = "device"
     OGA_MODELS_SUBFOLDER = "oga_models_subfolder"
     MEMORY_USAGE_PLOT = "memory_usage_plot"
+    MAX_MEMORY_USED_GB = "max_memory_used_GB"
diff --git a/src/lemonade/cli.py b/src/lemonade/cli.py
index 6a0ef452..890dcae8 100644
--- a/src/lemonade/cli.py
+++ b/src/lemonade/cli.py
@@ -128,7 +128,7 @@ def main():
 
         state = State(
             cache_dir=os.path.abspath(global_args["cache_dir"]),
-            build_name=global_args["input"].replace("/", "_"),
+            build_name=cache.build_name(global_args["input"]),
             sequence_info=sequence.info,
         )
         sequence.launch(
diff --git a/src/lemonade/tools/chat.py b/src/lemonade/tools/chat.py
index 9519c783..3c093ee4 100644
--- a/src/lemonade/tools/chat.py
+++ b/src/lemonade/tools/chat.py
@@ -25,6 +25,8 @@
 }
 
 DEFAULT_SERVER_PORT = 8000
+DEFAULT_MAX_NEW_TOKENS = 512
+DEFAULT_N_TRIALS = 1
 
 END_OF_STREAM = "</s>"
 
@@ -95,17 +97,19 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
         parser.add_argument(
             "--max-new-tokens",
             "-m",
-            default=512,
+            default=DEFAULT_MAX_NEW_TOKENS,
             type=int,
-            help="Maximum number of new tokens in the response",
+            help=f"Maximum number of new tokens in the response "
+            f"(default is {DEFAULT_MAX_NEW_TOKENS})",
         )
 
         parser.add_argument(
             "--n-trials",
             "-n",
-            default=1,
+            default=DEFAULT_N_TRIALS,
             type=positive_int,
-            help="Number of responses the LLM will generate for the prompt (useful for testing)",
+            help=f"Number of responses the LLM will generate for the prompt "
+            f"(useful for testing, default is {DEFAULT_N_TRIALS})",
         )
 
         return parser
@@ -126,17 +130,14 @@ def parse(self, state: State, args, known_only=True) -> argparse.Namespace:
             # No change to the prompt
             pass
 
-        if parsed_args.n_trials < 1:
-            raise ValueError("N_TRIALS should be a positive number")
-
         return parsed_args
 
     def run(
         self,
         state: State,
         prompt: str = "Hello",
-        max_new_tokens: int = 512,
-        n_trials: int = 1,
+        max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
+        n_trials: int = DEFAULT_N_TRIALS,
     ) -> State:
 
         model: ModelAdapter = state.model
diff --git a/src/lemonade/tools/ort_genai/oga.py b/src/lemonade/tools/ort_genai/oga.py
index e487ae8a..e38a1dad 100644
--- a/src/lemonade/tools/ort_genai/oga.py
+++ b/src/lemonade/tools/ort_genai/oga.py
@@ -16,7 +16,7 @@
 from fnmatch import fnmatch
 from queue import Queue
 from packaging.version import Version
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download, list_repo_files
 import onnxruntime_genai as og
 import onnxruntime_genai.models.builder as model_builder
 from turnkeyml.state import State
@@ -245,7 +245,7 @@ class OgaLoad(FirstTool):
             Models on Hugging Face that follow the "amd/**-onnx-ryzen-strix" pattern
         Local models for cpu, igpu, or npu:
             The specified checkpoint is converted to a local path, via mapping to lower case
-            and replacing '/' with '_'.  If this model already exists in the 'models' folderr
+            and replacing '/' with '_'.  If this model already exists in the 'models' folder
             of the lemonade cache and if it has a subfolder <device>-<dtype>, then this model
             will be used.  If the --force flag is used and the model is built with model_builder,
             then it will be rebuilt.
@@ -398,8 +398,16 @@ def run(
                     + "."
                 )
 
+            # Check whether the model is a safetensors checkpoint or a pre-exported
+            # ONNX model
+            # Note: This approach only supports ONNX models where the ONNX files are in the
+            #   Huggingface repo root. This does not support the case where the ONNX files
+            #   are in a nested directory within the repo.
+            model_files = list_repo_files(repo_id=checkpoint)
+            onnx_model = any([filename.endswith(".onnx") for filename in model_files])
+
             # Download the model from HF
-            if device == "npu" or device == "hybrid":
+            if onnx_model:
 
                 # NPU models on HF are ready to go and HF does its own caching
                 full_model_path = snapshot_download(
@@ -474,7 +482,7 @@ def run(
                         os.makedirs(os.path.dirname(dst_dll), exist_ok=True)
                         shutil.copy2(src_dll, dst_dll)
             else:
-                # device is 'cpu' or 'igpu'
+                # checkpoint is safetensors, so we need to run it through model_builder
 
                 # Use model_builder to download model and convert to ONNX
                 printing.log_info(f"Building {checkpoint} for {device} using {dtype}")
diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py
index 5637836b..6ef6260f 100644
--- a/src/turnkeyml/common/build.py
+++ b/src/turnkeyml/common/build.py
@@ -39,8 +39,22 @@ def load_yaml(file_path) -> Dict:
             )
 
 
+def builds_dir(cache_dir):
+    """
+    Return the directory that contains every build directory in the cache:
+        <cache_dir>/builds
+    Each build stores its stats, logs, and other files in a build directory.
+    """
+    return os.path.join(cache_dir, "builds")
+
+
 def output_dir(cache_dir, build_name):
-    path = os.path.join(cache_dir, build_name)
+    """
+    Return the output directory where a build stores its stats, logs, and
+    other files:
+        <builds_dir>/<build_name>
+    """
+    path = os.path.join(builds_dir(cache_dir), build_name)
     return path
 
 
diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py
index df4c3a60..06e1bfbf 100644
--- a/src/turnkeyml/common/filesystem.py
+++ b/src/turnkeyml/common/filesystem.py
@@ -183,7 +183,7 @@ def clean_output_dir(cache_dir: str, build_name: str) -> None:
     """
     Delete all elements of the output directory that are not human readable
     """
-    output_dir = os.path.join(cache_dir, build_name)
+    output_dir = build.output_dir(cache_dir, build_name)
     if os.path.isdir(output_dir) and is_build_dir(cache_dir, build_name):
         output_dir = os.path.expanduser(output_dir)
     else:
@@ -244,10 +244,10 @@ def get_available_builds(cache_dir):
     check_cache_dir(cache_dir)
 
     builds = [
-        pathlib.PurePath(build).name
-        for build in os.listdir(os.path.abspath(cache_dir))
-        if os.path.isdir(os.path.join(cache_dir, build))
-        and is_build_dir(cache_dir, build)
+        pathlib.PurePath(build_name).name
+        for build_name in os.listdir(os.path.abspath(build.builds_dir(cache_dir)))
+        if os.path.isdir(build.output_dir(cache_dir, build_name))
+        and is_build_dir(cache_dir, build_name)
     ]
     builds.sort()
 
@@ -517,7 +517,9 @@ def rebase_cache_dir(input_path: str, build_name: str, new_cache_dir: str):
     """
 
     relative_input_path = input_path.split(build_name, 1)[1][1:]
-    return os.path.join(new_cache_dir, build_name, relative_input_path)
+    return os.path.join(
+        build.output_dir(new_cache_dir, build_name), relative_input_path
+    )
 
 
 def check_extension(choices, file_name, error_func):
diff --git a/src/turnkeyml/memory_tracker.py b/src/turnkeyml/memory_tracker.py
new file mode 100644
index 00000000..52258a9c
--- /dev/null
+++ b/src/turnkeyml/memory_tracker.py
@@ -0,0 +1,244 @@
+import os
+import queue
+import time
+from multiprocessing import Process, Queue
+from typing import Optional
+import matplotlib.pyplot as plt
+import psutil
+import yaml
+
+
+DEFAULT_TRACK_MEMORY_INTERVAL = 0.25
+MEMORY_USAGE_YAML_FILENAME = "memory_usage.yaml"
+MEMORY_USAGE_PNGL_FILENAME = "memory_usage.png"
+
+
+class MemoryTracker:
+    """
+    Samples the memory usage of a process from a separate tracker process,
+    saves the samples to a yaml file, and plots them.
+
+    Typical usage: start(), then set_label() and sample() as the work moves
+    from one stage to the next, then stop() and create_plot().
+    """
+
+    @staticmethod
+    def get_time_mem_list(process):
+        """Returns a list containing current time and current process memory usage"""
+        return [time.time(), process.memory_info().rss]
+
+    def __init__(self):
+        self.process_being_tracked = None
+        self.build_dir = None
+        self.queue = None
+        self.tracker_process = None
+        self.tracking_active = False
+        self.yaml_path = None
+
+    def start(
+        self, track_pid, build_dir, track_memory_interval=DEFAULT_TRACK_MEMORY_INTERVAL
+    ):
+        """
+        Start tracking the memory usage of the process with pid track_pid.
+
+        Memory is sampled roughly every track_memory_interval seconds, and the
+        data file and plot are stored in build_dir.  Raises RuntimeError if
+        tracking is already active.
+        """
+        if self.tracking_active:
+            raise RuntimeError("Cannot start tracking while already tracking")
+
+        # Get the process being tracked
+        self.process_being_tracked = psutil.Process(track_pid)
+
+        # Save the folder where data and plot will be stored
+        self.build_dir = build_dir
+
+        # Create queue for passing messages to the tracker
+        self.queue = Queue()
+
+        # The yaml file where the memory usage data will be saved
+        self.yaml_path = os.path.join(build_dir, MEMORY_USAGE_YAML_FILENAME)
+
+        # Create process to continuously sample memory usage
+        self.tracker_process = Process(
+            target=self._memory_tracker_,
+            args=(
+                track_pid,
+                self.queue,
+                self.yaml_path,
+                track_memory_interval,
+            ),
+        )
+        self.tracker_process.start()
+        self.tracking_active = True
+        self.set_label("start")
+        self.sample()
+
+    def set_label(self, label):
+        """Start a new track named label (no effect unless tracking is active)"""
+        if self.tracking_active:
+            self.queue.put(label)
+
+    def sample(self):
+        """Add a sample to the current track (no effect unless tracking is active)"""
+        if self.tracking_active:
+            self.queue.put(MemoryTracker.get_time_mem_list(self.process_being_tracked))
+
+    def stop(self):
+        """Tell the tracker process to save its data and exit"""
+        if self.tracking_active:
+            self.queue.put(None)
+            self.tracking_active = False
+
+    def create_plot(self, build_name: Optional[str] = None):
+        """
+        Stop tracking if needed, wait for the tracker process to finish, and
+        plot the recorded memory usage.  build_name, if given, is added to the
+        plot title.
+
+        Returns the path to the saved plot, or None if tracking was never
+        started or no usable memory data was recorded.
+        """
+        if self.tracker_process is None:
+            return None
+
+        if self.tracking_active:
+            self.stop()
+
+        # Wait for memory tracker to finish writing yaml data file
+        while self.tracker_process.is_alive():
+            self.tracker_process.join(timeout=0.5)
+
+        try:
+            with open(self.yaml_path, "r", encoding="utf-8") as f:
+                memory_tracks = yaml.safe_load(f)
+        except FileNotFoundError as e:
+            print(f"Memory tracker file not found: {e.filename}")
+            return None
+
+        # The tracker can exit before completing any track, or before the
+        # startup track was sampled, which leaves nothing to plot
+        if not memory_tracks or not memory_tracks[0][1]:
+            return None
+
+        # Find final time in the startup track (first track) to subtract from all other times
+        _, track = memory_tracks[0]
+        t0 = track[-1][0]
+
+        # last_t and last_y are used to draw a line between the last point of the prior
+        # track and the first point of the current track
+        last_t = None
+        last_y = None
+
+        figure = plt.figure()
+        for k, v in memory_tracks[1:]:
+            if len(v) > 0:
+                t = [x[0] - t0 for x in v]
+                y = [float(x[1]) / 1024**3 for x in v]
+                # draw new memory usage track
+                if last_t is not None:
+                    plt.plot([last_t] + t, [last_y] + y, label=k, marker=".")
+                else:
+                    plt.plot(t, y, label=k, marker=".")
+                last_t = t[-1]
+                last_y = y[-1]
+        plt.xlabel("Time (sec)")
+        plt.ylabel("GB")
+        title_str = "Physical Memory Usage"
+        if build_name is not None:
+            title_str += "\n" + build_name
+        plt.title(title_str)
+        plt.legend()
+        plt.grid()
+        plot_path = os.path.join(self.build_dir, MEMORY_USAGE_PNGL_FILENAME)
+        plt.savefig(plot_path)
+
+        # Close the figure so that it does not stay in memory after it is saved
+        plt.close(figure)
+
+        return plot_path
+
+    @staticmethod
+    def _memory_tracker_(
+        tracked_pid,
+        input_queue: Queue,
+        yaml_path: str,
+        track_memory_interval: float,
+    ):
+        """
+        Tracks memory usage during build and saves to yaml file
+        The build communicates with the tracker through the input_queue.  It may pass:
+          1) string - This is to indicate that a new track is starting and the string is the label
+                    for the next segment.  The tracker will automatically track memory usage at
+                    the track_memory_interval once a first track_name is given to it.
+          2) list - A time and a current memory usage value that is added to the current track
+                    (typically used at the end of a segment to make sure that each segment is
+                    sampled at least once)
+          3) None - This indicates that the tracker should stop tracking, save its data to a file
+                    and end
+        """
+        memory_tracks = []
+        current_track = []
+        track_name = None
+        tracker_exit = False
+
+        try:
+            tracked_process = psutil.Process(tracked_pid)
+            # NOTE(review): tracking ends as soon as the status is anything other
+            # than STATUS_RUNNING -- confirm that psutil does not report a live
+            # but idle process with a different status on the target platforms
+            while (
+                not tracker_exit and tracked_process.status() == psutil.STATUS_RUNNING
+            ):
+
+                time.sleep(track_memory_interval)
+
+                # Read any messages from the parent process
+                while not input_queue.empty():
+                    try:
+                        message = input_queue.get(timeout=0.001)
+                        if message is None or isinstance(message, str):
+                            # Save current track.
+                            if track_name is not None:
+                                memory_tracks.append([track_name, current_track])
+                            track_name = message
+                            current_track = []
+                            if message is None:
+                                # Wrap up
+                                tracker_exit = True
+                                break
+                        elif isinstance(message, list):
+                            # Add time and memory data to current track
+                            if track_name is not None:
+                                current_track.append(message)
+                            else:
+                                raise TypeError(
+                                    "Track name must be passed to memory tracker prior to "
+                                    "sending data"
+                                )
+                        else:
+                            raise TypeError(
+                                "Unrecognized message type in memory_tracker input queue: "
+                                f"{message}"
+                            )
+
+                    except queue.Empty:
+                        # input_queue.empty had not been updated
+                        pass
+
+                if not tracker_exit and track_name is not None:
+                    # Save current time and memory usage
+                    current_track.append(
+                        MemoryTracker.get_time_mem_list(tracked_process)
+                    )
+
+            # Save the collected memory tracks
+            with open(yaml_path, "w", encoding="utf-8") as f:
+                yaml.dump(memory_tracks, f)
+
+        except psutil.NoSuchProcess:
+            # If the parent process stopped existing, we can
+            # safely assume that tracking is no longer needed
+            # NOTE: this only seems to be needed on Windows
+            pass
diff --git a/src/turnkeyml/sequence/sequence.py b/src/turnkeyml/sequence/sequence.py
index 76964361..b388bcc9 100644
--- a/src/turnkeyml/sequence/sequence.py
+++ b/src/turnkeyml/sequence/sequence.py
@@ -1,15 +1,11 @@
-import queue
 import sys
 import time
 import os
-from multiprocessing import Process, Queue
 import platform
 import copy
 from datetime import datetime
 from typing import List, Dict, Optional
-import yaml
 import pytz
-import matplotlib.pyplot as plt
 import psutil
 import turnkeyml.common.printing as printing
 import turnkeyml.common.exceptions as exp
@@ -34,95 +30,6 @@ def _rewind_stdout(lines: int = 1):
     sys.stdout.flush()
 
 
-def _get_time_mem_list(process):
-    """Returns a list containing current time and current process memory usage"""
-    return [time.time(), process.memory_info().rss]
-
-
-def _memory_tracker(input_queue: Queue, yaml_path, track_memory_interval, track_names):
-    """
-    Tracks memory usage during build and saves to yaml file
-    """
-    memory_tracks = []
-    current_track = []
-    track_counter = 0
-
-    try:
-        parent_process = psutil.Process(pid=os.getppid())
-        while (
-            track_counter < len(track_names)
-            and parent_process.status() == psutil.STATUS_RUNNING
-        ):
-
-            time.sleep(track_memory_interval)
-
-            # Read any messages from the parent process
-            while track_counter < len(track_names) and not input_queue.empty():
-                try:
-                    message = input_queue.get(timeout=0.001)
-                    if message is None:
-                        # Current track is complete
-                        memory_tracks.append(
-                            [track_names[track_counter], current_track]
-                        )
-                        current_track = []
-                        track_counter += 1
-                    else:
-                        # Message is the output of _get_time_mem_list, so add to current track
-                        current_track.append(message)
-                except queue.Empty:
-                    # input_queue.empty had not been updated
-                    pass
-
-            # Save current time and memory usage
-            current_track.append(_get_time_mem_list(parent_process))
-
-        # Save the collected memory tracks
-        with open(yaml_path, "w", encoding="utf-8") as f:
-            yaml.dump(memory_tracks, f)
-
-    except psutil.NoSuchProcess:
-        # If the parent process stopped existing, we can
-        # safely assume that tracking is no longer needed
-        # NOTE: this only seems to be needed on Windows
-        pass
-
-
-def _plot_memory_usage(state: State, memory_tracks):
-
-    # Find final time in the startup track (before first tool) to subtract from all other times
-    _, track = memory_tracks[0]
-    t0 = track[-1][0]
-
-    # last_t and last_y are used to draw a line between the last point of the prior
-    # track and the first point of the current track
-    last_t = None
-    last_y = None
-
-    plt.figure()
-    for k, v in memory_tracks[1:]:
-        t = [x[0] - t0 for x in v]
-        y = [float(x[1]) / 1024**3 for x in v]
-        # draw new memory usage track
-        if last_t is not None:
-            plt.plot([last_t] + t, [last_y] + y, label=k, marker=".")
-        else:
-            plt.plot(t, y, label=k, marker=".")
-        last_t = t[-1]
-        last_y = y[-1]
-    plt.xlabel("Time (sec)")
-    plt.ylabel("GB")
-    plt.title(f"Physical Memory Usage\n{state.build_name}")
-    plt.legend()
-    plt.grid()
-    figure_path = os.path.join(
-        build.output_dir(state.cache_dir, state.build_name), "memory_usage.png"
-    )
-    plt.savefig(figure_path)
-    printing.log_info(f"Saved plot of memory usage to {figure_path}")
-    state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, figure_path)
-
-
 class Sequence:
     """
     Helper class to launch and manage build tools.
@@ -216,26 +123,8 @@ def launch(
 
         # Start tracking memory usage
         if track_memory_interval is not None:
-            # Create queue for passing messages to the tracker
-            memory_tracker_queue = Queue()
-            # The yaml file where the memory usage data will be saved
-            yaml_path = os.path.join(
-                build.output_dir(state.cache_dir, state.build_name), "memory_usage.yaml"
-            )
-            # The names of each memory track segment
-            track_names = ["start-up"] + [tool.unique_name for tool in self.tools]
-            # Create process to continuously update queue
-            memory_tracker_process = Process(
-                target=_memory_tracker,
-                args=(
-                    memory_tracker_queue,
-                    yaml_path,
-                    track_memory_interval,
-                    track_names,
-                ),
-            )
-            memory_tracker_process.start()
-            memory_tracker_queue.put(_get_time_mem_list(self.process))
+            build_dir = build.output_dir(state.cache_dir, state.build_name)
+            state.memory_tracker.start(os.getpid(), build_dir, track_memory_interval)
 
         # Create a build directory in the cache
         fs.make_build_dir(state.cache_dir, state.build_name)
@@ -295,7 +184,7 @@ def launch(
             for stat_key, stat_value in stats_to_save.items():
                 state.save_stat(stat_key, stat_value)
 
-        # Save initial memory and create dict for tracking memory usage
+        # Save initial memory as a build statistic
         state.save_stat(f"{fs.Keys.TOOL_MEMORY}:__init__", self._get_mem_usage_str())
 
         # Run the build
@@ -304,9 +193,8 @@ def launch(
 
             start_time = time.time()
 
-            # Insert None into memory tracker queue before new tool starts
-            if track_memory_interval is not None:
-                memory_tracker_queue.put(None)
+            # Insert tool name into memory tracker queue before new tool starts
+            state.memory_tracker.set_label(tool.unique_name)
 
             try:
 
@@ -387,13 +275,12 @@ def launch(
 
                 # Store current memory and peak working memory
                 state.save_stat(tool.memory_key, self._get_mem_usage_str())
-                if track_memory_interval is not None:
-                    # sample each tool at least once
-                    memory_tracker_queue.put(_get_time_mem_list(self.process))
 
-        # Send final None to memory_tracker so that is stops ands saves data to file
-        if track_memory_interval is not None:
-            memory_tracker_queue.put(None)
+                # sample each tool at least once
+                state.memory_tracker.sample()
+
+        # Stop tracking memory
+        state.memory_tracker.stop()
 
         if not saved_exception:
             state.build_status = build.FunctionStatus.SUCCESSFUL
@@ -404,14 +291,13 @@ def launch(
                 )
                 state.invocation_info.status_message_color = printing.Colors.OKGREEN
 
-        if track_memory_interval is not None:
-            # Wait for memory tracker to finish writing yaml data file
-            while memory_tracker_process.is_alive():
-                memory_tracker_process.join(timeout=1.0)
-            if os.path.exists(yaml_path):
-                with open(yaml_path, "r", encoding="utf-8") as f:
-                    memory_tracks = yaml.safe_load(f)
-                _plot_memory_usage(state, memory_tracks)
+        plot_path = state.memory_tracker.create_plot(state.build_name)
+        if plot_path is not None:
+            printing.log_info(f"Saved plot of memory usage to {plot_path}")
+            state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, plot_path)
+        elif track_memory_interval is not None:  # only an error if tracking was requested
+            printing.log_info("Error in memory usage tracking, no plot generated")
+            state.save_stat(fs.Keys.MEMORY_USAGE_PLOT, "NONE")
 
         if vars(state).get("models_found") and vars(state).get("invocation_info"):
 
diff --git a/src/turnkeyml/state.py b/src/turnkeyml/state.py
index d46e9eab..82021a06 100644
--- a/src/turnkeyml/state.py
+++ b/src/turnkeyml/state.py
@@ -5,6 +5,7 @@
 import turnkeyml.common.build as build
 import turnkeyml.common.filesystem as fs
 from turnkeyml.version import __version__ as turnkey_version
+from turnkeyml.memory_tracker import MemoryTracker
 
 
 def _is_nice_to_write(value):
@@ -80,6 +81,7 @@ def __init__(
         self.downcast_applied = False
         self.uid = build.unique_id()
         self.results = None
+        self.memory_tracker = MemoryTracker()
 
         # Store any additional kwargs as members
         for key, value in kwargs.items():
diff --git a/src/turnkeyml/tools/load_build.py b/src/turnkeyml/tools/load_build.py
index 85e8662a..32ae0c5d 100644
--- a/src/turnkeyml/tools/load_build.py
+++ b/src/turnkeyml/tools/load_build.py
@@ -74,7 +74,7 @@ def run(self, state: State, input: str = "", skip_policy=skip_policy_default):
         # Extract the cache directory, build directory, and build name from the input
         source_build_dir = pathlib.Path(input).parent
         source_build_dir_name = source_build_dir.name
-        source_cache_dir = source_build_dir.parent
+        source_cache_dir = source_build_dir.parent.parent
 
         # Make sure that the target yaml file is actually the state of a turnkey build
         if not fs.is_build_dir(source_cache_dir, source_build_dir_name):
diff --git a/src/turnkeyml/tools/management_tools.py b/src/turnkeyml/tools/management_tools.py
index c92dd52e..8f5ffcd7 100644
--- a/src/turnkeyml/tools/management_tools.py
+++ b/src/turnkeyml/tools/management_tools.py
@@ -1,6 +1,5 @@
 import argparse
 import abc
-import os
 from typing import List
 import turnkeyml.common.filesystem as fs
 import turnkeyml.common.exceptions as exp
@@ -8,6 +7,7 @@
 from turnkeyml.tools.tool import ToolParser
 from turnkeyml.version import __version__ as turnkey_version
 from turnkeyml.common.system_info import get_system_info_dict
+from turnkeyml.common.build import output_dir
 
 
 class ManagementTool(abc.ABC):
@@ -208,7 +208,7 @@ def run(
             printing.log_warning("No builds found.")
 
         for build in builds:
-            build_path = os.path.join(cache_dir, build)
+            build_path = output_dir(cache_dir, build_name=build)
             if fs.is_build_dir(cache_dir, build):
                 # Run actions on the build
                 # These actions are intended to be mutually exclusive, so we
diff --git a/src/turnkeyml/version.py b/src/turnkeyml/version.py
index 9fff231c..0d72820f 100644
--- a/src/turnkeyml/version.py
+++ b/src/turnkeyml/version.py
@@ -1 +1 @@
-__version__ = "5.0.5"
+__version__ = "5.1.0"
diff --git a/test/lemonade/llm_api.py b/test/lemonade/llm_api.py
index a8f4b170..f3b42464 100644
--- a/test/lemonade/llm_api.py
+++ b/test/lemonade/llm_api.py
@@ -10,6 +10,7 @@
 from turnkeyml.state import State
 import turnkeyml.common.filesystem as fs
 import turnkeyml.common.test_helpers as common
+import turnkeyml.common.build as build
 from lemonade.tools.huggingface_load import HuggingfaceLoad
 from lemonade.tools.huggingface_bench import HuggingfaceBench
 from lemonade.tools.mmlu import AccuracyMMLU
@@ -354,7 +355,10 @@ def test_006_multiple_prompt_responses(self):
         ), stats["response_tokens"]
         # Check that histogram figure was generated
         assert os.path.exists(
-            os.path.join(state.cache_dir, state.build_name, "response_lengths.png")
+            os.path.join(
+                build.output_dir(state.cache_dir, state.build_name),
+                "response_lengths.png",
+            )
         )
 
 
diff --git a/test/lemonade/oga_cpu_api.py b/test/lemonade/oga_cpu_api.py
index 0cbc6ad4..32608ae8 100644
--- a/test/lemonade/oga_cpu_api.py
+++ b/test/lemonade/oga_cpu_api.py
@@ -5,6 +5,7 @@
 from turnkeyml.state import State
 import turnkeyml.common.test_helpers as common
 import turnkeyml.common.filesystem as fs
+from turnkeyml.common.build import builds_dir
 from lemonade.tools.ort_genai.oga import OgaLoad
 from lemonade.tools.chat import LLMPrompt
 from lemonade.tools.mmlu import AccuracyMMLU
@@ -22,7 +23,7 @@
 class Testing(unittest.TestCase):
 
     def setUp(self) -> None:
-        shutil.rmtree(cache_dir, ignore_errors=True)
+        shutil.rmtree(builds_dir(cache_dir), ignore_errors=True)
 
     def test_001_ogaload(self):
         # Test the OgaLoad and LLMPrompt tools on an NPU model
diff --git a/test/turnkey/analysis.py b/test/turnkey/analysis.py
index 45ba7520..58e06f7b 100644
--- a/test/turnkey/analysis.py
+++ b/test/turnkey/analysis.py
@@ -16,6 +16,7 @@
 import turnkeyml.common.filesystem as filesystem
 import turnkeyml.common.test_helpers as common
 import turnkeyml.common.exceptions as exp
+from turnkeyml.common.build import output_dir
 
 try:
     # pylint: disable=unused-import
@@ -144,7 +145,7 @@ def __init__(self, **kwargs):
 
 
 def cache_is_lean(cache_dir, build_name):
-    files = list(glob.glob(f"{cache_dir}/{build_name}/**/*", recursive=True))
+    files = list(glob.glob(f"{output_dir(cache_dir, build_name)}/**/*", recursive=True))
     is_lean = len([x for x in files if ".onnx" in x]) == 0
     metadata_found = len([x for x in files if ".txt" in x]) > 0
     return is_lean and metadata_found
@@ -170,7 +171,7 @@ def run_analysis(args):
 
 
 def check_discover_log(build_name: str, expected_content: str):
-    log_path = os.path.join(cache_dir, build_name, "log_discover.txt")
+    log_path = os.path.join(output_dir(cache_dir, build_name), "log_discover.txt")
     with open(log_path, "r", encoding="utf-8") as log_file:
         log_content = log_file.read()
         assert expected_content in log_content, log_content
diff --git a/test/turnkey/cli.py b/test/turnkey/cli.py
index 969f6ddb..085f8ffb 100644
--- a/test/turnkey/cli.py
+++ b/test/turnkey/cli.py
@@ -791,7 +791,8 @@ def test_022_cli_cache_move(self):
         # Get the build state file in its new location
         selected_build = fs.get_available_builds(new_cache_dir)[-1]
         state_file_path = os.path.join(
-            new_cache_dir, selected_build, f"{selected_build}_state.yaml"
+            build.output_dir(new_cache_dir, selected_build),
+            f"{selected_build}_state.yaml",
         )
 
         # Build the cached build in its new location