diff --git a/Makefile b/Makefile
index 2e2621ea2d..a5e8a7eca3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 .PHONY: help
-.PHONY: test test-parallel test-serial test-benchmark test-watch test-coverage test-profile warm-fastembed-cache
+.PHONY: test test-parallel test-serial test-benchmark test-watch test-coverage test-profile record-cassettes rewrite-cassettes replay-cassettes snapshot-cassettes check-record-test-env warm-fastembed-cache
 .PHONY: docs-fern docs-fern-strict docs-fern-live docs-fern-preview-watch docs-fern-generate-sdk docs-fern-fix-empty-links docs-check-redirects docs-fern-publish-staging docs-fern-publish-public
 .PHONY: pre-commit
 
@@ -13,6 +13,11 @@ WORKERS ?= auto
 DIST ?= worksteal
 
 PYTEST ?= poetry run pytest
+RECORDED_TESTS ?= tests/recorded
+RECORDED_RECORD_MODE ?= once
+RECORDED_SNAPSHOT_MODE ?= create
+RECORDED_REQUIRED_KEYS ?= OPENAI_API_KEY NVIDIA_API_KEY
+RECORDED_REPLAY_ENV ?= env -u HTTP_PROXY -u HTTPS_PROXY -u ALL_PROXY -u http_proxy -u https_proxy -u all_proxy
 # These targets assume a Unix-like shell for env -u; use bash, Git Bash, or WSL on Windows.
 UNIT_TEST_ENV ?= env -u OPENAI_API_KEY -u NVIDIA_API_KEY \
 	-u LIVE_TEST -u LIVE_TEST_MODE -u TEST_LIVE_MODE
@@ -43,6 +48,33 @@ test-coverage:
 test-profile:
 	$(PYTEST) -vv --profile-svg $(ARGS) $(TEST)
 
+record-cassettes: check-record-test-env  # Record (skipping fake_cassette tests), then fill snapshots and verify replay with the network blocked
+	$(PYTEST) $(RECORDED_TESTS) --record-mode=$(RECORDED_RECORD_MODE) -m "not fake_cassette"
+	$(RECORDED_REPLAY_ENV) $(PYTEST) $(RECORDED_TESTS) --block-network --inline-snapshot=$(RECORDED_SNAPSHOT_MODE)
+	$(RECORDED_REPLAY_ENV) $(PYTEST) $(RECORDED_TESTS) --block-network
+
+rewrite-cassettes:  # Re-invoke record-cassettes with RECORDED_RECORD_MODE=rewrite and RECORDED_SNAPSHOT_MODE=fix
+	$(MAKE) record-cassettes RECORDED_RECORD_MODE=rewrite RECORDED_SNAPSHOT_MODE=fix
+
+replay-cassettes:  # Run the recorded tests under RECORDED_REPLAY_ENV with --block-network
+	$(RECORDED_REPLAY_ENV) $(PYTEST) $(RECORDED_TESTS) --block-network
+
+snapshot-cassettes:  # Same run as replay-cassettes, plus --inline-snapshot=fix
+	$(RECORDED_REPLAY_ENV) $(PYTEST) $(RECORDED_TESTS) --block-network --inline-snapshot=fix
+
+check-record-test-env:  # Exit 2 with a message on stderr when any key in RECORDED_REQUIRED_KEYS is unset or empty
+	@missing=""; \
+	for key in $(RECORDED_REQUIRED_KEYS); do \
+		if [ -z "$$(printenv "$$key")" ]; then \
+			missing="$$missing $$key"; \
+		fi; \
+	done; \
+	if [ -n "$$missing" ]; then \
+		printf '%s\n' "Missing required env var(s):$$missing" \
+			"Set them before make record-cassettes, or override RECORDED_REQUIRED_KEYS for a focused refresh." >&2; \
+		exit 2; \
+	fi
+
 warm-fastembed-cache:
 	$(FASTEMBED_ENV) poetry run python -c 'from fastembed import TextEmbedding; model = TextEmbedding("$(FASTEMBED_MODEL)"); next(model.embed(["warmup"]))'
 
@@ -86,6 +118,10 @@ help:
 		'  make test-benchmark [ARGS="-q"]' \
 		'  make test-parallel [TEST=path] [WORKERS=auto] [ARGS="-q --tb=short"]' \
 		'  make test-watch [TEST=path]' \
+		'  make record-cassettes [RECORDED_TESTS=tests/recorded] [RECORDED_RECORD_MODE=once] [RECORDED_SNAPSHOT_MODE=create] [RECORDED_REQUIRED_KEYS="OPENAI_API_KEY NVIDIA_API_KEY"]' \
+		'  make rewrite-cassettes [RECORDED_TESTS=tests/recorded] [RECORDED_REQUIRED_KEYS="OPENAI_API_KEY NVIDIA_API_KEY"]' \
+		'  make replay-cassettes [RECORDED_TESTS=tests/recorded]' \
+		'  make snapshot-cassettes [RECORDED_TESTS=tests/recorded]' \
 		'' \
 		'Tests:' \
 		'  test                  Run pytest.ini testpaths with pytest-xdist' \
@@ -95,6 +131,10 @@ help:
 		'  test-watch            Run pytest in watch mode' \
 		'  test-coverage         Run pytest with coverage' \
 		'  test-profile          Run pytest with profiling' \
+		'  record-cassettes      Record missing or selected cassettes, fill snapshots, and verify replay' \
+		'  rewrite-cassettes     Rewrite selected cassettes, fill snapshots, and verify replay' \
+		'  replay-cassettes      Verify selected cassettes offline without recording' \
+		'  snapshot-cassettes    Update inline snapshots from existing cassettes offline' \
 		'  warm-fastembed-cache  Prime the repo-local FastEmbed cache' \
 		'' \
 		'Docs:' \
diff --git a/nemoguardrails/llm/prompts.py b/nemoguardrails/llm/prompts.py
index 104ea2e7c9..3a32186525 100644
--- a/nemoguardrails/llm/prompts.py
+++ b/nemoguardrails/llm/prompts.py
@@ -41,6 +41,8 @@ def _load_prompts() -> List[TaskPrompt]:
 
     for path in prompts_dirs:
         for root, dirs, files in os.walk(path):
+            dirs.sort()
+            files.sort()
             for filename in files:
                 if filename.endswith(".yml") or filename.endswith(".yaml"):
                     with open(os.path.join(root, filename), encoding="utf-8") as prompts_file:
diff --git a/nemoguardrails/rails/llm/llmrails.py b/nemoguardrails/rails/llm/llmrails.py
index 63c0fa27b7..390958a1f0 100644
--- a/nemoguardrails/rails/llm/llmrails.py
+++ b/nemoguardrails/rails/llm/llmrails.py
@@ -318,9 +318,13 @@ def __init__(
             self.config.flows.extend(default_flows)
 
             # We also need to load the content from the components library.
+            # Sort entries so the traversal order is filesystem-independent;
+            # otherwise the order in which library bot_messages are inserted
+            # (and which definition wins on collisions) varies between platforms.
             library_path = os.path.join(os.path.dirname(__file__), "../../library")
             for root, dirs, files in os.walk(library_path):
-                for file in files:
+                dirs.sort()
+                for file in sorted(files):
                     # Extract the full path for the file
                     full_path = os.path.join(root, file)
                     if file.endswith(".co"):
diff --git a/poetry.lock b/poetry.lock
index 0b7500a30f..57c5ec61e2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -302,6 +302,21 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""}
 [package.extras]
 trio = ["trio (>=0.31.0)", "trio (>=0.32.0)"]
 
+[[package]]
+name = "asttokens"
+version = "3.0.1"
+description = "Annotate AST trees with source code positions"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "asttokens-3.0.1-py3-none-any.whl", hash = "sha256:15a3ebc0f43c2d0a50eeafea25e19046c68398e487b9f1f5b517f7c0f40f976a"},
+    {file = "asttokens-3.0.1.tar.gz", hash = "sha256:71a4ee5de0bde6a31d64f6b13f2293ac190344478f081c3d1bccfcf5eacb0cb7"},
+]
+
+[package.extras]
+astroid = ["astroid (>=2,<5)"]
+test = ["astroid (>=2,<5)", "pytest (<9.0)", "pytest-cov", "pytest-xdist"]
+
 [[package]]
 name = "async-timeout"
 version = "4.0.3"
@@ -1380,6 +1395,20 @@ files = [
 [package.extras]
 testing = ["hatch", "pre-commit", "pytest", "tox"]
 
+[[package]]
+name = "executing"
+version = "2.2.1"
+description = "Get the currently executing AST node of a frame, and other information"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017"},
+    {file = "executing-2.2.1.tar.gz", hash = "sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4"},
+]
+
+[package.extras]
+tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
+
 [[package]]
 name = "fast-langdetect"
 version = "1.0.0"
@@ -2284,6 +2313,29 @@ files = [
     {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"},
 ]
 
+[[package]]
+name = "inline-snapshot"
+version = "0.33.0"
+description = "golden master/snapshot/approval testing library which puts the values right into your source code"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "inline_snapshot-0.33.0-py3-none-any.whl", hash = "sha256:76b8c2c5899d27d3d464d1160eb3b8eee179ba635bb80a8e5e93220f10b60207"},
+    {file = "inline_snapshot-0.33.0.tar.gz", hash = "sha256:856cfc18dea755dd78ffa0fbac8c161038ca0bfb4bd0bbb5d519f4bca3dfeff4"},
+]
+
+[package.dependencies]
+asttokens = ">=2.0.5"
+executing = ">=2.2.0"
+pytest = ">=8.3.4"
+rich = ">=13.7.1"
+tomli = {version = ">=2.0.0", markers = "python_version < \"3.11\""}
+typing-extensions = "*"
+
+[package.extras]
+black = ["black (>=23.3.0)"]
+dirty-equals = ["dirty-equals (>=0.9.0)"]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -6147,6 +6199,25 @@ gprof2dot = "*"
 pytest = "*"
 six = "*"
 
+[[package]]
+name = "pytest-recording"
+version = "0.13.4"
+description = "A pytest plugin powered by VCR.py to record and replay HTTP traffic"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "pytest_recording-0.13.4-py3-none-any.whl", hash = "sha256:ad49a434b51b1c4f78e85b1e6b74fdcc2a0a581ca16e52c798c6ace971f7f439"},
+    {file = "pytest_recording-0.13.4.tar.gz", hash = "sha256:568d64b2a85992eec4ae0a419c855d5fd96782c5fb016784d86f18053792768c"},
+]
+
+[package.dependencies]
+pytest = ">=3.5.0"
+vcrpy = ">=2.0.1"
+
+[package.extras]
+dev = ["pytest-httpbin", "pytest-mock", "requests", "werkzeug (==3.1.3)"]
+tests = ["pytest-httpbin", "pytest-mock", "requests", "werkzeug (==3.1.3)"]
+
 [[package]]
 name = "pytest-xdist"
 version = "3.8.0"
@@ -8328,6 +8399,24 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""}
 [package.extras]
 standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1)", "watchfiles (>=0.20)", "websockets (>=10.4)"]
 
+[[package]]
+name = "vcrpy"
+version = "8.1.1"
+description = "Automatically mock your HTTP interactions to simplify and speed up testing"
+optional = false
+python-versions = ">=3.10"
+files = [
+    {file = "vcrpy-8.1.1-py3-none-any.whl", hash = "sha256:2d16f31ad56493efb6165182dd99767207031b0da3f68b18f975545ede8ac4b9"},
+    {file = "vcrpy-8.1.1.tar.gz", hash = "sha256:58e3053e33b423f3594031cb758c3f4d1df931307f1e67928e30cf352df7709f"},
+]
+
+[package.dependencies]
+PyYAML = "*"
+wrapt = "*"
+
+[package.extras]
+tests = ["aiohttp", "boto3", "cryptography", "httpbin", "httpcore", "httplib2", "httpx", "pycurl", "pytest", "pytest-aiohttp", "pytest-asyncio", "pytest-cov", "pytest-httpbin", "requests (>=2.22.0)", "tornado", "urllib3", "werkzeug (==2.0.3)"]
+
 [[package]]
 name = "virtualenv"
 version = "21.1.0"
@@ -9250,4 +9339,4 @@ tracing = ["aiofiles", "opentelemetry-api"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.14"
-content-hash = "05fc5e457ae4deadcbcd7c4211b7e2c16a6913b992eb418e3f8aaf423eb93d7b"
+content-hash = "89555ebef15f93613e338ac9d68a3af93bc6cf939861584ea65d70a469c1626a"
diff --git a/pyproject.toml b/pyproject.toml
index 80741d538c..a12d5590ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -159,6 +159,7 @@ pytest-asyncio = ">=0.21.0, <1.0.0"
 pytest-cov = ">=4.1.0"
 pytest-httpx = ">=0.22.0"
 pytest-xdist = "^3.8.0"
+pytest-recording = "^0.13.4"
 streamlit = ">=1.37.0"
 tox = "^4.23.2"
 pytest-profiling = "^1.7.0"
@@ -182,6 +183,7 @@ langchain-core = ">=0.2.14,<2.0.0"
 langchain-community = ">=0.2.5,<2.0.0"
 langchain-openai = ">=0.1.0"
 langchain-nvidia-ai-endpoints = ">=0.2.0"
+inline-snapshot = "^0.33.0"
 
 # Directories in which to run Pyright type-checking
 [tool.pyright]
@@ -230,6 +232,9 @@ log-level = "DEBUG"
 # phase, which will cause tests to fail or "magically" ignored.
 log_cli = "False"
 
+[tool.inline-snapshot]
+format-command = "ruff format --stdin-filename {filename} -"
+
 [build-system]
 requires = ["poetry-core>=1.0.0,<2.0.0"]
 build-backend = "poetry.core.masonry.api"
diff --git a/pytest.ini b/pytest.ini
index 746fd6dddb..96c4e599a4 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -11,10 +11,13 @@ log_cli = False
 asyncio_default_fixture_loop_scope = function
 
 markers =
+    recorded: deterministic cassette replay tests
     serial: documentation-only marker for tests that may need serial scheduling
     slow: high-runtime tests that may need separate scheduling
     perf: wall-clock performance/timing tests; excluded from CI by default, run on demand with -m perf
     live: tests that require real provider credentials or external services
+    vcr: marker used by pytest-recording
+    fake_cassette: cassette is hand-authored; refresh workflow must skip these tests
     real_embeddings: tests that intentionally exercise configured real embedding providers
 
 testpaths =
diff --git a/tests/recorded/README.md b/tests/recorded/README.md
new file mode 100644
index 0000000000..507305c497
--- /dev/null
+++ b/tests/recorded/README.md
@@ -0,0 +1,183 @@
+# Recorded Tests
+
+Recorded tests replay provider traffic through pytest-recording cassettes and must run without live network access by default.
+
+## Adding a test
+
+Markers are applied once per module via `pytestmark`; do not stack `@pytest.mark.recorded` / `vcr` / `asyncio` on each test. Use a module-level list, and fold in `vcr`/`asyncio` only when every test in the module needs them:
+
+```python
+import pytest
+
+from nemoguardrails import LLMRails
+from tests.recorded.rails.public_api.configs import OPENAI_BASELINE_CONFIG
+from tests.recorded.rails_config import load_config
+from tests.recorded.snapshots import snapshot
+
+pytestmark = [pytest.mark.recorded, pytest.mark.vcr, pytest.mark.asyncio]
+
+
+async def test_my_case(openai_api_key):
+    rails = LLMRails(load_config(OPENAI_BASELINE_CONFIG), verbose=False)
+    result = await rails.generate_async(prompt="...")
+    assert result == snapshot()
+```
+
+Config constants (`OPENAI_BASELINE_CONFIG` and friends) live in the suite-local
+`configs.py` next to the tests; `snapshot` is the suite-local re-export in
+`tests/recorded/snapshots.py`, not `inline_snapshot` directly.
+
+Request credentials as fixture parameters (`openai_api_key`, `nvidia_api_key`) rather than calling `request.getfixturevalue(...)`. In modules that mix sync/async tests or vcr/non-vcr tests, keep only `recorded` in `pytestmark` and apply `@pytest.mark.vcr` / `@pytest.mark.asyncio` per test. Then use the Makefile refresh workflow to record once with credentials, fill the snapshot offline, and verify the replay:
+
+```bash
+OPENAI_API_KEY=... make record-cassettes \
+  RECORDED_TESTS=path::test_my_case \
+  RECORDED_REQUIRED_KEYS=OPENAI_API_KEY
+```
+
+## Negative paths
+
+This suite owns **pipeline-level** failures (how `LLMRails` behaves when a model call
+fails) and **public-API input validation**. Client/wire-level conditions (status code to
+exception mapping, retries, SSE, malformed bodies) belong in `tests/llm/clients/`, which
+covers them with `httpx.MockTransport` + JSON fixtures; do not duplicate them here.
+
+Prefer mechanisms in this order:
+
+1. **Recordable real error** (refreshable cassette). A nonexistent model name yields a real,
+   deterministic 404, so error paths record and refresh like any happy path. Use a config
+   whose model is invalid (see `OPENAI_INVALID_MODEL_CONFIG`,
+   `CONTENT_SAFETY_INVALID_MODEL_CONFIG`).
+2. **Pure runtime** `pytest.raises` for input validation (no cassette, no transport).
+3. **Fake cassette** (`@pytest.mark.fake_cassette`) only as a last resort, for a synthetic
+   response that must flow through the full pipeline and cannot be reproduced by 1 or 2.
+
+Observed behavior these tests pin: a failing model call (main *or* a rail's own model)
+propagates as `LLMCallException` — a safety-model failure does not let content through
+silently. Name negative tests `test_<surface>_<failure>_<behavior>` with the suffixes
+`_raises` / `_fails_closed` / `_invalid_*` so they are greppable
+(`pytest -k "raises or invalid"`), and co-locate each with its happy-path sibling module.
+
+## Replay
+
+```bash
+poetry run pytest tests/recorded --block-network -v --durations=10
+```
+
+Focused rails replay:
+
+```bash
+poetry run pytest tests/recorded/rails/public_api --block-network -v
+poetry run pytest tests/recorded/rails/library --block-network -v
+```
+
+Replay mode installs dummy API keys from `tests/recorded/utils.py`. A cassette miss with `--block-network` is a test failure.
+
+## Refresh
+
+Refresh only in a trusted environment with real provider credentials. The
+record -> fill-snapshots -> verify loop is wrapped in make targets.
+
+```bash
+OPENAI_API_KEY=... NVIDIA_API_KEY=... make record-cassettes
+```
+
+`record-cassettes` defaults to `RECORDED_RECORD_MODE=once` and
+`RECORDED_SNAPSHOT_MODE=create`, which records missing cassettes, fills empty
+snapshots, and replays existing cassettes. This is the safest mode when adding
+new tests because it will not rewrite unrelated existing cassettes selected by
+the same path.
+
+For a focused new cassette that only touches one provider, pass the test node and
+override the preflight list:
+
+```bash
+OPENAI_API_KEY=... make record-cassettes \
+  RECORDED_TESTS=tests/recorded/rails/public_api/test_generate.py::test_new_case \
+  RECORDED_REQUIRED_KEYS=OPENAI_API_KEY
+```
+
+`RECORDED_TESTS` is passed directly to pytest, so it can be a single test, a
+test class, several files, or a directory:
+
+```bash
+OPENAI_API_KEY=... make record-cassettes \
+  RECORDED_TESTS="tests/recorded/rails/public_api/test_generate.py tests/recorded/clients/test_openai_chat.py" \
+  RECORDED_REQUIRED_KEYS=OPENAI_API_KEY
+```
+
+For an intentional rewrite of existing cassettes, use `rewrite-cassettes`:
+
+```bash
+OPENAI_API_KEY=... make rewrite-cassettes \
+  RECORDED_TESTS=tests/recorded/rails/public_api/test_generate.py::test_openai_generate_async_public_contract \
+  RECORDED_REQUIRED_KEYS=OPENAI_API_KEY
+```
+
+`rewrite-cassettes` uses `RECORDED_RECORD_MODE=rewrite` and
+`RECORDED_SNAPSHOT_MODE=fix`, so changed recorded outputs update existing inline
+snapshots before the final offline replay verification.
+
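+For a full trusted refresh, set the record mode explicitly. Note that VCR.py's `all` mode re-records every request but extends an existing cassette file rather than replacing it; use `rewrite-cassettes` when stale interactions must be dropped: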
+
+```bash
+OPENAI_API_KEY=... NVIDIA_API_KEY=... make record-cassettes RECORDED_RECORD_MODE=all RECORDED_SNAPSHOT_MODE=fix
+```
+
+Replay and snapshot-only workflows do not need real provider credentials:
+
+```bash
+make replay-cassettes RECORDED_TESTS=tests/recorded/rails/public_api/test_generate.py::test_openai_generate_async_public_contract
+make snapshot-cassettes RECORDED_TESTS=tests/recorded/rails/public_api/test_generate.py::test_openai_generate_async_public_contract
+```
+
+## Cassettes
+
+Rails tests use pytest-recording's default names:
+
+```text
+tests/recorded/rails/<suite>/cassettes/<test_module>/<test_name>.yaml
+```
+
+Parameterized tests include the parameter id in the cassette filename. Every test (rails and clients) uses this default naming; do not add `@pytest.mark.default_cassette(...)`.
+
+JSON request and response bodies are stored as `parsed_body` and rehydrated by `ReadableYamlSerializer` during replay. SSE responses also use parseable `parsed_body` events.
+
+Cassettes preserve scrubbed JSON text without smart-character normalization so provider payloads stay inspectable. Request matching and snapshot helpers normalize smart quotes, dash variants, ellipses, and NFKC at comparison time. Response headers are dropped by exact name and by prefix (`x-`, `cf-`, `openai-`); `tests/recorded/sanitization.py` holds the `ALLOWED_HEADERS` exceptions that must survive the prefix sweep (currently `content-type`).
+
+Inspect a cassette:
+
+```bash
+poetry run python -m tests.recorded.inspect_cassette tests/recorded/rails/public_api/cassettes/test_stream/test_openai_stream_async_public_contract.yaml
+```
+
+## Snapshots
+
+Rails replay outputs are pinned with inline snapshots after normalization. Create or fix snapshots with:
+
+```bash
+poetry run pytest tests/recorded/rails --block-network --inline-snapshot=create
+poetry run pytest tests/recorded/rails --block-network --inline-snapshot=fix
+poetry run pytest tests/recorded/rails --block-network --inline-snapshot=review
+```
+
+Snapshot formatting uses `ruff format` through `[tool.inline-snapshot]` in `pyproject.toml`.
+Snapshot create/fix/review runs must be serial. Use `make record-cassettes` or a
+direct `poetry run pytest ... --inline-snapshot=<mode>` command; the default
+`make test` path uses xdist, where inline-snapshot disables update and report
+modes.
+
+Volatile response fields (ids, timestamps, fingerprints) are scrubbed to fixed sentinels in the cassette, so snapshots assert them directly without needing loose matchers.
+
+## Fake Outputs
+
+Prefer `FakeLLMModel` when a test needs the main model to emit a specific output and provider-backed rail/task calls can still replay from VCR. This keeps the test refreshable.
+
+Use a fake cassette only when runtime injection cannot model the behavior clearly, such as a provider stream/error path. Fake cassettes must:
+
+- live under a `cassettes/**/fake/` directory,
+- use `@pytest.mark.fake_cassette`,
+- be excluded from refresh with `-m "not fake_cassette"`,
+- include YAML header metadata with `reason`, `frozen_fields`, and `fake_llm_model_considered`.
+
+The fake-cassette metadata validator is in `tests/recorded/fake_cassettes.py`.
diff --git a/tests/recorded/__init__.py b/tests/recorded/__init__.py
new file mode 100644
index 0000000000..467079831e
--- /dev/null
+++ b/tests/recorded/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/recorded/assertions.py b/tests/recorded/assertions.py
new file mode 100644
index 0000000000..a7d333cc47
--- /dev/null
+++ b/tests/recorded/assertions.py
@@ -0,0 +1,164 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from nemoguardrails.rails.llm.options import GenerationResponse, RailsResult, RailStatus
+from tests.recorded.cassette import RecordedChatResponse, cassette_request_jsons
+
+
+def assert_rails_result(
+    result: RailsResult,
+    *,
+    status: RailStatus,
+    rail: str | None = None,
+    content: str | None = None,
+) -> None:
+    """Assert the result's status and string content; optionally pin ``rail`` and ``content``."""
+    assert result.status == status
+    assert isinstance(result.content, str)
+    # ``None`` means "do not check" for the two optional expectations.
+    assert rail is None or result.rail == rail
+    assert content is None or result.content == content
+
+
+def assert_generation_response(result: Any) -> GenerationResponse:
+    """Assert ``result`` is a ``GenerationResponse`` with non-blank assistant text and return it."""
+    assert isinstance(result, GenerationResponse) and result.response
+    text = result.response
+    if isinstance(text, list):
+        assert text[-1]["role"] == "assistant"
+        text = text[-1]["content"]
+    assert text.strip()
+    return result
+
+
+def assert_blocked_generation(result: Any, *, refusal: str | None = None) -> GenerationResponse:
+    """Assert that a generate-surface result was stopped by a rail.
+
+    Runs the ``assert_generation_response`` checks first, then requires a log in which
+    at least one activated rail has ``stop`` set. When ``refusal`` is given, the final
+    assistant message must equal it exactly. Returns the validated response.
+    """
+    response = assert_generation_response(result)
+    assert response.log is not None
+    assert any(activated.stop for activated in response.log.activated_rails)
+    reply = response.response
+    final_text = reply[-1]["content"] if isinstance(reply, list) else reply
+    assert refusal is None or final_text == refusal
+    return response
+
+
+def assert_activated_rails(result: GenerationResponse, expected: set[str]) -> None:
+    """Assert every name in ``expected`` is among the activated rails in ``result.log``."""
+    assert result.log is not None
+    assert expected.issubset(rail.name for rail in result.log.activated_rails)
+
+
+def assert_llm_tasks(result: GenerationResponse, expected: set[str]) -> None:
+    """Assert every task in ``expected`` appears among the LLM calls in ``result.log``."""
+    log = result.log
+    assert log is not None and log.llm_calls is not None
+    assert expected.issubset(call.task for call in log.llm_calls)
+
+
+def assert_generated_text(result: Any) -> None:
+    """Assert ``result`` is a string that contains non-whitespace characters."""
+    assert isinstance(result, str) and result.strip()
+
+
+def assert_generated_message(result: Any) -> None:
+    """Assert ``result`` is an assistant message dict whose ``content`` is a non-blank string."""
+    assert isinstance(result, dict)
+    assert result.get("role") == "assistant"
+    text = result.get("content")
+    assert isinstance(text, str) and text.strip()
+
+
+def assert_stream_contract(chunks: list[Any], *, expect_multiple: bool = True) -> str:
+    """Assert the stream yielded non-blank text and return the concatenated content."""
+    assert chunks
+    assert not expect_multiple or len(chunks) > 1
+
+    pieces: list[str] = []
+    for item in chunks:
+        assert isinstance(item, (str, dict))
+        if isinstance(item, str):
+            pieces.append(item)
+            continue
+        # Dict chunks: prefer a string "text" value, else a string "content" value.
+        fields = [item[key] for key in ("text", "content") if isinstance(item.get(key), str)]
+        pieces.extend(fields[:1])
+
+    joined = "".join(pieces)
+    assert joined.strip()
+    return joined
+
+
+def assert_no_stream_error(chunks: list[Any]) -> None:
+    """Raise ``AssertionError`` carrying the first string chunk that starts with ``{"error":``."""
+    for error_chunk in (c for c in chunks if isinstance(c, str) and c.startswith('{"error":')):
+        raise AssertionError(error_chunk)
+
+
+def assert_blocked_stream_error(chunks: list[Any]) -> None:
+    """Assert the last JSON error chunk has type ``guardrails_violation`` and code ``content_blocked``."""
+    parsed = [json.loads(c) for c in chunks if isinstance(c, str) and c.startswith('{"error":')]
+    assert parsed
+    assert parsed[-1]["error"]["type"] == "guardrails_violation" and parsed[-1]["error"]["code"] == "content_blocked"
+
+
+def assert_llm_call_usage(llm_call: Any, expected: RecordedChatResponse) -> None:
+    """Assert the call's token counts equal the figures in the recorded response's ``usage``."""
+    usage = expected.usage
+    assert usage and llm_call.prompt_tokens == usage["input_tokens"]
+    assert llm_call.completion_tokens == usage["output_tokens"] and llm_call.total_tokens == usage["total_tokens"]
+
+
+def assert_runtime_model_matches(llm_call: Any, *, configured_model: str, recorded_model: str | None) -> None:
+    """Assert the call names a model equal to the configured or recorded one and has a task."""
+    name = llm_call.llm_model_name
+    assert name and name in {configured_model, recorded_model} and llm_call.task
+
+
+def assert_request_payload(
+    cassette_path: Path,
+    *,
+    model: str,
+    stream: bool | None = None,
+    expected_params: dict[str, Any] | None = None,
+    absent_params: set[str] | None = None,
+) -> None:
+    """Assert that the last recorded request for ``model`` in the cassette matches.
+
+    Optionally checks the ``stream`` flag, that each of ``expected_params`` was sent with
+    the given value, and that none of ``absent_params`` appear; failures name the mismatch.
+    """
+    payloads = cassette_request_jsons(cassette_path)
+    matches = [payload for payload in payloads if payload.get("model") == model]
+    assert matches, f"{cassette_path} does not contain a request for {model}"
+    payload = matches[-1]
+    if stream is not None:
+        assert payload.get("stream") is stream, f"stream was {payload.get('stream')!r}, expected {stream!r}"
+    if expected_params:
+        for key, value in expected_params.items():
+            assert payload.get(key) == value, f"{key} was {payload.get(key)!r}, expected {value!r}"
+    if absent_params:
+        for key in absent_params:
+            assert key not in payload, f"unexpected request parameter {key!r}"
diff --git a/tests/recorded/cassette.py b/tests/recorded/cassette.py
new file mode 100644
index 0000000000..80abd2a509
--- /dev/null
+++ b/tests/recorded/cassette.py
@@ -0,0 +1,373 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import json
+import unicodedata
+from copy import deepcopy
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+# Typographic punctuation (curly quotes, hyphen/dash variants, ellipsis) mapped to
+# the ASCII text each character is replaced with before comparisons.
+SMART_CHAR_MAP = {
+    "‘": "'",
+    "’": "'",
+    "“": '"',
+    "”": '"',
+    "‐": "-",
+    "‑": "-",
+    "‒": "-",
+    "–": "-",
+    "—": "--",
+    "…": "...",
+}
+# Translation table derived from SMART_CHAR_MAP, in the form ``str.translate`` expects.
+SMART_CHAR_TRANS = str.maketrans(SMART_CHAR_MAP)
+
+
+def normalize_smart_chars(text: str) -> str:
+    """Return ``text`` with smart punctuation replaced by ASCII, then NFKC-normalized.
+
+    The translation table is applied first, so the mapped characters get the
+    replacements chosen in ``SMART_CHAR_MAP`` rather than their NFKC forms.
+    """
+    ascii_punctuation = text.translate(SMART_CHAR_TRANS)
+    return unicodedata.normalize("NFKC", ascii_punctuation)
+
+
+def normalize_body(value: Any) -> Any:
+    """Recursively normalize every string inside a JSON-like value.
+
+    Dict values and list items are walked; dict keys and any other value types
+    are returned unchanged. New containers are built, the input is not mutated.
+    """
+    if isinstance(value, dict):
+        return {key: normalize_body(item) for key, item in value.items()}
+    if isinstance(value, list):
+        return list(map(normalize_body, value))
+    return normalize_smart_chars(value) if isinstance(value, str) else value
+
+
+@dataclass(frozen=True)
+class RecordedChatResponse:
+    """Parsed chat-completion response extracted from a cassette interaction.
+
+    ``usage`` is normalized across provider conventions (input/output/total tokens);
+    ``raw_usage`` keeps the original payload for assertions that need it.
+    """
+
+    # Assistant text: the first choice's message content, or the concatenated
+    # stream deltas; an empty string when the recording carries no text.
+    content: str
+    # Token counts keyed by input_tokens/output_tokens/total_tokens, plus
+    # cached_tokens/reasoning_tokens when recorded; empty when usage is absent.
+    usage: dict[str, int | None]
+    # "model" field of the response payload (can differ from the requested model).
+    model: str | None
+    # "finish_reason" of the first choice, or the last non-empty one seen in a stream.
+    finish_reason: str | None
+    # "id" field of the response payload.
+    request_id: str | None
+    # The "usage" object exactly as recorded, before normalization.
+    raw_usage: dict[str, Any] | None
+
+
+def decode_body_json(body: Any) -> Any:
+    """Decode a cassette body into its JSON value.
+
+    Accepts raw ``bytes``/``str`` JSON text, or a body mapping: ``parsed_body``
+    wins when present, otherwise ``string`` is decoded, otherwise the mapping
+    itself is returned. Any other input yields ``None``.
+
+    Raises:
+        json.JSONDecodeError: If the text is not valid JSON.
+        UnicodeDecodeError: If a bytes body is not valid UTF-8.
+    """
+    if isinstance(body, dict):
+        if "parsed_body" in body:
+            return body["parsed_body"]
+        return decode_body_json(body["string"]) if "string" in body else body
+    if isinstance(body, bytes):
+        body = body.decode("utf-8")
+    return json.loads(body) if isinstance(body, str) else None
+
+
+def decode_body_text(body: Any) -> str:
+    """Return a cassette body as text.
+
+    ``{"string": ...}`` wrappers are unwrapped, bytes are decoded as UTF-8 and
+    strings pass through; anything else yields an empty string.
+
+    Raises:
+        UnicodeDecodeError: If a bytes body is not valid UTF-8.
+    """
+    while isinstance(body, dict) and "string" in body:
+        body = body["string"]
+    if isinstance(body, bytes):
+        return body.decode("utf-8")
+    return body if isinstance(body, str) else ""
+
+
+def _request_json(interaction: dict[str, Any]) -> Any:
+    """Return the JSON request payload of a cassette interaction, or ``None``.
+
+    A request-level ``parsed_body`` (the readable cassette form) is returned
+    as-is; otherwise the raw ``body`` is decoded. Bodies that are missing, empty
+    or not valid JSON yield ``None`` instead of raising, so callers that promise
+    to skip non-JSON requests can filter on the result.
+    """
+    request = interaction.get("request", {})
+    if "parsed_body" in request:
+        return request["parsed_body"]
+    # _json_body turns decode failures into None; decode_body_json would raise.
+    return _json_body(request.get("body"))
+
+
+def _header_values(headers: dict[str, Any], name: str) -> list[str]:
+    """Return the values of header ``name``, matched case-insensitively.
+
+    Both the stored header names and ``name`` are case-folded before comparing.
+    The first matching header wins; a scalar value is wrapped in a list. An
+    empty list is returned when the header is absent.
+    """
+    wanted = name.lower()
+    for key, value in headers.items():
+        if key.lower() == wanted:
+            return value if isinstance(value, list) else [value]
+    return []
+
+
+def _is_sse_response(response: dict[str, Any]) -> bool:
+    """Tell whether any ``Content-Type`` value of the response mentions ``text/event-stream``."""
+    headers = response.get("headers", {})
+    lowered = [content_type.lower() for content_type in _header_values(headers, "content-type")]
+    for content_type in lowered:
+        if "text/event-stream" in content_type:
+            return True
+    return False
+
+
+def _json_body(value: Any) -> Any:
+    """Best-effort variant of ``decode_body_json``: return ``None`` when decoding fails."""
+    undecodable = (json.JSONDecodeError, UnicodeDecodeError, TypeError)
+    try:
+        decoded = decode_body_json(value)
+    except undecodable:
+        decoded = None
+    return decoded
+
+
+def _sse_body_payloads(text: str) -> list[Any] | None:
+    """Parse only lossless single-line ``data:`` SSE streams.
+
+    Returning ``None`` leaves the original body untouched, which avoids rewriting
+    event ids, comments, multi-line data blocks, or other SSE features this
+    serializer cannot round-trip exactly.
+
+    Args:
+        text: Raw response body. Every event must be exactly ``data: <payload>``
+            followed by a blank line, where the payload is JSON or ``[DONE]``.
+
+    Returns:
+        The decoded payloads in stream order, with the literal ``"[DONE]"``
+        standing for the terminator event, or ``None`` if any part of the body
+        falls outside the accepted format.
+    """
+    if not text.endswith("\n\n"):
+        return None
+
+    parts = text.split("\n\n")
+    if parts[-1] != "" or any(not event for event in parts[:-1]):
+        return None
+
+    payloads = []
+    for event in parts[:-1]:
+        # splitlines() drops a trailing line break such as "\r"; comparing against
+        # the untouched event rejects anything that would not be written back.
+        if event.splitlines() != [event]:
+            return None
+
+        if not event.startswith("data: "):
+            return None
+
+        payload = event.removeprefix("data: ")
+        if payload == "[DONE]":
+            payloads.append("[DONE]")
+            continue
+        try:
+            decoded = json.loads(payload)
+        except json.JSONDecodeError:
+            return None
+        if decoded == "[DONE]":
+            # A JSON *string* "[DONE]" would be rehydrated as the bare terminator.
+            return None
+        payloads.append(decoded)
+    return payloads
+
+
+def _sse_payloads_body(payloads: list[Any]) -> str:
+    """Serialize parsed payloads back into a single-line ``data:`` SSE body.
+
+    The ``"[DONE]"`` marker is written bare; every other payload is written as
+    compact JSON. Each event, including the last, is followed by a blank line.
+    """
+    events = [
+        "data: [DONE]" if payload == "[DONE]" else "data: " + json.dumps(payload, separators=(",", ":"))
+        for payload in payloads
+    ]
+    return "\n\n".join(events) + "\n\n"
+
+
+def _json_body_text(data: Any) -> str:
+    """Serialize ``data`` as compact JSON text (no whitespace after separators)."""
+    compact_separators = (",", ":")
+    return json.dumps(data, separators=compact_separators)
+
+
+def cassette_with_parsed_bodies(cassette: dict[str, Any]) -> dict[str, Any]:
+    """Store JSON bodies as readable ``parsed_body`` blocks in committed cassettes.
+
+    SSE responses are converted only when they match the strict format that can
+    be rehydrated without changing stream semantics. Bodies that cannot be
+    decoded (binary or non-UTF-8 data, non-JSON text) are left exactly as
+    recorded.
+
+    Args:
+        cassette: Cassette mapping with an optional ``interactions`` list.
+
+    Returns:
+        A deep copy of ``cassette`` with convertible bodies replaced; the input
+        is not modified.
+    """
+    cassette = deepcopy(cassette)
+    for interaction in cassette.get("interactions") or []:
+        request = interaction.get("request", {})
+        request_data = _json_body(request.get("body"))
+        if request_data is not None:
+            request["parsed_body"] = request_data
+            request.pop("body", None)
+
+        response = interaction.get("response", {})
+        body = response.get("body")
+        if not isinstance(body, dict):
+            continue
+        if _is_sse_response(response):
+            try:
+                body_text = decode_body_text(body)
+            except UnicodeDecodeError:
+                # Undecodable stream bytes cannot be parsed; keep them untouched.
+                continue
+            if body_text:
+                payloads = _sse_body_payloads(body_text)
+                if payloads is not None:
+                    body["parsed_body"] = payloads
+                    body.pop("string", None)
+                continue
+        response_data = _json_body(body)
+        # decode_body_json hands back the mapping itself when it has neither
+        # "string" nor "parsed_body"; storing that would make the body cyclic.
+        if response_data is not None and response_data is not body:
+            body["parsed_body"] = response_data
+            body.pop("string", None)
+    return cassette
+
+
+def cassette_with_rehydrated_bodies(cassette: dict[str, Any]) -> dict[str, Any]:
+    """Turn readable ``parsed_body`` blocks back into the raw strings VCR replays.
+
+    Works on a deep copy. Request payloads become compact JSON under ``body``;
+    response payloads become ``body["string"]``, serialized as an SSE stream
+    when the response is an event stream holding a list of payloads and as
+    compact JSON otherwise.
+    """
+    rehydrated = deepcopy(cassette)
+    for interaction in rehydrated.get("interactions") or []:
+        request = interaction.get("request", {})
+        if "parsed_body" in request:
+            request["body"] = _json_body_text(request.pop("parsed_body"))
+
+        response = interaction.get("response", {})
+        body = response.get("body")
+        if isinstance(body, dict) and "parsed_body" in body:
+            parsed = body.pop("parsed_body")
+            as_event_stream = _is_sse_response(response) and isinstance(parsed, list)
+            body["string"] = _sse_payloads_body(parsed) if as_event_stream else _json_body_text(parsed)
+    return rehydrated
+
+
+@lru_cache(maxsize=None)
+def _cached_cassette_interactions(cassette_path: Path) -> list[dict[str, Any]]:
+    """Load the cassette's ``interactions`` list, memoized per path.
+
+    Returns an empty list when the YAML document is not a mapping or has no
+    interactions. The returned list is the cached object itself.
+    """
+    document = yaml.safe_load(cassette_path.read_text(encoding="utf-8"))
+    interactions = document.get("interactions") if isinstance(document, dict) else None
+    return interactions or []
+
+
+def _cassette_interactions(cassette_path: Path) -> list[dict[str, Any]]:
+    """Return a deep copy of the cached interactions, so callers cannot alter the cache."""
+    shared = _cached_cassette_interactions(cassette_path)
+    return deepcopy(shared)
+
+
+def cassette_request_jsons(cassette_path: Path) -> list[dict[str, Any]]:
+    """Collect, in cassette order, every request payload that decodes to a JSON object."""
+    decoded = (_request_json(interaction) for interaction in _cassette_interactions(cassette_path))
+    return [payload for payload in decoded if isinstance(payload, dict)]
+
+
+def _normalize_usage(raw_usage: dict[str, Any] | None) -> dict[str, int | None]:
+    """Map a provider ``usage`` object onto a common set of keys.
+
+    ``input_tokens``/``output_tokens`` fall back to ``prompt_tokens``/
+    ``completion_tokens`` when the former keys are absent, and a missing total
+    is derived from the two when both are known. ``cached_tokens`` and
+    ``reasoning_tokens`` are read from the top level or from the
+    ``*_tokens_details`` objects and included only when not ``None``.
+    ``None`` input yields an empty dict.
+    """
+    if raw_usage is None:
+        return {}
+
+    input_tokens = raw_usage.get("input_tokens", raw_usage.get("prompt_tokens"))
+    output_tokens = raw_usage.get("output_tokens", raw_usage.get("completion_tokens"))
+    total_tokens = raw_usage.get("total_tokens")
+    if total_tokens is None and not (input_tokens is None or output_tokens is None):
+        total_tokens = input_tokens + output_tokens
+
+    usage: dict[str, int | None] = {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "total_tokens": total_tokens,
+    }
+
+    prompt_details = raw_usage.get("prompt_tokens_details") or {}
+    completion_details = raw_usage.get("completion_tokens_details") or {}
+    optional_counts = {
+        "cached_tokens": raw_usage.get("cached_tokens", prompt_details.get("cached_tokens")),
+        "reasoning_tokens": raw_usage.get("reasoning_tokens", completion_details.get("reasoning_tokens")),
+    }
+    usage.update({key: count for key, count in optional_counts.items() if count is not None})
+    return usage
+
+
+def _stream_payloads(text: str) -> list[dict[str, Any]]:
+    """Extract the JSON-object payloads from raw SSE text, tolerantly.
+
+    Only lines starting with ``data: `` are considered. The ``[DONE]`` marker,
+    payloads that are not valid JSON and JSON values that are not objects are
+    skipped rather than reported.
+
+    Args:
+        text: Raw event-stream body; may be empty.
+
+    Returns:
+        The decoded payload dicts in stream order.
+    """
+    payloads = []
+    # Event-stream lines end with CRLF, LF or CR only. str.splitlines() would also
+    # break on characters such as U+2028, which may appear unescaped inside a JSON
+    # string, truncating that event so it fails to decode and is dropped.
+    for line in text.replace("\r\n", "\n").replace("\r", "\n").split("\n"):
+        line = line.strip()
+        if not line.startswith("data: "):
+            continue
+        payload = line.removeprefix("data: ")
+        if payload == "[DONE]":
+            continue
+        try:
+            payload = json.loads(payload)
+        except json.JSONDecodeError:
+            continue
+        if isinstance(payload, dict):
+            payloads.append(payload)
+    return payloads
+
+
+def stream_payloads_from_body(body: Any) -> list[dict[str, Any]]:
+    """Return the streaming payload objects of a cassette body in either storage form.
+
+    A readable body (``parsed_body`` holding a list) is filtered down to its
+    dict entries; any other body is decoded to text and parsed as SSE.
+    """
+    parsed = body.get("parsed_body") if isinstance(body, dict) else None
+    if not isinstance(parsed, list):
+        return _stream_payloads(decode_body_text(body))
+    return [payload for payload in parsed if isinstance(payload, dict)]
+
+
+def _non_streaming_chat_response(interaction: dict[str, Any]) -> RecordedChatResponse | None:
+    """Build a ``RecordedChatResponse`` from a JSON (non-streamed) response body.
+
+    Only the first entry of ``choices`` is read. Returns ``None`` when the body
+    does not decode to a JSON object.
+    """
+    payload = _json_body(interaction.get("response", {}).get("body"))
+    if not isinstance(payload, dict):
+        return None
+
+    choices = payload.get("choices") or []
+    first_choice = {} if not choices else choices[0]
+    message = first_choice.get("message") or {}
+    recorded_usage = payload.get("usage")
+    return RecordedChatResponse(
+        content=message.get("content") or "",
+        usage=_normalize_usage(recorded_usage),
+        model=payload.get("model"),
+        finish_reason=first_choice.get("finish_reason"),
+        request_id=payload.get("id"),
+        raw_usage=recorded_usage,
+    )
+
+
+def _streaming_chat_response(interaction: dict[str, Any]) -> RecordedChatResponse:
+    """Fold the chunks of a streamed response into one ``RecordedChatResponse``.
+
+    String ``delta.content`` pieces from every choice are concatenated in
+    order; for ``model``, ``id``, ``usage`` and ``finish_reason`` the last
+    truthy value seen in the stream is kept.
+    """
+    chunks = stream_payloads_from_body(interaction.get("response", {}).get("body"))
+    text_pieces = []
+    raw_usage = model = finish_reason = request_id = None
+    for chunk in chunks:
+        model = chunk.get("model") or model
+        request_id = chunk.get("id") or request_id
+        if chunk.get("usage"):
+            raw_usage = chunk["usage"]
+        for choice in chunk.get("choices") or []:
+            piece = (choice.get("delta") or {}).get("content")
+            if isinstance(piece, str):
+                text_pieces.append(piece)
+            finish_reason = choice.get("finish_reason") or finish_reason
+    return RecordedChatResponse(
+        content="".join(text_pieces),
+        usage=_normalize_usage(raw_usage),
+        model=model,
+        finish_reason=finish_reason,
+        request_id=request_id,
+        raw_usage=raw_usage,
+    )
+
+
+def recorded_chat_response(
+    cassette_path: Path,
+    *,
+    request_model: str | None = None,
+    stream: bool = False,
+) -> RecordedChatResponse:
+    """Return the last recorded chat completion in the cassette that fits the filters.
+
+    An interaction qualifies when its request payload is a JSON object, its
+    ``model`` equals ``request_model`` (if one is given) and its ``stream`` flag
+    being ``True`` agrees with ``stream``. Fails with ``AssertionError`` when
+    nothing qualifies.
+    """
+    parse = _streaming_chat_response if stream else _non_streaming_chat_response
+    matches = []
+    for interaction in _cassette_interactions(cassette_path):
+        request_payload = _request_json(interaction)
+        if not isinstance(request_payload, dict):
+            continue
+        wrong_model = bool(request_model) and request_payload.get("model") != request_model
+        wrong_mode = (request_payload.get("stream") is True) != stream
+        if wrong_model or wrong_mode:
+            continue
+        response = parse(interaction)
+        if response is not None:
+            matches.append(response)
+    assert matches, f"{cassette_path} does not contain a {'streaming' if stream else 'non-streaming'} chat response"
+    return matches[-1]
diff --git a/tests/recorded/clients/__init__.py b/tests/recorded/clients/__init__.py
new file mode 100644
index 0000000000..5228fd1ba1
--- /dev/null
+++ b/tests/recorded/clients/__init__.py
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Recorded client tests."""
diff --git a/tests/recorded/clients/cassettes/test_openai_chat/test_openai_chat_generate_text.yaml b/tests/recorded/clients/cassettes/test_openai_chat/test_openai_chat_generate_text.yaml
new file mode 100644
index 0000000000..f8f9923cd3
--- /dev/null
+++ b/tests/recorded/clients/cassettes/test_openai_chat/test_openai_chat_generate_text.yaml
@@ -0,0 +1,71 @@
+version: 1
+interactions:
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    headers:
+      Host:
+      - api.openai.com
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      User-Agent:
+      - python-httpx/0.28.1
+      Content-Type:
+      - application/json
+    parsed_body:
+      model: gpt-4o-mini
+      messages:
+      - role: user
+        content: Say hello in one word
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      alt-svc:
+      - h3=":443"; ma=86400
+    body:
+      parsed_body:
+        id: '[RECORDED_RESPONSE_ID]'
+        object: chat.completion
+        created: 0
+        model: gpt-4o-mini-2024-07-18
+        choices:
+        - index: 0
+          message:
+            role: assistant
+            content: Hello!
+            refusal: null
+            annotations: []
+          logprobs: null
+          finish_reason: stop
+        usage:
+          prompt_tokens: 12
+          completion_tokens: 2
+          total_tokens: 14
+          prompt_tokens_details:
+            cached_tokens: 0
+            audio_tokens: 0
+          completion_tokens_details:
+            reasoning_tokens: 0
+            audio_tokens: 0
+            accepted_prediction_tokens: 0
+            rejected_prediction_tokens: 0
+        service_tier: default
+        system_fingerprint: '[RECORDED_SYSTEM_FINGERPRINT]'
diff --git a/tests/recorded/clients/cassettes/test_openai_embeddings/test_openai_embeddings_sync.yaml b/tests/recorded/clients/cassettes/test_openai_embeddings/test_openai_embeddings_sync.yaml
new file mode 100644
index 0000000000..2df8a73c17
--- /dev/null
+++ b/tests/recorded/clients/cassettes/test_openai_embeddings/test_openai_embeddings_sync.yaml
@@ -0,0 +1,53 @@
+version: 1
+interactions:
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/embeddings
+    headers:
+      Host:
+      - api.openai.com
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Accept:
+      - application/json
+      Content-Type:
+      - application/json
+      User-Agent:
+      - OpenAI/Python 2.24.0
+    parsed_body:
+      input:
+      - test
+      model: text-embedding-3-small
+      encoding_format: base64
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Access-Control-Expose-Headers:
+      - CF-Ray
+      alt-svc:
+      - h3=":443"; ma=86400
+    body:
+      parsed_body:
+        object: list
+        data:
+        - object: embedding
+          embedding: AGAhvADgzDoAoIA8AKBgvQBA07sAAFS8AEAfPADAXbwA4Oo8AKAAPABAAj0AwNa7AOB+OwAgJjwAoGY8AAA3PQDgdL0AIB67AKBRvQBgFT0AIBQ9AIDHPADAIz0AoDK9AEANPQBAoLwAwGS8AAAuPACA/zwAQCq9AKBwPQCA7LwAQLa6AAAfvQAgYj0AIIg7AGDBPADgmjwAILu7AGCBuwDgKb0AIDi9AGA6PAAA3zwAwPU8AICZvADADL0AYB69AIBGPQAg5DwAIAK9AKA1uwBgRT0AwI89AGCePABgxDwAABC9AGCjPABglzwA4K47AAAZuwBAmrwAIB09AOC0OwAAA70AAHa9AMBHPADAAD0AwAE6AGCwPABgPD0AgEI8AMBRvACAiTwAQA48AMCjPACAMzsAYJU8AABvPABABLwAgKU8AIAzPACgoLwAAB48ACD/PABAUrwAYLm9AEDNOwCA4TsAYBW9AOA3vABga7sAwHo8AMBTPQDgvjwAAOG8AMAmuwBAk7wAgB87AGDuuwCAKTwAAB88AECuugDgkjwAALg8AMCYPAAAAr0AwMA8AGBNvABACD0A4L+7AOCovACgj7wAQPE8ACA3PQBgdzwA4OA9AMDTvQAgCz0AAJK9AEDgPAAg+jwAoEw8AAC7vACAAT0AYKy8AGDHuwAgG7wAoLu8AADTNwDgAb0AwKA7AGAHPQCAHTwAADG8AKAHvABAzrwAYJQ7ACBwvQBAKj0AIDk8AGDFPADg0DsAoAw6AOCEvABA57wAwIm8AAAlPQDgujwAwOY8AAA8vACAuTsAIAO8AMBuOwAgXL0AoBq9AICBvABAobwAAAC9AODmugBAxjwAAKO7AIA5PAAA0LwAING8AOAMPAAgAbwAQI+8AOBXvQBgnTwAQNo8AMB9PADgXzwAwEu8AACpvAAAxTwAgLe8AEApPAAgt7wA4AY7AICkvADgTz0AYNG8AECaPAAgeDwAAHK9AADGPABAsDsAAEQ7ACBivACAxrsAoBm8AMDuPACAHD0AQOI8AICevQBglzwAAP08AEBevADgJj0AwA+9ACBCuwCgJzwAoDu8AKB3PABAXzwA4MO8AKBAPQDgjrwAgMy8AACPPQBAsroAgPi6AGBBOwAAtLsAgPg7AGBQPQBATjwAABC8AGB6PQAgBbwA4CQ8AKCzPQBASTwAoCe8ACCrPQBgNzwAQGi8AECzOgBgZ70AADC9AOCSPACA+zsAoIm7AADDvADg1TwAIAm9AACIuwBgD70AQFO9AEAavQCgGzwAIPO8AIAgvQDAqbwAIFa8AADLPACgHbwAQNI8ACB4PADgqjsAQBm9AIAZvQAAjb0AAOA8ACAsvACA0DwA4CE7AECrPABABbwAQFG9AGBuuwAALr0AIEy9AGCHvQDAqjsA4O88ACBpvADgWjwAADa9ACDauwCAeDwAIJg8AKCrvADA/7wAICk9AOCkugDgmjwAgNQ8AEDbPADALb0AIF68ACCguwBg6bwAIDm8ACDGPABAMzwA4Du9AKDrOwBgeT0AIFK8AGAxvACgrjoA4AY9AICEPADguzwAQKY8AEAkvQDgBjoAQGq9AIABPQAg9zwA4Ks7AKDoPACAGbwAwDk9AMBhPADgArwAgG87AICIvAAgCTwAAKE8AOBQugCg9DsAwGw8AOBQvQBAWbwAgGo9AIATPQAgurwA4Ew8AEC5vACgCz0AoAQ9AOB+vABAL7sAQMg6AIDbvACAajwA4Ac9AIDDvQDg5LwA4Ge7ACANvACgED0AQAQ9AMAXPABAYz0AoL+7AAArPQAAib0AYDk8AGA5uwAgRbwAgDw9AAC5PAAAOT0AgMW8ACD8PAAgaTwAoDA9AAAuPAAAErsAALA8AACwPADAUL0AIIm8ACB1PABgBT0AgOe6AOBnPABAMD0AoDg9AAApPADgPz0AgKM8AEDavABAOrwAgG+8AAB2OgAgKj
wAQA49AOA6PQDgjDwAQA69ACD6PABALz0AAI49ACC9PACgOz0AoOg8ACAePACAED0AADW8AEDnPAAAITsAQIC8AEDdPAAgabsAYKK8AGBSPQAgp7wAIIy8AKCcvQBggT0AYM08ACCbPADgoj0AIJU8ACAtuwCgtzwAQCG9AAArvQCAJT0AQEm7AMAVPAAABL0AAGO8AGAFPQBAKLsAQNe8ACAJPQCABj0AwAe7ACBOvADgubwAAPs8AAB2uwCAAj0AIDM8AIAHvQCgHL0AoD+8AEB9PABgt7wAgM06AIA4PQBAmzwAIBA9AOC8vACAFbsAIKO8AOASvQDAFz0AIAy9ACCFvADA7jsAQHg7AOCquwBgYDwAQMW8ACCouwBgeTwAILK8AKAqvQAAmDwAQA+9AAAtvQBgDTwAQKC8AKAtPACgEb0A4F28AGB1vQDggDwAoFy9AIBGPABAXLsA4IQ9AACUPADAFT0A4Dw9AKCAPADgRT0AwFA9AKDDuwAA1jwAAA49AMAuPQBAJL0A4A+8AECjvAAAPT0AwKQ7ACAQvQDAc7wAYMc8AABnuwDgOToAwKC9AEADvQDgB70AQP28AECEvADgRDwAgOo9AAAdvQDgnToAwJ+8AAB5vAAgCD0AQF86AMC+PAAgf7wAIMg7ACALPQCA4TwAwKQ8ACCFugDAADwAQJS9AKDDvADAEL0AQIk9ACDRPADApjwAYDK8AKCYvADATj0AYEi7AGCQvACgYjsAwPW5AEAauwDA/bwAQGC8ACCxvADA8TwAAAW8AEA7vAAAaLwAwKs8AODKvAAAjrwA4KS8AADCvADg97sA4NG9AABVvAAAfr0AgA69AEADvQBAlzwAoM+8AGAeuwBgm7sA4Ca8AKAyPABg8LwAoGu8AEDLugBgkjwAwIi7AABMuwAgFLwAoCY9AKBqOgCgrbwAoF68AOC3vACATDwAwC69AIAePQBAeTsAoEg9AEB7ugBgOzwAwIC8AGCQPADAD7wAAIM7AKDBOwBgnjwA4DK9AGBUvADgSzwAAJk7AGCnvAAAwzwAAGE7AOBGvACgHD0A4AG9AECgvAAAQbwAAP27AADetgBAuLwAANY7ACBbuwBgRzsAQEG8AACRvAAAsTwAILe8ACBCOwBAID0AgBu9AGAmvQBgn7wAYN48AGAUPQCguTwAYC68AMBuPQBguzsAQIg8AAAUvADg0bwAAG28AIDmOwBAr7wAIGY9AICgPACg7zsAYFY8AOD2vACAizsAYJ+8AMBFPACApjwAYDy7AGAnvABACb0AQAI8AOCAPACgoTwAQGa8AOB+PACgi7wAwI+9AKAWuwCgeTsAAIo8AAAIPABgujwAQCM8AEDPOwAg8DsAwF88AGAnvQAgLzwAwDg8AEARvQAgmrwAwES9AGDHuwBA9jwAQOq8AECLvABAFT0A4OM8ACB3PAAg6TsAgA09AMAwvAAApLsAQMe8AKAXvACg9ToAQO+8AMBHvADgqLwAYGS7AKAJPQCgKr0AoCK9AGAUvQCA3LsAQCW8AEDzPACAxjsA4Js8AID2vABggzoA4DI9ACDtvAAgkLwAYKq6AEBBvABAyLsAwPM7AMAUPQCgDb0AoCc9ACCDOQAAjrwAIDi9AEAtvQBg4DwAAEO7AEA9vADA9bwAAKm7AMCzvABgjzwAALe6AGCxOQAAXTwAAHu7ACDivAAgxDsAoAo8AOB8PAAAg7wAwDW8AABROwDgGLwAwIC8AACruwDAbjwAIGm7AGAuvABAI70A4Le8AMBwvAAAsDsAQBk8AAAkPAAgADwA4D+8AKClOABABL0AgOY8AOAnugCgcDwAQEG8AMCBvAAglDwAoKA8AEC0PABAHLsAgEM6AGAgvADgWTwA4KW8AECuvADgJjwAIOu8ACALvQCglbsAAF27ACCOOwCgZzoAQAE9AKA7vABgw7wAQIM6AMCuPABAUr0AIAg7AIAcPADgXjwAYGC8AKDyPADgrT
wAIIK8ACDuOwCAITsAABG9AGA8OQBgIzwAwMG8AKCtPAAgFb0AIEK9AMB7vACA2LwAQIy7AGD8PABAjzwAAHY8ACAJPQAAkDwAIBG8AADXvAAgeLwAQA09AECoPAAgszwAgJ48AIA3vABgsTwAYHq8AKAJPABgMT0AADS9AICPPABAeDwAwKI8AMCVvABAV7wAoPC7AKDgOgDgEbwAYMU8AMA8PACgsTwAgI+8AAB0vACADLsAAIA8AGCqvACg6DwAgBk9AOALvQAgF7wAoN+8ACDKPAAgFr0AgFK8ACArvACAkzwA4KM6AGCGPACgx7wAQGE8AMAfPQBgprwAIBi9AAC7vAAA1DwAAJC7AKD1PABAILsAYLW8AGA8vADg6zoAwL47AECAvADAbjsAoAu9AODmPADgIL0A4FC8AABsvAAAaDwAgBY8AEANvAAAyzwAAJk7AICmvACgVjsA4Cu9AKCzPABg2DwAIAA8ACByvAAAibsAwJm8AODhPAAgrjwAoJy6AKCVvADA/TwAwJ46ACCAugCgujoAoNO8AMCEugAgyTwAAJK7AODvPACgCj0AIEY7ACASvADgXLsAwBe8AADtugDg37wAQBq9AIDDPACAhbwAAGe8ACAsPQDgWzwAwOW7AGCuPADAozwA4N87AACvOwAgsLwAYJy8AGDHvABg/DoAwBU8ACBTPADAVjsAYOA8AOAhPQDgID0AoAM7AABKPAAgFD0AwGQ8AOAvPQDAI70AwM08AMAgPAAgujwAwAw9AECBPACAqbwAAJa9AKCtvAAAvjsAgC89AAApvADg2jgAoMI7AGCmvADg4boAwFg9AMCYPACg9LwAAOq7AGAPvQBg1TsAYI68AADEPABgfLwAIA69AKChPADAJDwAYG48AKCsPACAl7wAgJU8AIBLPQDAbrwAQBO8AIAsvQCgojwAIJu8AICQPACgMrwAAA09AEASPABgSz0AoAq9AEDvvACg2jwAQN68AICOOwBA/TwAgFs9ACBkPACAhjsAANq6AOCMOwAgibwA4L+8AGAqvQCg9zwAwJi8AOA6PABAWDsAABo9AKDEuwAgPrwAACw8AGA/vQBg6TwAwJW8AICFvAAgU7sAIDs6AGB3vADALj0AoLI8AMCmOgDgGTsAwHM8AADsvADg3TwAQAQ9AMA0PABgrLsAQB27AIDHPAAgcrwAwFg8AGAGPQCAlDwA4Lk8ACCyvABA/TwAwH08AGCMPAAgh7sA4O87ACCUPQCAJjwAQN88AAB0PAAgJD0A4Oo8AKBYuwBA07sAIB08AIC7PABAfbwAIOe6AAAQPQAAVToAYBc8ACC6vACAg70AgA27AMCMPACgEDwAoMc6ACBzvADgIT0AQMW8AIAOuwDAl7sAwHM8ACCDuABg9DwAQAk9ACAnPADg8rwAYCy9AGC+vAAAYLwAwIy6AGAsvQBAXrwAAOy6AIBeuwCgELwAwA88AGBhPABggjwAQIM7AAB5OwAAKrwAwLI8ACDhvABgGTwAwIk8AECOPAAgHT0A4Kk7AMB4uwDgqLsAQAk8AECJvADgCDsAYK88ACAbPQAg0TwA4AU9AGAKOwAghTwAgPM8AMDgPADgMjwAoLY7ACDXuwAAYbwAANa8AKBOvACg67sAwLW7AADtPADgTjwA4EE9AIDeuwCg47oAwOy8AECYvABg97wAgBu7AOC5PABgz7wAACA8AKB1vABgGT0AAJk8AEDdvACgFz0AIPM8AODAvAAgvboAIHO8ACAtuwDgybwAoGI8AIALvADAuTsAgJ27ACAbvACgQLwAQMU8ACB6OwAgWLwAgH88AGB8PAAgCb0AIGI7AAAKvADAEr0AYKS7ACDZPADAYzwAACc9AEACPQBAaLwAQPa8AOCXuwBAAjsAwNS8AKALvQDAl7sAoHI7AAB9vABAxDsA4Js8AAASvAAgwbwAYOA8AGDuPACAv7wAwBe9AKBgPABA1rwAQMc6AIBevABgtL
sAICO9AEBUPQBAs7kAQAG8AIBgPQBgirwAwFY8AKAYvQCgtDwAwNi7AMDsPABgEj0AQKq8AMBEvQBA8zoAwK87AAArvQDANj0A4C68AMDSuwBgJbwAgIo8AGDDPADgsbwAgDi8AODruwAABz4AgOk6AMBfOgAgXLsAwJc8AEAgvAAgtLsA4La8ACCRuwDAizwAgNG8AAD+OwCgEbwAwF88ACBrvAAgzzsA4Ba7AIBuvQBAxjsAYEC9ACCSvACAtzwA4LA8AECWPABAvzsAoAu9ACA4vAAAFL0A4AI9AMCsPABgDTkAYMS7AEDxvACABTwAIEa8AMAHvQAgIrwAgCE9AIDsuwCglrwAoLM7AOBUPADgDTwAIB68ACDyPABgEzwA4DI7AMDxOgDAbjwAgNw8AIC3vACAtrkAwAo6AMAKvACAZTwAoFi9AEBIPABAtrwAYA49AECsvACgmLsAIJa8AMD7PADA+DsAIGm8AMDJvABg/7sAIKu7ACBUPADgiLwAgGm5AGCJOgCAD7wAgAa9ACB8vADA/bwAgDI9AIBzPAAg8rwAIBq8AOCmPACAnDwAYAE9AOCauwAg5LsAwF87ACD1uwDABr0AYIy8AOCsvADgYTwAIC87AKCvOwBgBz0AgHM7AIAbOgBAILwAAMY7AAByvABAK70AYPo8AKC7uwBgZ7wAIDI9AMBMPABgqLwAwP28AGC1vABghzwAgCw8AOCTPACgj7sA4GU8AICzvAAAXDwAYKa7AEBBvQCgkzwAQFm8AICsvADgEbwAQJM8AEAuugDgqjwAAB+8ACC8uwAAUrwAgJA8AGBuuwBgMToAoBw8AKBIvQBAxDsA4GC8AGCVPAAggLsAQJ48AKA2vQDg9zwAoK08AIDaPAAAAzwAoLO8AKD8uwBgsTsAQB68AICsOwCAxTsAgHO7AODmvADgKL0AYDI7AABRvAAAmrsAAIg8AOCAuwAAKbwAQG+7ACAkvADAwTsAAG28AMDwugCA4jsAIDc9AAC6PABALrwAoOe7ACDYPABghTwA4Di6AECOvABgMr0AYCg7AODNuwCgwjwAwGS8AKAovABg6zsAQIA6AIAquwBAUDwAYKa7ACCpvADg+zsAgLi4AMA5PAAAhDsAQD+8AACqvADAHL0AgP88AGBTvACgVT0AIPc7AGAtvACAmDwAAAK9AEAivQCgI7wA4DG8AICsOwBAlrwA4OK7AADvPAAg4DsAwOc7ACD4vAAgRzwAQGe8AKDYOwCgWzwAIKW7AOBPvACAHzwAwLY8AAC+OwCgoDwAwKY7AAABvQCgALsAYBw8AGCGOwAg0TwAoKe8AEBvugDAVLwAoAk9AAAkvQCABDsAIDq9AMCHOwCgCT0AYIs6AAASvQAgvrwAIEY7AMB4vADggDsAQMC6AKAMPADgfrwA4Lk7AKAGvQAg7TwAwOU8AAC1uwAAQzwAYBU8AIDiOwCgFbwAYEM8AEDduwCAwrwAIPU7AKA6OwCgJrwAgBY9AGAEvQCg1rsAYKa8AKDcvABAqrsAQF47AGDROgAgQz0AAEA8AEB4PADg9zsAIOi8ACAJuwAA/rwAQMO7AGBpOwBAyjwAIHc8ACCkPACgkjwAIIk7AKCAPADgAL0AIPA7AEAJvQAgAboAoD28AEAavABggjwAIJ07AOCmPAAA8rsAYNc7AMB2vACgt7sAYNs7AECePADgRjwAoM07AECtOwDA+LwAQFa9AACFOwBgArsAoPm8AECEPACAH7wAAP27AEDivAAgGz0AgNm8ACDAOwCgDDwAwBi8AABLvABgzbsAQAS7AACGvADApLsAIIq7AMDpOwBAkLwAQA68AMAIvQDA0zwAYJm8AICwvACAlrsAYAU9AMB6vABAQ7sA4AU7AGA2uwCghbwAoIs8AADBvACgZ7sAYGg9AIAWOgDAhLwAICq8AMD4uwDA/bwAwLK8AIBCuwBAozoAAAg8AAD2OgDA8TwAwO47AIDxOgDAuD
wAQK48AAAIvQAgqbwAgCg9AGAMvQDg+bwAIHO8AKASPQDAvzoAACK9AKCpOQAg3DwAgDo8AKDAPACgbbsAYEI7AAD+OwCAl7wAIAk8AKCruwAAk7oA4Gu9AMAPPQBAMDoAAAS9AAABPABgoDsAoDK8ACBcOwDgUr0AIF+8AADvuQCgwLwAABY9ACA/uwCgw7wAoMY7AGDPugAgt7wAIKO8
+          index: 0
+        model: text-embedding-3-small
+        usage:
+          prompt_tokens: 1
+          total_tokens: 1
diff --git a/tests/recorded/clients/test_openai_chat.py b/tests/recorded/clients/test_openai_chat.py
new file mode 100644
index 0000000000..a2fe07934f
--- /dev/null
+++ b/tests/recorded/clients/test_openai_chat.py
@@ -0,0 +1,65 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import httpx
+import pytest
+
+from nemoguardrails.llm.clients.openai_compatible import OpenAICompatibleClient
+from nemoguardrails.llm.models.openai_chat import OpenAIChatModel
+from tests.recorded.snapshots import snapshot
+
+pytestmark = [pytest.mark.recorded, pytest.mark.vcr, pytest.mark.asyncio]
+
+
+async def test_openai_chat_generate_text(openai_api_key):
+    """Generate one completion through ``OpenAIChatModel`` and snapshot the parsed result.
+
+    The module is marked ``recorded`` and ``vcr``; the expected values below
+    mirror the committed cassette for this test (content, model, placeholder
+    response id and token counts).
+    """
+    async with httpx.AsyncClient() as http_client:
+        client = OpenAICompatibleClient(
+            base_url="https://api.openai.com/v1",
+            api_key=openai_api_key,
+            http_client=http_client,
+            # NOTE(review): presumably retries are disabled so the cassette holds
+            # exactly one interaction -- confirm.
+            max_retries=0,
+        )
+        model = OpenAIChatModel(client=client, model="gpt-4o-mini")
+
+        result = await model.generate_async("Say hello in one word")
+
+    # Guard first so the attribute reads below fail with a clear message.
+    assert result.usage is not None
+    # Flatten the result into plain values so it can be compared with the snapshot.
+    assert {
+        "content": result.content,
+        "finish_reason": result.finish_reason,
+        "model": result.model,
+        "request_id": result.request_id,
+        "usage": {
+            "input_tokens": result.usage.input_tokens,
+            "output_tokens": result.usage.output_tokens,
+            "total_tokens": result.usage.total_tokens,
+            "cached_tokens": result.usage.cached_tokens,
+            "reasoning_tokens": result.usage.reasoning_tokens,
+        },
+    } == snapshot(
+        {
+            "content": "Hello!",
+            "finish_reason": "stop",
+            "model": "gpt-4o-mini-2024-07-18",
+            "request_id": "[RECORDED_RESPONSE_ID]",
+            "usage": {
+                "input_tokens": 12,
+                "output_tokens": 2,
+                "total_tokens": 14,
+                "cached_tokens": 0,
+                "reasoning_tokens": 0,
+            },
+        }
+    )
diff --git a/tests/recorded/clients/test_openai_embeddings.py b/tests/recorded/clients/test_openai_embeddings.py
new file mode 100644
index 0000000000..67b078d675
--- /dev/null
+++ b/tests/recorded/clients/test_openai_embeddings.py
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel
+
+pytestmark = [pytest.mark.recorded, pytest.mark.vcr]
+
+
+def test_openai_embeddings_sync(openai_api_key):
+    """Encode one input with ``text-embedding-3-small`` and check the result shape."""
+    embedder = OpenAIEmbeddingModel("text-embedding-3-small", api_key=openai_api_key, max_retries=0)
+
+    vectors = embedder.encode(["test"])
+
+    # One vector per input, 1536 components each.
+    assert len(vectors) == 1
+    assert len(vectors[0]) == 1536
diff --git a/tests/recorded/conftest.py b/tests/recorded/conftest.py
new file mode 100644
index 0000000000..11d32001e7
--- /dev/null
+++ b/tests/recorded/conftest.py
@@ -0,0 +1,438 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+from collections.abc import AsyncIterator, Callable
+from pathlib import Path
+from typing import Any, Dict, List
+
+import pytest
+import pytest_asyncio
+import yaml
+from vcr.util import read_body
+
+from tests.recorded.cassette import cassette_with_parsed_bodies, cassette_with_rehydrated_bodies, normalize_body
+from tests.recorded.sanitization import (
+    ALLOWED_HEADERS,
+    FILTERED_HEADER_PREFIXES,
+    FILTERED_HEADERS,
+    FILTERED_QUERY_PARAMETERS,
+    JSON_SECRET_KEYS,
+    NULLABLE_VOLATILE_RESPONSE_JSON_FIELDS,
+    SECRET_PATTERNS,
+    VOLATILE_RESPONSE_HEADERS,
+    VOLATILE_RESPONSE_JSON_FIELDS,
+    VOLATILE_RESPONSE_METADATA_FIELDS,
+)
+from tests.recorded.utils import (
+    DUMMY_NVIDIA_API_KEY,
+    DUMMY_OPENAI_API_KEY,
+    set_api_key_for_record_mode,
+)
+
+DUMMY_SERVICE_API_KEY = "recorded-replay"  # placeholder key handed out by the ``service_api_key`` fixture
+_NON_JSON_BODY = object()  # sentinel: the body could not be decoded as JSON (distinct from a decoded ``None``)
+
+
+class _ReadableCassetteDumper(yaml.SafeDumper):
+    """SafeDumper subclass that owns the cassette-specific ``str`` representer."""
+
+
+def _represent_readable_string(dumper: yaml.SafeDumper, data: str) -> yaml.nodes.ScalarNode:
+    """Emit multi-line strings as literal (``|``) block scalars and everything else in the default style."""
+    block_style = {"style": "|"} if "\n" in data else {}
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data, **block_style)
+
+
+_ReadableCassetteDumper.add_representer(str, _represent_readable_string)
+
+
+class ReadableYamlSerializer:
+    """Serializer pair (``deserialize``/``serialize``) that stores cassettes as readable YAML."""
+
+    @staticmethod
+    def deserialize(cassette_string: str) -> Any:
+        """Load a readable cassette and rehydrate its bodies into the raw shape VCR expects."""
+        loaded = yaml.safe_load(cassette_string)
+        return cassette_with_rehydrated_bodies(loaded)
+
+    @staticmethod
+    def serialize(cassette_dict: dict[str, Any]) -> str:
+        """Dump the cassette with its bodies in parsed form, using the readable dumper."""
+        readable = cassette_with_parsed_bodies(cassette_dict)
+        # sort_keys=False keeps the mapping order of ``readable``; the dumper wraps at 120 columns.
+        options = {"Dumper": _ReadableCassetteDumper, "allow_unicode": True, "sort_keys": False, "width": 120}
+        return yaml.dump(readable, **options)
+
+
+def _replace_case_insensitive(headers: Dict[str, Any], header_names: set[str], value: Any = None) -> None:
+    """Drop (``value is None``) or overwrite, in place, headers whose lowercased name is in ``header_names``."""
+    for name in [key for key in headers if key.lower() in header_names]:
+        if value is not None:
+            headers[name] = value
+        else:
+            del headers[name]
+
+
+def _filter_headers_by_prefix(headers: Dict[str, Any]) -> None:
+    """Delete, in place, headers with a filtered prefix unless the name is explicitly allowed."""
+    prefixes = tuple(FILTERED_HEADER_PREFIXES)
+    for name in list(headers):
+        key = name.lower()
+        if key not in ALLOWED_HEADERS and key.startswith(prefixes):
+            del headers[name]
+
+
+def _scrub_text(value: str) -> str:  # apply each SECRET_PATTERNS substitution, in declaration order
+    for secret_regex, placeholder in SECRET_PATTERNS:
+        value = secret_regex.sub(placeholder, value)
+    return value
+
+
+def _normalize_response_metadata(key: str, value: Any) -> Any:
+    """Swap a volatile response field for its fixed placeholder; nullable fields keep an explicit ``None``."""
+    if key in VOLATILE_RESPONSE_JSON_FIELDS:
+        return VOLATILE_RESPONSE_JSON_FIELDS[key]
+    nullable_hit = value is not None and key in NULLABLE_VOLATILE_RESPONSE_JSON_FIELDS
+    return NULLABLE_VOLATILE_RESPONSE_JSON_FIELDS[key] if nullable_hit else value
+
+
+def _scrub_json(value: Any, *, normalize_response_metadata: bool = False) -> Any:
+    """Return a scrubbed copy of a decoded JSON value.
+
+    Strings go through ``_scrub_text`` and values stored under ``JSON_SECRET_KEYS``
+    become ``"[REDACTED]"``. When requested, volatile-metadata normalization applies
+    to the keys of ``value`` itself and of dicts reached through lists only; anything
+    nested under a dict key is scrubbed with normalization switched off.
+    """
+    if isinstance(value, str):
+        return _scrub_text(value)
+    if isinstance(value, list):
+        return [_scrub_json(item, normalize_response_metadata=normalize_response_metadata) for item in value]
+    if not isinstance(value, dict):
+        return value
+
+    result = {}
+    for name, child in value.items():
+        if normalize_response_metadata and name in VOLATILE_RESPONSE_METADATA_FIELDS:
+            result[name] = _normalize_response_metadata(name, child)
+        elif name.lower() in JSON_SECRET_KEYS:
+            result[name] = "[REDACTED]"
+        else:
+            # Children of a dict never inherit the normalization flag.
+            result[name] = _scrub_json(child, normalize_response_metadata=False)
+    return result
+
+
+def _decode_json_body(body: Any) -> Any:
+    """Return ``body`` parsed as JSON; dicts/lists pass through and anything else yields ``None``.
+
+    ``bytes`` are decoded as UTF-8 first; decoding and parsing errors propagate to the caller.
+    """
+    if isinstance(body, (dict, list)):
+        return body
+    if isinstance(body, (bytes, str)):
+        return json.loads(body.decode("utf-8") if isinstance(body, bytes) else body)
+    return None
+
+
+def _decode_match_body_json(body: Any) -> Any:  # matcher variant: decode failures become a sentinel
+    try:
+        return _decode_json_body(body)
+    except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
+        return _NON_JSON_BODY
+
+
+def _normalize_raw_match_body(body: Any) -> Any:
+    """Return ``bytearray`` bodies as immutable ``bytes``; every other value is returned unchanged."""
+    is_mutable_bytes = isinstance(body, bytearray)
+    return bytes(body) if is_mutable_bytes else body
+
+
+def recorded_body_matcher(request_1: Any, request_2: Any) -> None:
+    """Compare recorded and replay requests after applying the same scrubbing rules.
+
+    This keeps replay strict about semantic request changes while ignoring
+    redacted secrets and normalization that are intentionally applied at record
+    time. Returns ``None`` on a match and raises ``AssertionError`` when the bodies differ.
+    """
+    # Coerce bytearray to bytes *before* JSON decoding: the decoder returns None for types it does not
+    # know, so two different bytearray bodies would otherwise both look like "no body" and match.
+    body_1 = _normalize_raw_match_body(read_body(request_1))
+    body_2 = _normalize_raw_match_body(read_body(request_2))
+    json_body_1 = _decode_match_body_json(body_1)
+    json_body_2 = _decode_match_body_json(body_2)
+    if json_body_1 is not _NON_JSON_BODY and json_body_2 is not _NON_JSON_BODY:
+        matched_1 = normalize_body(_scrub_request_json(json_body_1))
+        matched_2 = normalize_body(_scrub_request_json(json_body_2))
+        if matched_1 != matched_2:
+            raise AssertionError(
+                "Recorded request JSON body does not match replay request after scrubbing and normalization:\n"
+                f"recorded={matched_1!r}\n"
+                f"replay={matched_2!r}"
+            )
+        return
+
+    # At least one body is not JSON: fall back to an exact comparison of the raw bodies.
+    if body_1 != body_2:
+        raise AssertionError(
+            f"Recorded raw request body does not match replay request:\nrecorded={body_1!r}\nreplay={body_2!r}"
+        )
+
+
+def _encode_body_like(original_body: Any, data: Any) -> Any:
+    """Serialize ``data`` as indented JSON in the type of ``original_body`` (bytes or str); else return ``data``."""
+    # Serialized eagerly, so non-JSON-serializable data raises whatever the original body type was.
+    serialized = json.dumps(data, indent=2)
+    if isinstance(original_body, bytes):
+        return serialized.encode("utf-8")
+    return serialized if isinstance(original_body, str) else data
+
+
+def _body_to_text(body: Any) -> str:
+    """Return ``body`` as text: UTF-8 decoded bytes, a str as-is, and ``""`` for any other type."""
+    text = body.decode("utf-8") if isinstance(body, bytes) else body
+    return text if isinstance(text, str) else ""
+
+
+def _encode_text_like(original_body: Any, text: str) -> Any:  # bytes original -> UTF-8 bytes, else the str
+    return text if not isinstance(original_body, bytes) else text.encode("utf-8")
+
+
+def _scrub_raw_body(body: Any) -> Any:
+    """Scrub secrets from a non-JSON body; the original object is returned when nothing changes."""
+    try:
+        original_text = _body_to_text(body)
+    except UnicodeDecodeError:
+        return body  # not UTF-8 text: left untouched
+    # An empty text (empty body or unsupported type) skips the pattern pass entirely.
+    cleaned = _scrub_text(original_text) if original_text else original_text
+    if cleaned != original_text:
+        return _encode_text_like(body, cleaned)
+    return body
+
+
+def _header_values(headers: dict[str, Any], name: str) -> list[str]:
+    """Return the value(s) of the first header whose lowercased name equals ``name``, always as a list."""
+    _missing = object()
+    hit = next((value for key, value in headers.items() if key.lower() == name), _missing)
+    return [] if hit is _missing else hit if isinstance(hit, list) else [hit]
+
+
+def _scrub_request_json(data: Any) -> Any:  # request bodies are scrubbed without metadata normalization
+    return _scrub_json(data, normalize_response_metadata=False)
+
+
+def _scrub_response_json(data: Any) -> Any:
+    """Scrub a decoded response body, normalizing volatile metadata on the way."""
+    cleaned = _scrub_json(data, normalize_response_metadata=True)
+    has_jailbreak_score = isinstance(cleaned, dict) and "jailbreak" in cleaned and "score" in cleaned
+    if has_jailbreak_score:
+        # Responses carrying both ``jailbreak`` and ``score`` get the score pinned to 0.0.
+        cleaned["score"] = 0.0
+    return cleaned
+
+
+def _scrub_sse_body(body: Any) -> Any:
+    """Scrub the JSON payload of every ``data: `` line in a server-sent-events body.
+
+    Lines that are not JSON (``data: [DONE]`` included) stay verbatim and empty events are dropped.
+    """
+    text = _body_to_text(body)
+    if not text:
+        return body
+
+    def scrub_line(line: str) -> str:
+        if not line.startswith("data: ") or line.startswith("data: [DONE]"):
+            return line
+        try:
+            payload = json.loads(line.removeprefix("data: "))
+        except json.JSONDecodeError:
+            return line
+        return "data: " + json.dumps(_scrub_response_json(payload), separators=(",", ":"))
+
+    events = [
+        "\n".join(scrub_line(line) for line in event.splitlines())
+        for event in text.split("\n\n")
+        if event
+    ]
+    return _encode_text_like(body, "\n\n".join(events) + "\n\n")  # always ends with an event terminator
+
+
+def before_record_request(request: Any) -> Any:
+    """Strip filtered headers from ``request`` and scrub its body in place; returns the same object."""
+    _replace_case_insensitive(request.headers, FILTERED_HEADERS)
+    _filter_headers_by_prefix(request.headers)
+
+    raw_body = request.body
+    try:
+        parsed = _decode_json_body(raw_body)
+    except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
+        request.body = _scrub_raw_body(raw_body)
+    else:
+        if parsed is not None:
+            request.body = _encode_body_like(raw_body, _scrub_request_json(parsed))
+    return request
+
+
+def before_record_response(response: Dict[str, Any]) -> Dict[str, Any]:
+    """Redact volatile response headers and bodies before VCR writes a cassette.
+
+    Mutates and returns ``response``. Event-stream bodies are scrubbed event by event, JSON bodies
+    are scrubbed and re-encoded, and anything that fails to decode as JSON is scrubbed as raw text.
+    """
+    headers = response.get("headers", {})
+    _replace_case_insensitive(headers, FILTERED_HEADERS | VOLATILE_RESPONSE_HEADERS)
+    _filter_headers_by_prefix(headers)
+
+    body_container = response.get("body")
+    body = body_container.get("string") if isinstance(body_container, dict) else None
+    if body is None:
+        return response
+
+    content_types = [value.lower() for value in _header_values(headers, "content-type")]
+    if any("text/event-stream" in value for value in content_types):
+        body_container["string"] = _scrub_sse_body(body)
+        return response
+
+    try:
+        data = _decode_json_body(body)
+    except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
+        body_container["string"] = _scrub_raw_body(body)
+        return response
+    if data is not None:  # unsupported body types decode to None: keep them rather than storing None
+        body_container["string"] = _encode_body_like(body, _scrub_response_json(data))
+    return response
+
+
+def pytest_recording_configure(config: pytest.Config, vcr: Any) -> None:  # register custom VCR extensions
+    vcr.register_matcher("recorded_body", recorded_body_matcher)
+    vcr.register_serializer("yaml", ReadableYamlSerializer)
+
+
+@pytest.fixture(scope="module")
+def vcr_cassette_dir(request: pytest.FixtureRequest) -> str:
+    """Cassette directory for a test module: ``cassettes/<module stem>`` beside the module file."""
+    return str(request.node.path.parent.joinpath("cassettes", request.node.path.stem))
+
+
+@pytest.fixture
+def recorded_cassette_path(vcr_cassette_dir: str, default_cassette_name: str) -> Path:  # <cassette dir>/<name>.yaml
+    return Path(vcr_cassette_dir, default_cassette_name + ".yaml")
+
+
+def build_vcr_config() -> Dict[str, Any]:
+    """Return a fresh dict of the VCR options shared by every recorded test."""
+    config: Dict[str, Any] = {"decode_compressed_response": True}
+    config["filter_headers"] = [(name, None) for name in FILTERED_HEADERS]
+    config["filter_query_parameters"] = [(name, None) for name in FILTERED_QUERY_PARAMETERS]
+    config["before_record_request"] = before_record_request
+    config["before_record_response"] = before_record_response
+    # "recorded_body" is the custom matcher registered in pytest_recording_configure.
+    config["match_on"] = ["method", "scheme", "host", "port", "path", "query", "recorded_body"]
+    return config
+
+
+_VCR_CONFIG = build_vcr_config()  # built once, at import time
+
+
+@pytest.fixture(scope="session")
+def vcr_config() -> Dict[str, Any]:  # hands out the same module-level dict on every request
+    return _VCR_CONFIG
+
+
+@pytest_asyncio.fixture(autouse=True)
+async def close_owned_http_clients(monkeypatch: pytest.MonkeyPatch) -> AsyncIterator[None]:
+    from nemoguardrails.llm.clients import base  # imported inside the fixture rather than at module import
+
+    tracked: List[Any] = []  # clients built during the test whose ``_owns_client`` flag was truthy after init
+    original_init = base.BaseClient.__init__
+
+    def tracking_init(self: Any, *args: Any, **kwargs: Any) -> None:
+        original_init(self, *args, **kwargs)
+        if getattr(self, "_owns_client", False):
+            tracked.append(self)
+
+    monkeypatch.setattr(base.BaseClient, "__init__", tracking_init)  # monkeypatch restores it at teardown
+
+    yield
+
+    leaked = [client for client in tracked if client._owns_client and not client._client.is_closed]  # still open
+    for client in leaked:
+        await client._client.aclose()
+
+
+_PROXY_ENV_VARS = (  # upper- and lower-case spelling of each proxy-related variable
+    "HTTP_PROXY",
+    "http_proxy",
+    "HTTPS_PROXY",
+    "https_proxy",
+    "ALL_PROXY",
+    "all_proxy",
+    "FTP_PROXY",
+    "ftp_proxy",
+    "NO_PROXY",
+    "no_proxy",
+)
+
+
+@pytest.fixture(autouse=True)
+def strip_proxy_env_during_replay(monkeypatch: pytest.MonkeyPatch, record_mode: str) -> None:
+    """Make replay independent of the ambient proxy configuration.
+
+    Under ``--block-network`` a proxy is useless, and a SOCKS proxy is fatal:
+    httpx raises ``ImportError`` when ``socksio`` is not installed, turning a
+    cassette hit into an error that depends only on the developer or CI shell.
+    Strip proxy variables during replay so a hit is deterministic everywhere.
+    Recording keeps the ambient proxy so real provider calls can still egress.
+    """
+    if record_mode == "none":  # "none" is treated as replay; every other mode keeps the proxy variables
+        for name in _PROXY_ENV_VARS:
+            monkeypatch.delenv(name, raising=False)  # raising=False: unset variables are not an error
+
+
+@pytest.fixture
+def openai_api_key(monkeypatch: pytest.MonkeyPatch, record_mode: str) -> str:  # OPENAI_API_KEY for this test
+    return set_api_key_for_record_mode(monkeypatch, "OPENAI_API_KEY", DUMMY_OPENAI_API_KEY, record_mode)
+
+
+@pytest.fixture
+def nvidia_api_key(monkeypatch: pytest.MonkeyPatch, record_mode: str) -> str:  # NVIDIA_API_KEY for this test
+    return set_api_key_for_record_mode(monkeypatch, "NVIDIA_API_KEY", DUMMY_NVIDIA_API_KEY, record_mode)
+
+
+@pytest.fixture
+def service_api_key(monkeypatch: pytest.MonkeyPatch, record_mode: str) -> Callable[[str], str]:
+    def set_service_api_key(env_name: str) -> str:  # same helper, for a caller-chosen environment variable
+        return set_api_key_for_record_mode(monkeypatch, env_name, DUMMY_SERVICE_API_KEY, record_mode)
+
+    return set_service_api_key  # a factory: the test calls it with the variable name it needs
+
+
+_PROVIDER_KEY_FIXTURES = {"openai": "openai_api_key", "nim": "nvidia_api_key"}  # provider -> fixture name
+
+
+def _provider_key_fixture_name(provider: str) -> str:
+    """Map a provider name to the name of its API-key fixture; unknown providers raise ``ValueError``."""
+    if provider in _PROVIDER_KEY_FIXTURES:
+        return _PROVIDER_KEY_FIXTURES[provider]
+    known = sorted(_PROVIDER_KEY_FIXTURES)
+    supported = ", ".join(known)
+    raise ValueError(f"Unknown recorded provider {provider!r}; expected one of: {supported}")
+
+
+def provider_key(request: pytest.FixtureRequest, provider: str) -> None:
+    """Activate the API-key fixture for one LLM provider (``openai`` or ``nim``); its value is discarded."""
+    request.getfixturevalue(_provider_key_fixture_name(provider))
diff --git a/tests/recorded/fake_cassettes.py b/tests/recorded/fake_cassettes.py
new file mode 100644
index 0000000000..f6b6ee9f39
--- /dev/null
+++ b/tests/recorded/fake_cassettes.py
@@ -0,0 +1,50 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+REQUIRED_FAKE_CASSETTE_FIELDS = {"reason", "frozen_fields", "fake_llm_model_considered"}  # mandatory header keys
+
+
+def fake_cassette_header(path: Path) -> dict[str, Any]:
+    """Parse the leading ``#`` comment block of a cassette file as a YAML mapping.
+
+    Blank lines before the block are skipped and the block ends at the first line not starting
+    with ``#``. Anything other than a mapping (or no block at all) yields ``{}``.
+    """
+    header_lines: list[str] = []
+    for raw in path.read_text(encoding="utf-8").splitlines():
+        if raw.startswith("#"):
+            body = raw[1:]
+            header_lines.append(body[1:] if body.startswith(" ") else body)
+        elif header_lines or raw.strip():
+            break
+    parsed = yaml.safe_load("\n".join(header_lines)) if header_lines else None
+    return parsed if isinstance(parsed, dict) else {}
+
+
+def validate_fake_cassette_metadata(path: Path) -> None:  # asserts the ``fake_cassette`` header is well-formed
+    metadata = fake_cassette_header(path).get("fake_cassette")
+    assert isinstance(metadata, dict), f"{path} is missing fake_cassette header metadata"
+    missing = REQUIRED_FAKE_CASSETTE_FIELDS - set(metadata)
+    assert not missing, f"{path} missing fake_cassette fields: {sorted(missing)}"
+    assert isinstance(metadata["frozen_fields"], list) and metadata["frozen_fields"]  # non-empty list
+    assert isinstance(metadata["reason"], str) and metadata["reason"].strip()  # non-blank string
+    assert isinstance(metadata["fake_llm_model_considered"], bool)
diff --git a/tests/recorded/inspect_cassette.py b/tests/recorded/inspect_cassette.py
new file mode 100644
index 0000000000..5529be3f99
--- /dev/null
+++ b/tests/recorded/inspect_cassette.py
@@ -0,0 +1,85 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from tests.recorded.cassette import decode_body_json, decode_body_text, stream_payloads_from_body
+
+
+def _request_payload(interaction: dict[str, Any]) -> dict[str, Any]:
+    """Return the request's JSON object, preferring a ``parsed_body`` dict over decoding ``body``; else ``{}``."""
+    request = interaction.get("request", {})
+    parsed = request.get("parsed_body")
+    payload = parsed if isinstance(parsed, dict) else decode_body_json(request.get("body"))
+    return payload if isinstance(payload, dict) else {}
+
+
+def _response_payload(interaction: dict[str, Any]) -> dict[str, Any]:  # JSON object body, else {}
+    raw_body = interaction.get("response", {}).get("body")
+    try:
+        decoded = decode_body_json(raw_body)
+    except (json.JSONDecodeError, UnicodeDecodeError, TypeError):
+        decoded = None
+    return decoded if isinstance(decoded, dict) else {}
+
+
+def _response_body_text(interaction: dict[str, Any]) -> str | None:
+    """Return the response body decoded as text, with empty results collapsed to ``None``."""
+    response = interaction.get("response", {})
+    return decode_body_text(response.get("body")) or None
+
+
+def cassette_summary(path: Path) -> list[dict[str, Any]]:
+    """Summarize every interaction of the cassette at ``path`` as one flat dict; non-mapping files give ``[]``."""
+    cassette = yaml.safe_load(path.read_text(encoding="utf-8"))
+    interactions = cassette.get("interactions", []) if isinstance(cassette, dict) else []
+
+    def summarize(index: int, interaction: dict[str, Any]) -> dict[str, Any]:
+        request = interaction.get("request", {})
+        response = interaction.get("response", {})
+        sent = _request_payload(interaction)
+        received = _response_payload(interaction)
+        events = stream_payloads_from_body(response.get("body"))
+        return {
+            "index": index,
+            "method": request.get("method"),
+            "uri": request.get("uri"),
+            "status": response.get("status", {}).get("code"),
+            "model": sent.get("model"),
+            "stream": sent.get("stream", False),
+            "response_model": received.get("model") if received else None,
+            "raw_response": _response_body_text(interaction) if not received else None,
+            "stream_events": len(events),
+        }
+
+    return [summarize(index, interaction) for index, interaction in enumerate(interactions)]
+
+
+def main() -> None:
+    """CLI entry point: print the summary of the cassette given on the command line as indented JSON."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("cassette", type=Path)
+    print(json.dumps(cassette_summary(parser.parse_args().cassette), indent=2))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/recorded/normalization.py b/tests/recorded/normalization.py
new file mode 100644
index 0000000000..a224f3a0c2
--- /dev/null
+++ b/tests/recorded/normalization.py
@@ -0,0 +1,105 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from nemoguardrails.rails.llm.options import GenerationResponse, RailsResult
+from tests.recorded.cassette import normalize_body
+
+
+def normalize_rails_result(result: RailsResult) -> dict[str, Any]:
+    """Reduce a ``RailsResult`` to its status value, rail and content, passed through ``normalize_body``."""
+    fields = {
+        "status": result.status.value,
+        "rail": result.rail,
+        "content": result.content,
+    }
+    return normalize_body(fields)
+
+
+def normalize_llm_calls(result: GenerationResponse) -> list[dict[str, Any]]:
+    """List one normalized dict per logged LLM call; ``[]`` when there is no log or no call list."""
+    log = result.log
+    if log is None or log.llm_calls is None:
+        return []
+    def describe(call: Any) -> dict[str, Any]:
+        return {
+            "task": getattr(call, "task", None),
+            "provider": call.llm_provider_name,
+            "model": call.llm_model_name,
+            "completion": call.completion,
+            "prompt_tokens": getattr(call, "prompt_tokens", None),
+            "completion_tokens": getattr(call, "completion_tokens", None),
+            "total_tokens": getattr(call, "total_tokens", None),
+        }
+
+    return normalize_body([describe(call) for call in log.llm_calls])
+
+
+def normalize_generation_response(result: GenerationResponse) -> dict[str, Any]:
+    """Reduce a ``GenerationResponse`` to a normalized dict of its response, rails and LLM calls.
+
+    Each activated rail contributes its ``type``, ``name``, ``decisions`` and ``stop``
+    attributes; without a generation log the ``activated_rails`` list is empty.
+    """
+    logged_rails = [] if result.log is None else result.log.activated_rails
+
+    activated = [
+        {"type": rail.type, "name": rail.name, "decisions": rail.decisions, "stop": rail.stop}
+        for rail in logged_rails
+    ]
+
+    summary = {
+        "response": result.response,
+        "activated_rails": activated,
+        "llm_calls": normalize_llm_calls(result),
+    }
+    return normalize_body(summary)
+
+
+def normalize_stream_chunks(chunks: list[Any]) -> dict[str, Any]:
+    """Fold streamed chunks into their joined text, a per-chunk view and any error payloads.
+
+    String chunks starting with ``{"error":`` are parsed into ``errors``; every other string
+    adds to ``content``. Dict chunks contribute their ``text`` (or, failing that, ``content``)
+    string and are reduced to their text/content keys plus any truthy ``metadata["usage"]``.
+    Chunks of any other type are ignored.
+    """
+    pieces: list[str] = []
+    failures: list[Any] = []
+    seen: list[Any] = []
+    for chunk in chunks:
+        if isinstance(chunk, str):
+            if chunk.startswith('{"error":'):
+                failures.append(json.loads(chunk))
+            else:
+                pieces.append(chunk)
+            seen.append(chunk)
+            continue
+        if not isinstance(chunk, dict):
+            continue
+        for candidate in (chunk.get("text"), chunk.get("content")):
+            if isinstance(candidate, str):
+                pieces.append(candidate)
+                break
+        usage = (chunk.get("metadata") or {}).get("usage")
+        reduced = {key: chunk[key] for key in ("text", "content") if key in chunk}
+        if usage:
+            reduced["usage"] = usage
+        seen.append(reduced)
+    return normalize_body({"content": "".join(pieces), "chunks": seen, "errors": failures})
diff --git a/tests/recorded/rails/__init__.py b/tests/recorded/rails/__init__.py
new file mode 100644
index 0000000000..467079831e
--- /dev/null
+++ b/tests/recorded/rails/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/recorded/rails/conftest.py b/tests/recorded/rails/conftest.py
new file mode 100644
index 0000000000..ff54f5974d
--- /dev/null
+++ b/tests/recorded/rails/conftest.py
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+
+import pytest_asyncio
+
+from nemoguardrails.llm.frameworks import _areset_frameworks, get_default_framework, set_default_framework
+
+
+@pytest_asyncio.fixture(autouse=True)
+async def default_llm_framework() -> AsyncIterator[None]:
+    previous = get_default_framework()  # remembered so teardown can put it back
+    set_default_framework("default")
+    try:
+        yield
+    finally:
+        await _areset_frameworks()  # runs even when the test body raised
+        set_default_framework(previous)
diff --git a/tests/recorded/rails/helpers.py b/tests/recorded/rails/helpers.py
new file mode 100644
index 0000000000..9f67ab556d
--- /dev/null
+++ b/tests/recorded/rails/helpers.py
@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from nemoguardrails import LLMRails
+from tests.recorded.rails_config import RailsConfigSource, enable_streaming, load_config
+
+
+async def async_chunks(values: list[str]) -> AsyncIterator[str]:  # async generator yielding ``values`` in order
+    for value in values:
+        yield value
+
+
+def build_rails(
+    source: RailsConfigSource,
+    *,
+    llm: Any = None,
+    streaming: bool = False,
+    verbose: bool = False,
+) -> LLMRails:
+    """Build an ``LLMRails`` from ``source``, optionally with output-rail streaming enabled."""
+    loaded = load_config(source)
+    config = enable_streaming(loaded) if streaming else loaded
+    return LLMRails(config, llm=llm, verbose=verbose)
diff --git a/tests/recorded/rails_config.py b/tests/recorded/rails_config.py
new file mode 100644
index 0000000000..e9e19989d6
--- /dev/null
+++ b/tests/recorded/rails_config.py
@@ -0,0 +1,84 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+from textwrap import dedent
+
+from nemoguardrails import RailsConfig
+
+
+@dataclass(frozen=True)
+class RailsConfigSource:
+    """A RailsConfig descriptor: either a filesystem path or inline yaml/colang content.
+
+    Construct via the factory methods rather than the raw constructor: use
+    ``from_path(base_dir, name)`` for on-disk configs and ``from_content(name, ...)``
+    for inline ones. ``name`` is used for parametrize IDs and error messages.
+    """
+
+    name: str
+    path: Path | None = None  # set by ``from_path``; stays ``None`` for inline sources
+    yaml_content: str = ""  # inline YAML; dedented and stripped when the config is loaded
+    colang_content: str = ""  # inline Colang; dedented and stripped when the config is loaded
+
+    @classmethod
+    def from_path(cls, base_dir: Path, name: str) -> RailsConfigSource:  # config directory is ``base_dir / name``
+        return cls(name=name, path=base_dir / name)
+
+    @classmethod
+    def from_content(cls, name: str, *, yaml_content: str = "", colang_content: str = "") -> RailsConfigSource:
+        return cls(name=name, yaml_content=yaml_content, colang_content=colang_content)  # ``path`` stays None
+
+
+@lru_cache(maxsize=None)
+def _cached_config(source: RailsConfigSource) -> RailsConfig:
+    """Load the config for ``source`` once per distinct source; inline content is dedented and stripped."""
+    if source.path is None:
+        colang = dedent(source.colang_content).strip()
+        yaml_text = dedent(source.yaml_content).strip()
+        return RailsConfig.from_content(colang_content=colang, yaml_content=yaml_text)
+    return RailsConfig.from_path(str(source.path))
+
+
+def load_config(source: RailsConfigSource) -> RailsConfig:
+    """Load a ``RailsConfig`` from a ``RailsConfigSource``; every call returns a deep copy of the cached config."""
+    return _cached_config(source).model_copy(deep=True)
+
+
+def enable_streaming(
+    config: RailsConfig,
+    *,
+    chunk_size: int | None = None,
+    context_size: int | None = None,
+    stream_first: bool | None = None,
+) -> RailsConfig:
+    """Return a deep copy of ``config`` whose output rails have streaming switched on.
+
+    The input config is left untouched. Each keyword override is written only when it is not
+    ``None``; unset ones keep whatever the copied config already holds.
+    """
+    streaming_config = config.model_copy(deep=True)
+    streaming = streaming_config.rails.output.streaming
+    streaming.enabled = True
+    overrides = {"chunk_size": chunk_size, "context_size": context_size, "stream_first": stream_first}
+    for option, override in overrides.items():
+        if override is not None:
+            setattr(streaming, option, override)
+
+    return streaming_config
diff --git a/tests/recorded/sanitization.py b/tests/recorded/sanitization.py
new file mode 100644
index 0000000000..66aa7714dc
--- /dev/null
+++ b/tests/recorded/sanitization.py
@@ -0,0 +1,75 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import re
+
+FILTERED_HEADERS = {  # Lowercase header names; the cassette scan test rejects any of them.
+    "authorization",
+    "content-length",
+    "x-api-key",
+    "api-key",
+    "openai-organization",
+    "openai-project",
+    "cookie",
+    "set-cookie",
+    "x-xet-access-token",
+}
+# Query parameter names treated as secret-bearing (NOTE(review): the consumer is outside this module).
+FILTERED_QUERY_PARAMETERS = ("api_key", "key", "token")
+# Lowercase header-name prefixes; the sanitization tests expect matching headers to be dropped.
+FILTERED_HEADER_PREFIXES = ("x-", "cf-", "openai-")
+# Lowercase header names allowed to remain in recordings (TODO confirm how conftest applies this).
+ALLOWED_HEADERS = {"content-type"}
+# Lowercase response header names considered volatile between recordings.
+VOLATILE_RESPONSE_HEADERS = {
+    "content-length",
+    "date",
+    "nvcf-reqid",
+    "request-id",
+    "server-timing",
+}
+# (compiled pattern, replacement) pairs for secrets embedded in free text.
+SECRET_PATTERNS = (
+    (re.compile(r"sk-[A-Za-z0-9_-]{12,}"), "[OPENAI_API_KEY]"),
+    (re.compile(r"nvapi-[A-Za-z0-9_-]{12,}"), "[NVIDIA_API_KEY]"),
+    (re.compile(r"Bearer\s+[A-Za-z0-9._~+/=-]{12,}", re.IGNORECASE), "Bearer [REDACTED]"),
+    (re.compile(r"\borg-[A-Za-z0-9_-]{6,}\b"), "[OPENAI_ORG]"),
+    (re.compile(r"\bproj_[A-Za-z0-9_-]{6,}\b"), "[OPENAI_PROJECT]"),
+)
+# JSON object keys regarded as secret-bearing, spelled in lowercase (NOTE(review): matching is done by the consumer).
+JSON_SECRET_KEYS = {
+    "access_token",
+    "accesstoken",
+    "api_key",
+    "apikey",
+    "authorization",
+    "secret",
+    "token",
+    "xet_access_token",
+    "xetaccesstoken",
+}
+# Volatile response JSON fields mapped to the fixed placeholder recorded in their place.
+VOLATILE_RESPONSE_JSON_FIELDS = {
+    "created": 0,
+    "id": "[RECORDED_RESPONSE_ID]",
+}
+# Same idea for fields that may be null (NOTE(review): presumably replaced only when non-null; confirm).
+NULLABLE_VOLATILE_RESPONSE_JSON_FIELDS = {
+    "system_fingerprint": "[RECORDED_SYSTEM_FINGERPRINT]",
+}
+# Union of the field names from both mappings above.
+VOLATILE_RESPONSE_METADATA_FIELDS = set(VOLATILE_RESPONSE_JSON_FIELDS) | set(NULLABLE_VOLATILE_RESPONSE_JSON_FIELDS)
diff --git a/tests/recorded/snapshots.py b/tests/recorded/snapshots.py
new file mode 100644
index 0000000000..f2c55380ce
--- /dev/null
+++ b/tests/recorded/snapshots.py
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from inline_snapshot import snapshot
+
+__all__ = ["snapshot"]  # Re-export ``inline_snapshot.snapshot`` as this module's only public name.
diff --git a/tests/recorded/test_cassette_sanitization.py b/tests/recorded/test_cassette_sanitization.py
new file mode 100644
index 0000000000..5a2ed78359
--- /dev/null
+++ b/tests/recorded/test_cassette_sanitization.py
@@ -0,0 +1,562 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import re
+from pathlib import Path
+from typing import Any
+
+import pytest
+import yaml
+from vcr.request import Request
+
+from tests.recorded.conftest import (
+    ReadableYamlSerializer,
+    _provider_key_fixture_name,
+    before_record_request,
+    before_record_response,
+    build_vcr_config,
+    recorded_body_matcher,
+)
+from tests.recorded.sanitization import FILTERED_HEADERS
+from tests.recorded.utils import api_key_for_record_mode, set_api_key_for_record_mode
+
+pytestmark = [pytest.mark.recorded]  # Applies the ``recorded`` marker to every test in this module.
+# Directory containing this test module; the cassette glob starts here.
+RECORDED_DIR = Path(__file__).parent
+# Header names that fail the committed-cassette scan (the same set object as FILTERED_HEADERS).
+FORBIDDEN_HEADER_NAMES = FILTERED_HEADERS
+# Check name -> regex source for content that must not appear in committed cassette text.
+FORBIDDEN_PATTERNS = {
+    "openai_api_key": r"\bsk-[A-Za-z0-9_-]{12,}\b",
+    "nvidia_api_key": r"\bnvapi-[A-Za-z0-9_-]{12,}\b",
+    "bearer_token": r"\bBearer\s+[A-Za-z0-9._~+/=-]{12,}\b",
+    "openai_org": r"\borg-[A-Za-z0-9_-]{6,}\b",
+    "openai_project": r"\bproj_[A-Za-z0-9_-]{6,}\b",
+    "query_secret": r"[?&](api_key|key|token)=[^&\s]+",
+    "xet_access_token": r'("accessToken"\s*:\s*"(?!\[REDACTED\])[^"]+"|\baccessToken\s*:\s*(?![\'"]?\[REDACTED\][\'"]?)[^\s]+)',
+    "aws_presigned_url": r"[?&]X-Amz-(Credential|Security-Token|Signature)=",
+    "unexpected_huggingface_host": r"https://(?:[^/\s]+\.)?huggingface\.co",
+    "volatile_chat_response_id": r'"id"\s*:\s*"chat(?:cmpl)?-[^"]+"',
+    "volatile_created_timestamp": r'"created"\s*:\s*[1-9]\d{8,}',
+}
+# One alternation with a named group per check, so ``match.lastgroup`` names the check that matched.
+_COMBINED_FORBIDDEN = re.compile(
+    "|".join(f"(?P<{name}>{pattern})" for name, pattern in FORBIDDEN_PATTERNS.items()),
+    re.IGNORECASE,
+)
+
+
+def _cassette_headers(data: Any) -> list[dict]:
+    """Collect request/response header dicts; empty files and ``interactions: null`` yield ``[]``."""
+    interactions = data.get("interactions") if isinstance(data, dict) else None
+    if not isinstance(interactions, list):
+        return []  # e.g. ``interactions:`` with no value loads as None, which is not iterable
+    # Keep only well-formed levels; the order stays request-then-response per interaction.
+    entries = [item for item in interactions if isinstance(item, dict)]
+    sections = [entry.get(side) for entry in entries for side in ("request", "response")]
+    candidates = [section.get("headers") for section in sections if isinstance(section, dict)]
+    return [found for found in candidates if isinstance(found, dict)]
+
+
+def test_recorded_cassettes_are_sanitized():
+    """Scan committed cassettes: regex checks on the raw text, then header names (lowercased)."""
+    cassette_files = sorted(RECORDED_DIR.rglob("cassettes/**/*.yaml"))
+    if len(cassette_files) == 0:
+        pytest.skip("No recorded cassettes committed in this branch")
+    problems = []
+    for cassette_file in cassette_files:
+        raw_text = cassette_file.read_text(encoding="utf-8")
+        # The name of the matching group tells which forbidden pattern was hit.
+        hits = _COMBINED_FORBIDDEN.finditer(raw_text)
+        problems.extend(f"{cassette_file}: matched forbidden pattern {hit.lastgroup}" for hit in hits)
+        for header_map in _cassette_headers(yaml.safe_load(raw_text)):
+            problems.extend(
+                f"{cassette_file}: contains forbidden header {name}"
+                for name in header_map
+                if name.lower() in FORBIDDEN_HEADER_NAMES
+            )
+    assert not problems, "\n".join(problems)
+
+
+def test_recorded_cassette_serializer_keeps_json_bodies_readable():
+    response = before_record_response(
+        {
+            "headers": {"Content-Length": ["100"], "Content-Type": ["application/json"]},
+            "body": {"string": '{"id":"chatcmpl-123","created":1770000000,"answer":"ok"}'},
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": '{"messages":[{"role":"user","content":"hello"}]}',
+                    "headers": {},
+                    "method": "POST",
+                    "uri": "https://api.openai.com/v1/chat/completions",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    text = ReadableYamlSerializer.serialize(cassette)
+    # Volatile fields are normalized in the YAML text, and deserialize() restores a JSON ``string`` body.
+    assert "Content-Length" not in text
+    assert "parsed_body:" in text
+    assert "id: '[RECORDED_RESPONSE_ID]'" in text
+    assert "created: 0" in text
+    loaded = yaml.safe_load(text)
+    assert "string" not in loaded["interactions"][0]["response"]["body"]
+    assert ReadableYamlSerializer.deserialize(text)["interactions"][0]["response"]["body"]["string"].startswith("{")
+
+
+def test_recorded_cassette_serializer_handles_null_interactions():
+    cassette = {"version": 1, "interactions": None}
+    # ``interactions: null`` must survive both directions unchanged.
+    assert yaml.safe_load(ReadableYamlSerializer.serialize(cassette)) == cassette
+    assert ReadableYamlSerializer.deserialize("version: 1\ninteractions:\n") == cassette
+
+
+def test_recorded_cassette_serializer_redacts_access_tokens_from_parsed_bodies():
+    request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body='{"accessToken":"request-access-token-1234567890","nested":{"xetAccessToken":"request-xet-token-1234567890"}}',
+        headers={"Content-Type": "application/json"},
+    )
+    response = before_record_response(
+        {
+            "headers": {"Content-Type": ["application/json"]},
+            "body": {
+                "string": '{"access_token":"response-access-token-1234567890","nested":{"accessToken":"response-xet-token-1234567890"}}'
+            },
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": before_record_request(request).body,
+                    "headers": {"Content-Type": ["application/json"]},
+                    "method": "POST",
+                    "uri": "https://api.openai.com/v1/chat/completions",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    text = ReadableYamlSerializer.serialize(cassette)
+    loaded = yaml.safe_load(text)
+    # No raw token value may remain in the text; each secret key holds "[REDACTED]" instead.
+    assert "request-access-token-1234567890" not in text
+    assert "request-xet-token-1234567890" not in text
+    assert "response-access-token-1234567890" not in text
+    assert "response-xet-token-1234567890" not in text
+    assert loaded["interactions"][0]["request"]["parsed_body"]["accessToken"] == "[REDACTED]"
+    assert loaded["interactions"][0]["request"]["parsed_body"]["nested"]["xetAccessToken"] == "[REDACTED]"
+    assert loaded["interactions"][0]["response"]["body"]["parsed_body"]["access_token"] == "[REDACTED]"
+    assert loaded["interactions"][0]["response"]["body"]["parsed_body"]["nested"]["accessToken"] == "[REDACTED]"
+
+
+def test_recorded_cassette_serializer_preserves_smart_chars():
+    request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body='{"prompt":"‘prompt’ “text” – — ‑ …"}',
+        headers={"Content-Type": "application/json"},
+    )
+    response = before_record_response(
+        {
+            "headers": {"Content-Type": ["application/json"]},
+            "body": {"string": '{"answer":"‘a’ “b” – — ‑ …"}'},
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": before_record_request(request).body,
+                    "headers": {"Content-Type": ["application/json"]},
+                    "method": "POST",
+                    "uri": "https://api.openai.com/v1/chat/completions",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    text = ReadableYamlSerializer.serialize(cassette)
+    loaded = yaml.safe_load(text)
+    prompt = loaded["interactions"][0]["request"]["parsed_body"]["prompt"]
+    answer = loaded["interactions"][0]["response"]["body"]["parsed_body"]["answer"]
+    # Typographic quotes, dashes and the ellipsis must come back from the YAML unchanged.
+    assert prompt == "‘prompt’ “text” – — ‑ …"
+    assert answer == "‘a’ “b” – — ‑ …"
+
+
+def test_recorded_body_matcher_normalizes_smart_chars_in_json_bodies():
+    cassette_request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body=json.dumps({"messages": [{"role": "user", "content": 'It\'s a "smart" quote... - -- -'}]}),
+        headers={"Content-Type": "application/json"},
+    )
+    replay_request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body=json.dumps({"messages": [{"role": "user", "content": "It’s a “smart” quote… – — ‑"}]}),
+        headers={"Content-Type": "application/json"},
+    )
+    # Passes when the matcher does not raise: ASCII and typographic punctuation compare as equal.
+    recorded_body_matcher(cassette_request, replay_request)
+
+
+def test_recorded_cassette_serializer_filters_headers_by_prefix():
+    response = before_record_response(
+        {
+            "headers": {
+                "Content-Type": ["application/json"],
+                "x-request-id": ["req-123"],
+                "X-Content-Type-Options": ["nosniff"],
+                "cf-cache-status": ["HIT"],
+                "openai-version": ["2020-10-01"],
+            },
+            "body": {"string": '{"ok":true}'},
+        }
+    )
+    headers = response["headers"]
+    # Response side: headers with the x-/cf-/openai- prefixes are dropped, Content-Type is kept.
+    assert "Content-Type" in headers
+    assert "x-request-id" not in headers
+    assert "X-Content-Type-Options" not in headers
+    assert "cf-cache-status" not in headers
+    assert "openai-version" not in headers
+
+    request = before_record_request(
+        Request(
+            method="POST",
+            uri="https://api.openai.com/v1/chat/completions",
+            body='{"prompt":"hi"}',
+            headers={"Content-Type": "application/json", "x-stainless-os": "MacOS"},
+        )
+    )
+    # Request side: the x- prefixed header is dropped as well.
+    assert "x-stainless-os" not in request.headers
+    assert "Content-Type" in request.headers
+
+
+def test_recorded_cassette_serializer_keeps_sse_bodies_parseable():
+    response = before_record_response(
+        {
+            "headers": {"Content-Type": ["text/event-stream"]},
+            "body": {"string": 'data: {"id":"chatcmpl-123","created":1770000000,"choices":[]}\n\ndata: [DONE]\n\n'},
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": '{"stream":true}',
+                    "headers": {},
+                    "method": "POST",
+                    "uri": "https://api.openai.com/v1/chat/completions",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    text = ReadableYamlSerializer.serialize(cassette)
+    loaded = yaml.safe_load(text)
+    response_body = loaded["interactions"][0]["response"]["body"]
+    # SSE events are stored as a parsed_body list ending in "[DONE]" and restored to "data:" lines.
+    assert "string" not in response_body
+    assert response_body["parsed_body"][0]["id"] == "[RECORDED_RESPONSE_ID]"
+    assert response_body["parsed_body"][0]["created"] == 0
+    assert response_body["parsed_body"][-1] == "[DONE]"
+    assert "data: [DONE]" in ReadableYamlSerializer.deserialize(text)["interactions"][0]["response"]["body"]["string"]
+
+
+def test_recorded_cassette_serializer_preserves_non_strict_sse_bodies():
+    response = before_record_response(
+        {
+            "headers": {"Content-Type": ["text/event-stream"]},
+            "body": {"string": 'event: message\ndata: {"id":"chatcmpl-123","created":1770000000,"choices":[]}\n\n'},
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": '{"stream":true}',
+                    "headers": {},
+                    "method": "POST",
+                    "uri": "https://api.openai.com/v1/chat/completions",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    text = ReadableYamlSerializer.serialize(cassette)
+    response_body = yaml.safe_load(text)["interactions"][0]["response"]["body"]
+    # A body with an "event:" line stays a raw string, though id/created are still normalized in place.
+    assert "parsed_body" not in response_body
+    assert response_body["string"] == (
+        'event: message\ndata: {"id":"[RECORDED_RESPONSE_ID]","created":0,"choices":[]}\n\n'
+    )
+
+
+def test_recorded_response_metadata_normalization_preserves_nested_ids():
+    response = before_record_response(
+        {
+            "headers": {"Content-Type": ["application/json"]},
+            "body": {
+                "string": (
+                    '{"id":"chatcmpl-123","created":1770000000,'
+                    '"choices":[{"message":{"tool_calls":[{"id":"call_123","type":"function"}]}}]}'
+                )
+            },
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": '{"messages":[]}',
+                    "headers": {},
+                    "method": "POST",
+                    "uri": "https://api.openai.com",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    loaded = yaml.safe_load(ReadableYamlSerializer.serialize(cassette))
+    parsed_body = loaded["interactions"][0]["response"]["body"]["parsed_body"]
+    # Only the top-level id/created are normalized; the nested tool-call id is left as recorded.
+    assert parsed_body["id"] == "[RECORDED_RESPONSE_ID]"
+    assert parsed_body["created"] == 0
+    assert parsed_body["choices"][0]["message"]["tool_calls"][0]["id"] == "call_123"
+
+
+def test_recorded_response_metadata_normalization_handles_top_level_arrays():
+    response = before_record_response(
+        {
+            "headers": {"Content-Type": ["application/json"]},
+            "body": {"string": '[{"id":"chatcmpl-123","created":1770000000,"choices":[]}]'},
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": '{"messages":[]}',
+                    "headers": {},
+                    "method": "POST",
+                    "uri": "https://api.openai.com",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    parsed_body = yaml.safe_load(ReadableYamlSerializer.serialize(cassette))["interactions"][0]["response"]["body"][
+        "parsed_body"
+    ]
+    # Elements of a top-level JSON array get the same id/created normalization.
+    assert parsed_body == [{"id": "[RECORDED_RESPONSE_ID]", "created": 0, "choices": []}]
+
+
+def test_recorded_jailbreak_score_normalization_allows_extra_fields():
+    response = before_record_response(
+        {
+            "headers": {"Content-Type": ["application/json"]},
+            "body": {"string": '{"jailbreak":true,"score":0.873,"model":"jailbreak-detect"}'},
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": '{"messages":[]}',
+                    "headers": {},
+                    "method": "POST",
+                    "uri": "https://api.nvidia.com",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    parsed_body = yaml.safe_load(ReadableYamlSerializer.serialize(cassette))["interactions"][0]["response"]["body"][
+        "parsed_body"
+    ]
+    # The score is zeroed while the other fields, including the extra "model", are preserved.
+    assert parsed_body == {"jailbreak": True, "score": 0.0, "model": "jailbreak-detect"}
+
+
+def test_recorded_sse_jailbreak_score_normalization_allows_extra_fields():
+    response = before_record_response(
+        {
+            "headers": {"Content-Type": ["text/event-stream"]},
+            "body": {"string": 'data: {"jailbreak":true,"score":0.873,"model":"jailbreak-detect"}\n\n'},
+        }
+    )
+    cassette = {
+        "interactions": [
+            {
+                "request": {
+                    "body": '{"stream":true}',
+                    "headers": {},
+                    "method": "POST",
+                    "uri": "https://api.nvidia.com",
+                },
+                "response": response,
+            }
+        ],
+        "version": 1,
+    }
+
+    parsed_body = yaml.safe_load(ReadableYamlSerializer.serialize(cassette))["interactions"][0]["response"]["body"][
+        "parsed_body"
+    ]
+    # Same score zeroing for a payload delivered as a single SSE event.
+    assert parsed_body == [{"jailbreak": True, "score": 0.0, "model": "jailbreak-detect"}]
+
+
+def test_recorded_request_sanitizer_strips_volatile_headers():
+    request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body='{"messages":[{"role":"user","content":"hello"}]}',
+        headers={"Content-Length": "100", "Content-Type": "application/json"},
+    )
+
+    sanitized = before_record_request(request)
+    body = sanitized.body.decode("utf-8") if isinstance(sanitized.body, bytes) else sanitized.body
+    # Content-Length is dropped while the message content is left intact.
+    assert "Content-Length" not in sanitized.headers
+    assert "hello" in body
+
+
+def test_recorded_request_sanitizer_redacts_non_object_json_body():
+    request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body='[{"accessToken":"request-access-token-1234567890"}]',
+        headers={"Content-Type": "application/json"},
+    )
+
+    sanitized = before_record_request(request)
+    body = sanitized.body.decode("utf-8") if isinstance(sanitized.body, bytes) else sanitized.body
+    # Redaction also reaches objects nested inside a top-level JSON array.
+    assert json.loads(body) == [{"accessToken": "[REDACTED]"}]
+
+
+def test_recorded_request_sanitizer_redacts_raw_non_json_body():
+    request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/uploads",
+        body="Bearer raw-token-1234567890",
+        headers={"Content-Type": "text/plain"},
+    )
+
+    sanitized = before_record_request(request)
+    body = sanitized.body.decode("utf-8") if isinstance(sanitized.body, bytes) else sanitized.body
+    # A plain-text request body has its bearer token replaced as well.
+    assert body == "Bearer [REDACTED]"
+
+
+def test_recorded_response_sanitizer_redacts_raw_non_json_body():
+    response = {
+        "headers": {"Content-Type": ["text/plain"]},
+        "body": {"string": "failed with sk-rawsecret1234567890"},
+    }
+
+    sanitized = before_record_response(response)
+    # The key-shaped token in the plain-text response body is replaced by its placeholder.
+    assert sanitized["body"]["string"] == "failed with [OPENAI_API_KEY]"
+
+
+def test_recorded_response_sanitizer_ignores_non_dict_body_container():
+    response = {"headers": {"Content-Type": ["application/json"]}, "body": "not-a-vcr-body-dict"}
+    # A body that is not a dict must come back equal to the input rather than raising.
+    assert before_record_response(response) == response
+
+
+def test_recorded_vcr_config_matches_on_request_body():
+    assert "recorded_body" in build_vcr_config()["match_on"]  # the body matcher must be listed in match_on
+
+
+def test_recorded_provider_key_lookup_rejects_unknown_provider():
+    with pytest.raises(ValueError, match="Unknown recorded provider 'nvidia'; expected one of: nim, openai"):
+        _provider_key_fixture_name("nvidia")  # not an accepted name; the message lists nim and openai
+
+
+def test_recorded_refresh_uses_api_key_without_live_mode_gate(monkeypatch):
+    monkeypatch.setenv("LIVE_TEST_MODE", "0")
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    # Replay mode ("none") hands back the dummy key even though no real key is set.
+    assert api_key_for_record_mode("OPENAI_API_KEY", "dummy-key", "none") == "dummy-key"
+
+    with pytest.raises(pytest.fail.Exception, match="OPENAI_API_KEY is required to refresh cassette"):
+        api_key_for_record_mode("OPENAI_API_KEY", "dummy-key", "rewrite")
+    # With the real key exported, "rewrite" returns it even though LIVE_TEST_MODE is "0".
+    monkeypatch.setenv("OPENAI_API_KEY", "real-key")
+    assert api_key_for_record_mode("OPENAI_API_KEY", "dummy-key", "rewrite") == "real-key"
+
+
+def test_recorded_refresh_fixture_returns_selected_api_key(monkeypatch):
+    monkeypatch.setenv("OPENAI_API_KEY", "real-key")
+    # "rewrite" selects the real key from the environment, "none" the dummy value.
+    assert set_api_key_for_record_mode(monkeypatch, "OPENAI_API_KEY", "dummy-key", "rewrite") == "real-key"
+    assert set_api_key_for_record_mode(monkeypatch, "OPENAI_API_KEY", "dummy-key", "none") == "dummy-key"
+
+
+def test_recorded_body_matcher_compares_sanitized_json_bodies():
+    cassette_request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body={"model": "gpt-5.4-nano", "accessToken": "stored-token-1234567890"},
+        headers={"Content-Type": "application/json"},
+    )
+    replay_request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body='{"model":"gpt-5.4-nano","accessToken":"live-token-1234567890"}',
+        headers={"Content-Type": "application/json"},
+    )
+    stale_request = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body='{"model":"gpt-5.4-nano","messages":[{"role":"user","content":"changed"}]}',
+        headers={"Content-Type": "application/json"},
+    )
+    # Differing secret values still match; a body with different fields must raise.
+    recorded_body_matcher(cassette_request, replay_request)
+    with pytest.raises(AssertionError, match="Recorded request JSON body"):
+        recorded_body_matcher(cassette_request, stale_request)
diff --git a/tests/recorded/test_fake_cassettes.py b/tests/recorded/test_fake_cassettes.py
new file mode 100644
index 0000000000..256c16d013
--- /dev/null
+++ b/tests/recorded/test_fake_cassettes.py
@@ -0,0 +1,50 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from tests.recorded.fake_cassettes import fake_cassette_header, validate_fake_cassette_metadata
+
+pytestmark = [pytest.mark.recorded]
+# Directory containing this test module; the fake-cassette glob starts here.
+RECORDED_DIR = Path(__file__).parent
+
+
+def test_fake_cassette_header_metadata_validation(tmp_path):
+    cassette = tmp_path / "fake.yaml"
+    cassette.write_text(
+        """
+# fake_cassette:
+#   reason: stream error path cannot be refreshed deterministically
+#   frozen_fields:
+#     - response.body.parsed_body
+#   fake_llm_model_considered: true
+version: 1
+interactions: []
+""",
+        encoding="utf-8",
+    )
+    # The leading "#" comment block is read as metadata, and validation accepts it without raising.
+    assert fake_cassette_header(cassette)["fake_cassette"]["reason"]
+    validate_fake_cassette_metadata(cassette)
+
+
+def test_committed_fake_cassettes_have_metadata():
+    for cassette in RECORDED_DIR.rglob("cassettes/**/fake/**/*.yaml"):  # no matches means nothing to check
+        validate_fake_cassette_metadata(cassette)
diff --git a/tests/recorded/test_inspect_cassette.py b/tests/recorded/test_inspect_cassette.py
new file mode 100644
index 0000000000..1a342b4160
--- /dev/null
+++ b/tests/recorded/test_inspect_cassette.py
@@ -0,0 +1,256 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import pytest
+
+from tests.recorded.cassette import cassette_request_jsons, recorded_chat_response, stream_payloads_from_body
+from tests.recorded.inspect_cassette import cassette_summary
+from tests.recorded.normalization import normalize_stream_chunks
+
+pytestmark = [pytest.mark.recorded]  # Applies the ``recorded`` marker to every test in this module.
+
+
+def test_cassette_summary_reads_parsed_bodies(tmp_path):
+    cassette = tmp_path / "example.yaml"
+    cassette.write_text(
+        """
+version: 1
+interactions:
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    parsed_body:
+      model: gpt-5.4-nano
+      stream: true
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - text/event-stream
+    body:
+      parsed_body:
+      - id: '[RECORDED_RESPONSE_ID]'
+        choices: []
+      - '[DONE]'
+""",
+        encoding="utf-8",
+    )
+    # "[DONE]" is not counted: the two-entry parsed_body yields stream_events == 1.
+    assert cassette_summary(cassette) == [
+        {
+            "index": 0,
+            "method": "POST",
+            "uri": "https://api.openai.com/v1/chat/completions",
+            "status": 200,
+            "model": "gpt-5.4-nano",
+            "stream": True,
+            "response_model": None,
+            "raw_response": None,
+            "stream_events": 1,
+        }
+    ]
+
+
+def test_cassette_summary_reads_raw_error_bodies(tmp_path):
+    cassette = tmp_path / "example.yaml"
+    cassette.write_text(
+        """
+version: 1
+interactions:
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    parsed_body:
+      model: gpt-5.4-nano
+  response:
+    status:
+      code: 503
+      message: Service Unavailable
+    headers:
+      Content-Type:
+      - text/plain
+    body:
+      string: upstream connect error
+""",
+        encoding="utf-8",
+    )
+    # A raw string body is reported under raw_response, with no stream events.
+    assert cassette_summary(cassette) == [
+        {
+            "index": 0,
+            "method": "POST",
+            "uri": "https://api.openai.com/v1/chat/completions",
+            "status": 503,
+            "model": "gpt-5.4-nano",
+            "stream": False,
+            "response_model": None,
+            "raw_response": "upstream connect error",
+            "stream_events": 0,
+        }
+    ]
+
+
+def test_cassette_summary_handles_empty_files(tmp_path):
+    cassette = tmp_path / "example.yaml"
+    cassette.write_text("# empty\n", encoding="utf-8")
+    # A file holding only a comment summarizes to an empty list.
+    assert cassette_summary(cassette) == []
+
+
+def test_recorded_chat_response_normalizes_zero_and_nullable_usage(tmp_path):
+    cassette = tmp_path / "example.yaml"
+    cassette.write_text(
+        """
+version: 1
+interactions:
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    parsed_body:
+      model: gpt-5.4-nano
+  response:
+    body:
+      parsed_body:
+        id: chatcmpl-zero
+        choices:
+        - message:
+            content: ""
+          finish_reason: stop
+        usage:
+          prompt_tokens: 0
+          completion_tokens: 0
+          total_tokens: 0
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    parsed_body:
+      model: gpt-5.4-nano-null
+  response:
+    body:
+      parsed_body:
+        id: chatcmpl-null
+        choices:
+        - message:
+            content: ""
+          finish_reason: stop
+        usage:
+          prompt_tokens:
+          completion_tokens:
+          total_tokens:
+""",
+        encoding="utf-8",
+    )
+
+    zero_usage = recorded_chat_response(cassette, request_model="gpt-5.4-nano").usage
+    nullable_usage = recorded_chat_response(cassette, request_model="gpt-5.4-nano-null").usage
+    # Zero token counts stay 0, and null counts come back as None under input/output/total keys.
+    assert zero_usage["total_tokens"] == 0
+    assert nullable_usage == {"input_tokens": None, "output_tokens": None, "total_tokens": None}
+
+
+def test_recorded_chat_response_skips_non_dict_response_payloads(tmp_path):
+    cassette = tmp_path / "example.yaml"
+    cassette.write_text(
+        """
+version: 1
+interactions:
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    parsed_body:
+      model: gpt-5.4-nano
+  response:
+    body:
+      parsed_body:
+      - not-a-chat-response
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    parsed_body:
+      model: gpt-5.4-nano
+  response:
+    body:
+      string: not-json
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    parsed_body:
+      model: gpt-5.4-nano
+  response:
+    body:
+      parsed_body:
+        id: chatcmpl-valid
+        choices:
+        - message:
+            content: valid
+          finish_reason: stop
+""",
+        encoding="utf-8",
+    )
+    # The list payload and the non-JSON string are skipped; the third interaction supplies the content.
+    assert recorded_chat_response(cassette, request_model="gpt-5.4-nano").content == "valid"
+
+
+def test_cassette_request_jsons_returns_copy_of_cached_interactions(tmp_path):
+    cassette = tmp_path / "example.yaml"
+    cassette.write_text(
+        """
+version: 1
+interactions:
+- request:
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+    parsed_body:
+      model: gpt-5.4-nano
+  response:
+    body:
+      parsed_body:
+        choices: []
+""",
+        encoding="utf-8",
+    )
+    # Mutating one returned result must not leak into the next call.
+    cassette_request_jsons(cassette)[0]["model"] = "mutated"
+
+    assert cassette_request_jsons(cassette)[0]["model"] == "gpt-5.4-nano"
+
+
+def test_cassette_request_jsons_handles_null_interactions(tmp_path):
+    cassette = tmp_path / "example.yaml"
+    cassette.write_text(
+        """
+version: 1
+interactions:
+""",
+        encoding="utf-8",
+    )
+    # ``interactions:`` with no value is treated as an empty list.
+    assert cassette_request_jsons(cassette) == []
+
+
+def test_stream_payloads_from_body_skips_malformed_raw_sse_lines():
+    body = {"string": 'data: not-json\n\ndata: {"choices":[]}\n\ndata: [DONE]\n\n'}
+    # The non-JSON line and the "[DONE]" sentinel are both dropped.
+    assert stream_payloads_from_body(body) == [{"choices": []}]
+
+
+def test_normalize_stream_chunks_ignores_non_string_content_fallback():
+    result = normalize_stream_chunks([{"content": {"not": "text"}}, {"content": "ok"}])
+    # The dict-valued content is ignored; only the string chunk contributes.
+    assert result["content"] == "ok"
diff --git a/tests/recorded/utils.py b/tests/recorded/utils.py
new file mode 100644
index 0000000000..0aed5c6171
--- /dev/null
+++ b/tests/recorded/utils.py
@@ -0,0 +1,40 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import pytest
+
+DUMMY_OPENAI_API_KEY = "sk-recorded-replay"
+DUMMY_NVIDIA_API_KEY = "nvapi-recorded-replay"
+
+
+def api_key_for_record_mode(env_name: str, dummy_value: str, record_mode: str) -> str:
+    """Pick a test credential: ``dummy_value`` for replay-only runs, else the real ``env_name`` value."""
+    if record_mode != "none":
+        # Every mode other than "none" needs a real key; an unset or empty variable fails the test.
+        real_key = os.environ.get(env_name)
+        if not real_key:
+            pytest.fail(f"{env_name} is required to refresh cassette", pytrace=False)
+        return real_key
+    return dummy_value
+
+
+def set_api_key_for_record_mode(
+    monkeypatch: pytest.MonkeyPatch, env_name: str, dummy_value: str, record_mode: str
+) -> str:
+    """Resolve the key with ``api_key_for_record_mode``, set ``env_name`` to it, and return it."""
+    monkeypatch.setenv(env_name, resolved := api_key_for_record_mode(env_name, dummy_value, record_mode))
+    return resolved
diff --git a/tests/test_llmrails.py b/tests/test_llmrails.py
index ffb4d156d9..ea65bbfc5d 100644
--- a/tests/test_llmrails.py
+++ b/tests/test_llmrails.py
@@ -1586,3 +1586,30 @@ async def test_warning_behavior(self, no_main_llm_config, caplog, options, has_l
             else:
                 await rails.generate_async(messages=messages, options=options)
         assert _count_no_llm_warnings(caplog) == expected_warnings
+
+
+def test_load_library_sorts_files_for_deterministic_overrides(tmp_path, monkeypatch):
+    """A bot-message id defined in two library files always resolves to the same file.
+
+    The library loader in ``LLMRails.__init__`` keeps the first ``bot_messages`` entry it
+    sees for an id, so an unsorted ``os.walk`` would leave the winner up to the filesystem.
+    ``os.walk`` is stubbed to list ``z.co`` before ``a.co``; ``a.co`` must still win. Sibling
+    of ``test_prompt_override.py::test_load_prompts_sorts_files_for_deterministic_overrides``.
+    """
+    library_dir = tmp_path / "library"
+    library_dir.mkdir()
+    colliding_sources = (
+        ("z.co", 'define bot test det msg\n  "from_z"\n'),
+        ("a.co", 'define bot test det msg\n  "from_a"\n'),
+    )
+    for file_name, colang in colliding_sources:
+        (library_dir / file_name).write_text(colang, encoding="utf-8")
+
+    def unsorted_walk(_path):
+        # Report the files in creation order ("z.co" first), i.e. deliberately not sorted.
+        yield str(library_dir), [], [file_name for file_name, _ in colliding_sources]
+
+    monkeypatch.setattr("nemoguardrails.rails.llm.llmrails.os.walk", unsorted_walk)
+    config = RailsConfig.from_content(yaml_content="models: []\n")
+    rails = LLMRails(config, llm=FakeLLMModel(responses=["unused"]))
+    assert rails.config.bot_messages["test det msg"] == ["from_a"]
diff --git a/tests/test_prompt_override.py b/tests/test_prompt_override.py
index 3150df3389..19cb92601b 100644
--- a/tests/test_prompt_override.py
+++ b/tests/test_prompt_override.py
@@ -16,6 +16,7 @@
 import os
 
 from nemoguardrails import RailsConfig
+from nemoguardrails.llm import prompts as prompts_module
 from nemoguardrails.llm.prompts import get_prompt
 from nemoguardrails.llm.types import Task
 
@@ -28,3 +29,19 @@ def test_custom_llm_registration():
     prompt = get_prompt(config, Task.GENERATE_USER_INTENT)
 
     assert prompt.content == "<<This is a placeholder for a custom prompt for generating the user intent>>"
+
+
+def test_load_prompts_sorts_files_for_deterministic_overrides(tmp_path, monkeypatch):
+    """``_load_prompts`` returns ``a.yml`` before ``z.yml`` even when ``os.walk`` lists them reversed."""
+    prompts_dir = tmp_path / "prompts"
+    prompts_dir.mkdir()
+    (prompts_dir / "z.yml").write_text("prompts:\n- task: generate_user_intent\n  content: z\n", encoding="utf-8")
+    (prompts_dir / "a.yml").write_text("prompts:\n- task: generate_user_intent\n  content: a\n", encoding="utf-8")
+
+    def reversed_walk(_path):
+        yield str(prompts_dir), [], ["z.yml", "a.yml"]
+
+    monkeypatch.delenv("PROMPTS_DIR", raising=False)
+    monkeypatch.setattr(prompts_module, "CURRENT_DIR", str(tmp_path))
+    monkeypatch.setattr(prompts_module.os, "walk", reversed_walk)
+    assert [loaded.content for loaded in prompts_module._load_prompts()] == ["a", "z"]