NVIDIA-NeMo · tgasser-nv · Jun 23, 2026 · Jun 23, 2026 · greptile-apps · Jun 23, 2026
diff --git a/nemoguardrails/guardrails/actions/content_safety_action.py b/nemoguardrails/guardrails/actions/content_safety_action.py
@@ -75,7 +75,7 @@ def _extract_messages(self, messages: LLMMessages, bot_response: Optional[str])
         if not bot_response:
             raise RuntimeError("bot_response is required for content safety output check")
         return {
-            "user_input": self._last_user_content(messages),
+            "user_input": self._last_user_content_or_empty(messages),
             "bot_response": bot_response,
         }
 

diff --git a/nemoguardrails/guardrails/guardrails.py b/nemoguardrails/guardrails/guardrails.py
@@ -372,27 +372,20 @@ async def check_async(
         rail_types: Optional[List[RailType]] = None,
     ) -> RailsResult:
         """Run rails on messages based on their content (asynchronous).
-        Only supported for LLMRails.
+        Supported by both LLMRails and IORails.
         """
-        if isinstance(self.rails_engine, IORails):
-            raise NotImplementedError("IORails doesn't support check_async()")
-
-        llmrails = cast(LLMRails, self.rails_engine)
-        return await llmrails.check_async(messages, rail_types=rail_types)
+        await self._ensure_started()
+        return await self.rails_engine.check_async(messages, rail_types=rail_types)
 
     def check(
         self,
         messages: List[dict],
         rail_types: Optional[List[RailType]] = None,
     ) -> RailsResult:
         """Synchronous version of check_async.
-        Only supported for LLMRails.
+        Supported by both LLMRails and IORails.
         """
-        if isinstance(self.rails_engine, IORails):
-            raise NotImplementedError("IORails doesn't support check()")
-
-        llmrails = cast(LLMRails, self.rails_engine)
-        return llmrails.check(messages, rail_types=rail_types)
+        return self.rails_engine.check(messages, rail_types=rail_types)
 
     def register_action(self, action: Callable, name: Optional[str] = None) -> Self:
         """Register a custom action for the rails configuration.

diff --git a/nemoguardrails/guardrails/guardrails_types.py b/nemoguardrails/guardrails/guardrails_types.py
@@ -38,6 +38,7 @@ class RailResult:
 
     is_safe: bool
     reason: str | None = None
+    triggered_rail: str | None = None
 
 
 # Default max character length for truncate(). Used to keep DEBUG log lines short.

diff --git a/nemoguardrails/guardrails/iorails.py b/nemoguardrails/guardrails/iorails.py
@@ -62,7 +62,7 @@
 from nemoguardrails.llm.taskmanager import LLMTaskManager
 from nemoguardrails.rails.llm.buffer import get_buffer_strategy
 from nemoguardrails.rails.llm.config import RailsConfig, _get_flow_name
-from nemoguardrails.rails.llm.options import GenerationOptions
+from nemoguardrails.rails.llm.options import GenerationOptions, RailsResult, RailStatus, RailType
 from nemoguardrails.streaming import END_OF_STREAM, StreamingHandler
 from nemoguardrails.tracing.constants import GuardrailsAttributes
 from nemoguardrails.types import LLMModel, LLMResponse, ToolCall
@@ -160,6 +160,44 @@ def _build_assistant_message(content: str, tool_calls: Optional[list[ToolCall]])
     }
 
 
+# TODO: _determine_rails_from_messages and _get_last_content_by_role are duplicated
+# from nemoguardrails.rails.llm.llmrails. They should move to a shared checks-helper
+# module that both engines import, rather than IORails depending on the heavy LLMRails
+# module. Tracked for a future refactor.
+def _determine_rails_from_messages(messages: list[dict]) -> Optional[dict]:
+    """Pick which rails to run from message roles.
+
+    user-only -> input, assistant-only -> output, both -> input and output.
+    Returns ``{"rails": [...]}`` or ``None`` when there is no user/assistant
+    message to check.
+    """
+    roles = {msg.get("role") for msg in reversed(messages)}
-    roles = {msg.get("role") for msg in reversed(messages)}
+    roles = {msg.get("role") for msg in messages}
-    roles = {msg.get("role") for msg in reversed(messages)}
+    roles = {msg.get("role") for msg in messages}
+    has_user = "user" in roles
+    has_assistant = "assistant" in roles
+
+    if not has_user and not has_assistant:
+        log.warning(
+            "check() called with no user or assistant messages. "
+            "Only system, context, or tool messages found. "
+            "Returning passing result without running rails."
+        )
+        return None
+
+    if has_user and has_assistant:
+        return {"rails": ["input", "output"]}
+    if has_user:
+        return {"rails": ["input"]}
+    return {"rails": ["output"]}
+
+
+def _get_last_content_by_role(messages: list[dict], role: str) -> str:
+    """Return the content of the last message with the given role, or ""."""
+    for msg in reversed(messages):
+        if msg.get("role") == role:
+            return msg.get("content", "")
+    return ""
+
+
 class IORails(BaseGuardrails):
     """Workflow engine for accelerated Input/Output rails inference."""
 
@@ -572,6 +610,119 @@ async def _parallel_input_rail_and_response_generation(
         log.debug("[%s] Main LLM response: %s", req_id, truncate(response.content))
         return response
 
+    def check(self, messages: LLMMessages, rail_types: Optional[list[RailType]] = None) -> RailsResult:
+        """Synchronous version of ``check_async``.
+
+        Mirrors ``generate``: spins up a short-lived IORails engine with tracing
+        and metrics disabled and runs the check on it. For production use, prefer
+        the asynchronous ``check_async``.
+        """
+        sync_config = self.config.model_copy(deep=True)
+        if sync_config.tracing is not None:
+            sync_config.tracing.enabled = False
+        if sync_config.metrics is not None:
+            sync_config.metrics.enabled = False
+
+        async def _run_sync_iorails():
+            """Spin up a short-lived IORails engine for one synchronous check call."""
+            async with IORails(sync_config, _report_usage=False) as iorails_engine:
+                return await iorails_engine.check_async(messages, rail_types=rail_types)
+
+        return asyncio.run(_run_sync_iorails())
+
+    async def check_async(self, messages: LLMMessages, rail_types: Optional[list[RailType]] = None) -> RailsResult:
+        """Run input and/or output rails on messages without main-LLM generation.
+
+        When ``rail_types`` is None the rails to run are auto-detected from the
+        message roles (user-only -> input, assistant-only -> output, both ->
+        input and output). When provided, exactly the named rail types run.
+
+        Submitted through the same admission queue as ``generate_async`` so the
+        check path shares non-streaming concurrency limits, request metrics, and
+        the per-request trace span.
+        """
+        await self.start()
+        metrics_ctx = request_metrics() if self._metrics_enabled else nullcontext()
+        with metrics_ctx:
+            try:
+                return await self._generate_async_queue.submit(self._run_check, messages, rail_types)
+            except asyncio.QueueFull:
+                if self._metrics_enabled:
+                    record_nonstream_rejected()
+                raise
+
+    async def _run_check(self, messages: LLMMessages, rail_types: Optional[list[RailType]]) -> RailsResult:
+        """Queue-worker entry for ``check_async``: wrap the rails in a request span."""
+        tracer = self._tracer if self._tracing_enabled else None
+        with traced_request(tracer) as (request_span, req_id):
+            t0 = time.monotonic()
+            try:
+                result = await self._do_check(messages, rail_types, req_id)
+            except Exception:
+                elapsed_ms = (time.monotonic() - t0) * 1000
+                log.error("[%s] check_async failed time=%.1fms", req_id, elapsed_ms, exc_info=True)
+                raise
+            if self._content_capture_enabled:
+                set_request_content(request_span, messages, result.content)
+            elapsed_ms = (time.monotonic() - t0) * 1000
+            log.info(
+                "[%s] check_async completed time=%.1fms status=%s",
+                req_id,
+                elapsed_ms,
+                result.status.value,
+            )
+            return result
+
+    async def _do_check(
+        self,
+        messages: LLMMessages,
+        rail_types: Optional[list[RailType]],
+        req_id: str,
+    ) -> RailsResult:
+        """Core check pipeline: run the requested input/output rails on messages."""
+        log.info("[%s] check_async called", req_id)
+        log.debug("[%s] check_async messages=%s", req_id, truncate(messages))
+
+        if rail_types is not None:
+            rails_to_run = [rail_type.value for rail_type in rail_types]
+        else:
+            determined = _determine_rails_from_messages(messages)
+            if determined is None:
+                last_content = messages[-1].get("content", "") if messages else ""
+                return RailsResult(status=RailStatus.PASSED, content=last_content)
+            rails_to_run = determined["rails"]
+
+        # Content reported on a pass. IORails rails only block or pass; they never
+        # rewrite content, so a passing check returns the checked content unchanged
+        # (RailStatus.MODIFIED never occurs for IORails).
+        if "output" in rails_to_run:
+            pass_content = _get_last_content_by_role(messages, "assistant")
+        else:
+            pass_content = _get_last_content_by_role(messages, "user")
+
+        if "input" in rails_to_run:
+            log.info("[%s] Running input rails", req_id)
+            input_result = await self.rails_manager.is_input_safe(messages)
+            if not input_result.is_safe:
+                log.info("[%s] Input blocked: %s", req_id, input_result.reason)
+                if self._metrics_enabled:
+                    record_request_blocked(RailDirection.INPUT)
+                return RailsResult(status=RailStatus.BLOCKED, content=REFUSAL_MESSAGE, rail=input_result.triggered_rail)
+
+        if "output" in rails_to_run:
+            bot_response = _get_last_content_by_role(messages, "assistant")
+            log.info("[%s] Running output rails", req_id)
+            output_result = await self.rails_manager.is_output_safe(messages, bot_response)
+            if not output_result.is_safe:
+                log.info("[%s] Output blocked: %s", req_id, output_result.reason)
+                if self._metrics_enabled:
+                    record_request_blocked(RailDirection.OUTPUT)
+                return RailsResult(
+                    status=RailStatus.BLOCKED, content=REFUSAL_MESSAGE, rail=output_result.triggered_rail
+                )
+
+        return RailsResult(status=RailStatus.PASSED, content=pass_content)
+
     def _validate_streaming_with_output_rails(self) -> None:
         """Raise if output rails exist but streaming is not enabled for them."""
         if len(self.config.rails.output.flows) > 0 and not self._has_streaming_output_rails:

diff --git a/nemoguardrails/guardrails/rail_action.py b/nemoguardrails/guardrails/rail_action.py
@@ -187,6 +187,19 @@ def _last_user_content(messages: LLMMessages) -> str:
                 return msg["content"]
         raise RuntimeError(f"No user message found in: {messages}")
 
+    @staticmethod
+    def _last_user_content_or_empty(messages: LLMMessages) -> str:
+        """Return the content of the last user message, or "" when there is none.
+
+        Output checks evaluate the bot response; the user prompt only adds context
+        and may legitimately be absent (for example an output-only ``check`` on an
+        assistant message). Unlike :meth:`_last_user_content`, this does not raise.
+        """
+        for msg in reversed(messages):
+            if msg.get("role") == "user" and msg.get("content"):
+                return msg["content"]
+        return ""
+
     @staticmethod
     def _prompt_to_messages(prompt: Union[str, list[dict]]) -> list[dict]:
         """Convert LLMTaskManager render output to role/content message format."""

diff --git a/nemoguardrails/guardrails/rails_manager.py b/nemoguardrails/guardrails/rails_manager.py
@@ -24,6 +24,7 @@
 import asyncio
 import logging
 from collections.abc import Coroutine, Mapping
+from dataclasses import replace
 from typing import TYPE_CHECKING, Any, Optional
 
 from nemoguardrails.guardrails.actions.content_safety_action import (
@@ -166,6 +167,8 @@ async def _run_rail(
         with rail_span(self._tracer, flow, direction) as span:
             action = self._actions[flow]
             result = await action.run(flow, messages, bot_response)
+            if not result.is_safe and result.triggered_rail is None:
+                result = replace(result, triggered_rail=_get_flow_name(flow) or flow)
             mark_rail_stop(span, result.is_safe)
             # Capture rail input + block reason after the action runs.
             # RailAction.run() catches its own exceptions and returns

diff --git a/tests/guardrails/test_content_safety_iorails_actions.py b/tests/guardrails/test_content_safety_iorails_actions.py
@@ -136,6 +136,15 @@ def test_extracts_user_and_bot(self, output_action):
             "bot_response": BOT_RESPONSE,
         }
 
+    def test_extracts_empty_user_when_no_user_message(self, output_action):
+        # Output-only check: no user message -> user_input "" rather than raising,
+        # so the bot response is still checked instead of a spurious error-block.
+        messages = [{"role": "assistant", "content": "earlier reply"}]
+        assert output_action._extract_messages(messages, BOT_RESPONSE) == {
+            "user_input": "",
+            "bot_response": BOT_RESPONSE,
+        }
+
 
 class TestContentSafetyOutputExtractValidation:
     """Test that _extract_messages rejects missing bot_response."""