Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion nemoguardrails/guardrails/actions/content_safety_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _extract_messages(self, messages: LLMMessages, bot_response: Optional[str])
if not bot_response:
raise RuntimeError("bot_response is required for content safety output check")
return {
"user_input": self._last_user_content(messages),
"user_input": self._last_user_content_or_empty(messages),
"bot_response": bot_response,
}

Expand Down
17 changes: 5 additions & 12 deletions nemoguardrails/guardrails/guardrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,27 +372,20 @@ async def check_async(
rail_types: Optional[List[RailType]] = None,
) -> RailsResult:
"""Run rails on messages based on their content (asynchronous).
Only supported for LLMRails.
Supported by both LLMRails and IORails.
"""
if isinstance(self.rails_engine, IORails):
raise NotImplementedError("IORails doesn't support check_async()")

llmrails = cast(LLMRails, self.rails_engine)
return await llmrails.check_async(messages, rail_types=rail_types)
await self._ensure_started()
return await self.rails_engine.check_async(messages, rail_types=rail_types)

def check(
self,
messages: List[dict],
rail_types: Optional[List[RailType]] = None,
) -> RailsResult:
"""Synchronous version of check_async.
Only supported for LLMRails.
Supported by both LLMRails and IORails.
"""
if isinstance(self.rails_engine, IORails):
raise NotImplementedError("IORails doesn't support check()")

llmrails = cast(LLMRails, self.rails_engine)
return llmrails.check(messages, rail_types=rail_types)
return self.rails_engine.check(messages, rail_types=rail_types)

def register_action(self, action: Callable, name: Optional[str] = None) -> Self:
"""Register a custom action for the rails configuration.
Expand Down
1 change: 1 addition & 0 deletions nemoguardrails/guardrails/guardrails_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class RailResult:

is_safe: bool
reason: str | None = None
triggered_rail: str | None = None


# Default max character length for truncate(). Used to keep DEBUG log lines short.
Expand Down
153 changes: 152 additions & 1 deletion nemoguardrails/guardrails/iorails.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
from nemoguardrails.llm.taskmanager import LLMTaskManager
from nemoguardrails.rails.llm.buffer import get_buffer_strategy
from nemoguardrails.rails.llm.config import RailsConfig, _get_flow_name
from nemoguardrails.rails.llm.options import GenerationOptions
from nemoguardrails.rails.llm.options import GenerationOptions, RailsResult, RailStatus, RailType
from nemoguardrails.streaming import END_OF_STREAM, StreamingHandler
from nemoguardrails.tracing.constants import GuardrailsAttributes
from nemoguardrails.types import LLMModel, LLMResponse, ToolCall
Expand Down Expand Up @@ -160,6 +160,44 @@ def _build_assistant_message(content: str, tool_calls: Optional[list[ToolCall]])
}


# TODO: _determine_rails_from_messages and _get_last_content_by_role are duplicated
# from nemoguardrails.rails.llm.llmrails. They should move to a shared checks-helper
# module that both engines import, rather than IORails depending on the heavy LLMRails
# module. Tracked for a future refactor.
def _determine_rails_from_messages(messages: list[dict]) -> Optional[dict]:
"""Pick which rails to run from message roles.

user-only -> input, assistant-only -> output, both -> input and output.
Returns ``{"rails": [...]}`` or ``None`` when there is no user/assistant
message to check.
"""
roles = {msg.get("role") for msg in reversed(messages)}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The reversed() call inside a set comprehension is a no-op — set construction is order-independent — and just allocates an extra iterator. A plain for msg in messages produces an identical set.

Suggested change
roles = {msg.get("role") for msg in reversed(messages)}
roles = {msg.get("role") for msg in messages}
Prompt To Fix With AI
This is a comment left during a code review.
Path: nemoguardrails/guardrails/iorails.py
Line: 174

Comment:
The `reversed()` call inside a set comprehension is a no-op — set construction is order-independent — and just allocates an extra iterator. A plain `for msg in messages` produces an identical set.

```suggestion
    roles = {msg.get("role") for msg in messages}
```

How can I resolve this? If you propose a fix, please make it concise.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

has_user = "user" in roles
has_assistant = "assistant" in roles

if not has_user and not has_assistant:
log.warning(
"check() called with no user or assistant messages. "
"Only system, context, or tool messages found. "
"Returning passing result without running rails."
)
return None

if has_user and has_assistant:
return {"rails": ["input", "output"]}
if has_user:
return {"rails": ["input"]}
return {"rails": ["output"]}


def _get_last_content_by_role(messages: list[dict], role: str) -> str:
"""Return the content of the last message with the given role, or ""."""
for msg in reversed(messages):
if msg.get("role") == role:
return msg.get("content", "")
return ""


class IORails(BaseGuardrails):
"""Workflow engine for accelerated Input/Output rails inference."""

Expand Down Expand Up @@ -572,6 +610,119 @@ async def _parallel_input_rail_and_response_generation(
log.debug("[%s] Main LLM response: %s", req_id, truncate(response.content))
return response

def check(self, messages: LLMMessages, rail_types: Optional[list[RailType]] = None) -> RailsResult:
"""Synchronous version of ``check_async``.

Mirrors ``generate``: spins up a short-lived IORails engine with tracing
and metrics disabled and runs the check on it. For production use, prefer
the asynchronous ``check_async``.
"""
sync_config = self.config.model_copy(deep=True)
if sync_config.tracing is not None:
sync_config.tracing.enabled = False
if sync_config.metrics is not None:
sync_config.metrics.enabled = False

async def _run_sync_iorails():
"""Spin up a short-lived IORails engine for one synchronous check call."""
async with IORails(sync_config, _report_usage=False) as iorails_engine:
return await iorails_engine.check_async(messages, rail_types=rail_types)

return asyncio.run(_run_sync_iorails())

async def check_async(self, messages: LLMMessages, rail_types: Optional[list[RailType]] = None) -> RailsResult:
"""Run input and/or output rails on messages without main-LLM generation.

When ``rail_types`` is None the rails to run are auto-detected from the
message roles (user-only -> input, assistant-only -> output, both ->
input and output). When provided, exactly the named rail types run.

Submitted through the same admission queue as ``generate_async`` so the
check path shares non-streaming concurrency limits, request metrics, and
the per-request trace span.
"""
await self.start()
metrics_ctx = request_metrics() if self._metrics_enabled else nullcontext()
with metrics_ctx:
try:
return await self._generate_async_queue.submit(self._run_check, messages, rail_types)
except asyncio.QueueFull:
if self._metrics_enabled:
record_nonstream_rejected()
raise

async def _run_check(self, messages: LLMMessages, rail_types: Optional[list[RailType]]) -> RailsResult:
"""Queue-worker entry for ``check_async``: wrap the rails in a request span."""
tracer = self._tracer if self._tracing_enabled else None
with traced_request(tracer) as (request_span, req_id):
t0 = time.monotonic()
try:
result = await self._do_check(messages, rail_types, req_id)
except Exception:
elapsed_ms = (time.monotonic() - t0) * 1000
log.error("[%s] check_async failed time=%.1fms", req_id, elapsed_ms, exc_info=True)
raise
if self._content_capture_enabled:
set_request_content(request_span, messages, result.content)
elapsed_ms = (time.monotonic() - t0) * 1000
log.info(
"[%s] check_async completed time=%.1fms status=%s",
req_id,
elapsed_ms,
result.status.value,
)
return result

async def _do_check(
self,
messages: LLMMessages,
rail_types: Optional[list[RailType]],
req_id: str,
) -> RailsResult:
"""Core check pipeline: run the requested input/output rails on messages."""
log.info("[%s] check_async called", req_id)
log.debug("[%s] check_async messages=%s", req_id, truncate(messages))

if rail_types is not None:
rails_to_run = [rail_type.value for rail_type in rail_types]
else:
determined = _determine_rails_from_messages(messages)
if determined is None:
last_content = messages[-1].get("content", "") if messages else ""
return RailsResult(status=RailStatus.PASSED, content=last_content)
rails_to_run = determined["rails"]

# Content reported on a pass. IORails rails only block or pass; they never
# rewrite content, so a passing check returns the checked content unchanged
# (RailStatus.MODIFIED never occurs for IORails).
if "output" in rails_to_run:
pass_content = _get_last_content_by_role(messages, "assistant")
else:
pass_content = _get_last_content_by_role(messages, "user")

if "input" in rails_to_run:
log.info("[%s] Running input rails", req_id)
input_result = await self.rails_manager.is_input_safe(messages)
if not input_result.is_safe:
log.info("[%s] Input blocked: %s", req_id, input_result.reason)
if self._metrics_enabled:
record_request_blocked(RailDirection.INPUT)
return RailsResult(status=RailStatus.BLOCKED, content=REFUSAL_MESSAGE, rail=input_result.triggered_rail)

if "output" in rails_to_run:
bot_response = _get_last_content_by_role(messages, "assistant")
log.info("[%s] Running output rails", req_id)
output_result = await self.rails_manager.is_output_safe(messages, bot_response)
if not output_result.is_safe:
log.info("[%s] Output blocked: %s", req_id, output_result.reason)
if self._metrics_enabled:
record_request_blocked(RailDirection.OUTPUT)
return RailsResult(
status=RailStatus.BLOCKED, content=REFUSAL_MESSAGE, rail=output_result.triggered_rail
)
Comment on lines +712 to +722

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 False-positive block for output-only check with no assistant message

When rail_types=[RailType.OUTPUT] is requested (explicitly or auto-detected) but the messages contain no assistant message, _get_last_content_by_role(messages, "assistant") returns "". ContentSafetyOutputAction._extract_messages then raises RuntimeError("bot_response is required …"), which RailAction.run() catches and converts to RailResult(is_safe=False). _do_check then returns RailsResult(status=BLOCKED, content=REFUSAL_MESSAGE) — a false-positive block indistinguishable from a real safety verdict. Adding an early guard (if not bot_response: return RailsResult(status=RailStatus.PASSED, content="") or raising ValueError) before calling is_output_safe would surface the right outcome.

Prompt To Fix With AI
This is a comment left during a code review.
Path: nemoguardrails/guardrails/iorails.py
Line: 712-722

Comment:
**False-positive block for output-only check with no assistant message**

When `rail_types=[RailType.OUTPUT]` is requested (explicitly or auto-detected) but the messages contain no assistant message, `_get_last_content_by_role(messages, "assistant")` returns `""`. `ContentSafetyOutputAction._extract_messages` then raises `RuntimeError("bot_response is required …")`, which `RailAction.run()` catches and converts to `RailResult(is_safe=False)`. `_do_check` then returns `RailsResult(status=BLOCKED, content=REFUSAL_MESSAGE)` — a false-positive block indistinguishable from a real safety verdict. Adding an early guard (`if not bot_response: return RailsResult(status=RailStatus.PASSED, content="")` or raising `ValueError`) before calling `is_output_safe` would surface the right outcome.

How can I resolve this? If you propose a fix, please make it concise.


return RailsResult(status=RailStatus.PASSED, content=pass_content)

def _validate_streaming_with_output_rails(self) -> None:
"""Raise if output rails exist but streaming is not enabled for them."""
if len(self.config.rails.output.flows) > 0 and not self._has_streaming_output_rails:
Expand Down
13 changes: 13 additions & 0 deletions nemoguardrails/guardrails/rail_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,19 @@ def _last_user_content(messages: LLMMessages) -> str:
return msg["content"]
raise RuntimeError(f"No user message found in: {messages}")

@staticmethod
def _last_user_content_or_empty(messages: LLMMessages) -> str:
"""Return the content of the last user message, or "" when there is none.

Output checks evaluate the bot response; the user prompt only adds context
and may legitimately be absent (for example an output-only ``check`` on an
assistant message). Unlike :meth:`_last_user_content`, this does not raise.
"""
for msg in reversed(messages):
if msg.get("role") == "user" and msg.get("content"):
return msg["content"]
return ""

@staticmethod
def _prompt_to_messages(prompt: Union[str, list[dict]]) -> list[dict]:
"""Convert LLMTaskManager render output to role/content message format."""
Expand Down
3 changes: 3 additions & 0 deletions nemoguardrails/guardrails/rails_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import asyncio
import logging
from collections.abc import Coroutine, Mapping
from dataclasses import replace
from typing import TYPE_CHECKING, Any, Optional

from nemoguardrails.guardrails.actions.content_safety_action import (
Expand Down Expand Up @@ -166,6 +167,8 @@ async def _run_rail(
with rail_span(self._tracer, flow, direction) as span:
action = self._actions[flow]
result = await action.run(flow, messages, bot_response)
if not result.is_safe and result.triggered_rail is None:
result = replace(result, triggered_rail=_get_flow_name(flow) or flow)
mark_rail_stop(span, result.is_safe)
# Capture rail input + block reason after the action runs.
# RailAction.run() catches its own exceptions and returns
Expand Down
9 changes: 9 additions & 0 deletions tests/guardrails/test_content_safety_iorails_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,15 @@ def test_extracts_user_and_bot(self, output_action):
"bot_response": BOT_RESPONSE,
}

def test_extracts_empty_user_when_no_user_message(self, output_action):
# Output-only check: no user message -> user_input "" rather than raising,
# so the bot response is still checked instead of a spurious error-block.
messages = [{"role": "assistant", "content": "earlier reply"}]
assert output_action._extract_messages(messages, BOT_RESPONSE) == {
"user_input": "",
"bot_response": BOT_RESPONSE,
}


class TestContentSafetyOutputExtractValidation:
"""Test that _extract_messages rejects missing bot_response."""
Expand Down
Loading
Loading