Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 63 additions & 19 deletions nemoguardrails/library/regex/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,28 @@ def _regex_blocked_mapping(result: RegexDetectionResult) -> bool:
return result.get("is_match", False)


def _get_regex_options(source: str, config: RailsConfig):
    """Look up the usable regex detection options for *source*.

    Args:
        source: Which rail the text belongs to: "input", "output" or "retrieval".
        config: The rails configuration object.

    Returns:
        The options object configured for *source*, or ``None`` when there is
        no ``regex_detection`` section, no entry for *source*, or the entry has
        no compiled patterns. The first two cases log a warning, the last one
        logs at debug level.

    Raises:
        ValueError: If *source* is not one of the three supported values.
    """
    supported_sources = ("input", "output", "retrieval")
    if source not in supported_sources:
        raise ValueError("source must be one of 'input', 'output', or 'retrieval'")

    detection_cfg = config.rails.config.regex_detection
    if detection_cfg is None:
        log.warning("No regex_detection configuration found.")
        return None

    source_options = getattr(detection_cfg, source, None)
    if source_options is None:
        log.warning("No regex rails configuration found for source: %s", source)
        return None

    # An entry without any compiled pattern is treated the same as no entry.
    if source_options.compiled_patterns:
        return source_options

    log.debug("No regex patterns specified for source: %s", source)
    return None


@action(is_system_action=True, output_mapping=_regex_blocked_mapping)
async def detect_regex_pattern(
source: str,
Expand All @@ -53,23 +75,8 @@ async def detect_regex_pattern(
- text (str): The original text that was checked.
- detections (List[str]): List of pattern strings that matched.
"""
if source not in ("input", "output", "retrieval"):
raise ValueError("source must be one of 'input', 'output', or 'retrieval'")

regex_config = config.rails.config.regex_detection
if regex_config is None:
log.warning("No regex_detection configuration found.")
return RegexDetectionResult(is_match=False, text=text, detections=[])

options = getattr(regex_config, source, None)

options = _get_regex_options(source, config)
if options is None:
log.warning("No regex rails configuration found for source: %s", source)
return RegexDetectionResult(is_match=False, text=text, detections=[])

compiled_patterns = options.compiled_patterns
if not compiled_patterns:
log.debug("No regex patterns specified for source: %s", source)
return RegexDetectionResult(is_match=False, text=text, detections=[])

if not text:
Expand All @@ -78,12 +85,49 @@ async def detect_regex_pattern(

# Match against pre-compiled patterns and collect all matches.
matched: List[str] = []
for compiled, raw_pattern in zip(compiled_patterns, options.patterns):
for compiled, pcfg in zip(options.compiled_patterns, options.normalized_patterns):
if compiled.search(text):
log.info("Regex pattern matched: %s", raw_pattern)
matched.append(raw_pattern)
log.info("Regex pattern matched: %s", pcfg.pattern)
matched.append(pcfg.pattern)

if matched:
return RegexDetectionResult(is_match=True, text=text, detections=matched)

return RegexDetectionResult(is_match=False, text=text, detections=[])


@action(is_system_action=True)
async def redact_regex_pattern(
    source: str,
    text: str,
    config: RailsConfig,
    **kwargs,
) -> str:
    """Mask every span of *text* matched by the patterns configured for *source*.

    Patterns are applied one after another in configuration order, each one
    operating on the result of the previous substitution.

    Args:
        source: The source for the text, i.e. "input", "output", "retrieval".
        text: The text to redact.
        config: The rails configuration object.
        **kwargs: Accepted but not used by this action.

    Returns:
        The text with all matches of each configured pattern replaced by that
        pattern's mask_token (default ``<REDACTED>``). The input is returned
        unchanged when no usable options exist for *source* or *text* is empty.
    """
    source_options = _get_regex_options(source, config)
    if source_options is None:
        return text

    if not text:
        log.debug("Empty text provided, skipping regex redaction.")
        return text

    result = text
    pattern_pairs = zip(source_options.compiled_patterns, source_options.normalized_patterns)
    for regex, pattern_cfg in pattern_pairs:
        token = pattern_cfg.mask_token
        # A callable replacement keeps the token literal: backslashes and group
        # references inside it are not interpreted as a replacement template.
        result, replaced = regex.subn(lambda _match: token, result)
        if replaced:
            log.info("Regex pattern redacted: %s", pattern_cfg.pattern)

    return result
24 changes: 21 additions & 3 deletions nemoguardrails/library/regex/flows.co
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,50 @@

flow regex check input
"""Check if the user input matches any forbidden regex patterns."""
$result = await DetectRegexMatchAction(source="input", text=$user_message)
$result = await DetectRegexPatternAction(source="input", text=$user_message)

if $result["is_match"]
bot refuse to respond
abort


# Replaces $user_message with the value returned by RedactRegexPatternAction
# for source "input". The `global` declaration is presumably what makes the
# assignment update the shared variable rather than a flow-local one.
flow regex redact input
"""Redact any regex-matched content in the user input."""
global $user_message
$user_message = await RedactRegexPatternAction(source="input", text=$user_message)


# OUTPUT RAILS


flow regex check output
"""Check if the bot output matches any forbidden regex patterns."""
$result = await DetectRegexMatchAction(source="output", text=$bot_message)
$result = await DetectRegexPatternAction(source="output", text=$bot_message)

if $result["is_match"]
bot refuse to respond
abort


# Replaces $bot_message with the value returned by RedactRegexPatternAction
# for source "output". The `global` declaration is presumably what makes the
# assignment update the shared variable rather than a flow-local one.
flow regex redact output
"""Redact any regex-matched content in the bot output."""
global $bot_message
$bot_message = await RedactRegexPatternAction(source="output", text=$bot_message)


# RETRIEVAL RAILS


flow regex check retrieval
"""Check if the relevant chunks from the knowledge base match any forbidden regex patterns.
"""
$result = await DetectRegexMatchAction(source="retrieval", text=$relevant_chunks)
$result = await DetectRegexPatternAction(source="retrieval", text=$relevant_chunks)

if $result["is_match"]
$relevant_chunks = ""


# Replaces $relevant_chunks with the value returned by RedactRegexPatternAction
# for source "retrieval". The `global` declaration is presumably what makes the
# assignment update the shared variable rather than a flow-local one.
flow regex redact retrieval
"""Redact any regex-matched content in the retrieved knowledge base chunks."""
global $relevant_chunks
$relevant_chunks = await RedactRegexPatternAction(source="retrieval", text=$relevant_chunks)
15 changes: 15 additions & 0 deletions nemoguardrails/library/regex/flows.v1.co
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ define subflow regex check input
stop


# Overwrites $user_message with the value returned by the
# redact_regex_pattern action called with source "input".
define subflow regex redact input
"""Redact any regex-matched content in the user input."""
$user_message = execute redact_regex_pattern(source="input", text=$user_message)


# OUTPUT RAILS


Expand All @@ -24,6 +29,11 @@ define subflow regex check output
stop


# Overwrites $bot_message with the value returned by the
# redact_regex_pattern action called with source "output".
define subflow regex redact output
"""Redact any regex-matched content in the bot output."""
$bot_message = execute redact_regex_pattern(source="output", text=$bot_message)


# RETRIEVAL RAILS


Expand All @@ -33,3 +43,8 @@ define subflow regex check retrieval

if $result["is_match"]
$relevant_chunks = ""


# Overwrites $relevant_chunks with the value returned by the
# redact_regex_pattern action called with source "retrieval".
define subflow regex redact retrieval
"""Redact any regex-matched content in the retrieved knowledge base chunks."""
$relevant_chunks = execute redact_regex_pattern(source="retrieval", text=$relevant_chunks)
44 changes: 37 additions & 7 deletions nemoguardrails/rails/llm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,30 +248,55 @@ class SensitiveDataDetection(BaseModel):
)


class RegexPatternConfig(BaseModel):
    """One regex pattern paired with the token that replaces its matches on redaction."""

    # Raw regex source string; required, there is no default.
    pattern: str = Field(description="The regex pattern string.")
    # Used as the replacement text; falls back to "<REDACTED>" when not set.
    mask_token: str = Field(
        description="Replacement token used when redacting this pattern's matches.",
        default="<REDACTED>",
    )


class RegexDetectionOptions(BaseModel):
"""Configuration options for regex pattern detection on a specific source."""
"""Configuration options for regex pattern detection on a specific source.

Each entry in ``patterns`` may be a plain string (uses the default
``<REDACTED>`` mask token) or an object with ``pattern`` and ``mask_token``
keys.
"""

patterns: List[str] = Field(
patterns: List[Union[str, RegexPatternConfig]] = Field(
default_factory=list,
description="List of regex patterns to match against the text.",
description="List of regex patterns (strings or objects with pattern/mask_token).",
)
case_insensitive: bool = Field(
default=False,
description="Whether to perform case-insensitive matching.",
)

_normalized_patterns: List[RegexPatternConfig] = PrivateAttr(default_factory=list)
_compiled_patterns: List["re.Pattern[str]"] = PrivateAttr(default_factory=list)

@model_validator(mode="after")
def compile_patterns(self) -> "RegexDetectionOptions":
"""Pre-compile regex patterns at config load time."""
"""Normalize plain strings to RegexPatternConfig and pre-compile."""
normalized: List[RegexPatternConfig] = []
for entry in self.patterns:
if isinstance(entry, str):
normalized.append(RegexPatternConfig(pattern=entry))
else:
normalized.append(entry)

flags = re.IGNORECASE if self.case_insensitive else 0
compiled = []
for i, pattern in enumerate(self.patterns):
for i, cfg in enumerate(normalized):
try:
compiled.append(re.compile(pattern, flags))
compiled.append(re.compile(cfg.pattern, flags))
except re.error as e:
raise ValueError(f"Invalid regex pattern at index {i} ({pattern!r}): {e}") from e
raise ValueError(f"Invalid regex pattern at index {i} ({cfg.pattern!r}): {e}") from e

object.__setattr__(self, "_normalized_patterns", normalized)
object.__setattr__(self, "_compiled_patterns", compiled)
return self

Expand All @@ -280,6 +305,11 @@ def compiled_patterns(self) -> List["re.Pattern[str]"]:
"""Return the pre-compiled regex patterns."""
return self._compiled_patterns

@property
def normalized_patterns(self) -> List[RegexPatternConfig]:
"""Return the normalized pattern configs."""
return self._normalized_patterns


class RegexDetection(BaseModel):
"""Configuration for regex pattern detection."""
Expand Down
Loading
Loading