NVIDIA-NeMo · nac7 · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 - *(llm)* Add LangChain adapter and framework registry ([#1759](https://github.com/NVIDIA-NeMo/Guardrails/issues/1759))
 - *(llm)* Add streaming tool call accumulation and LLMResponse parity ([#1789](https://github.com/NVIDIA-NeMo/Guardrails/issues/1789))
 - *(llm)* Add default framework with OpenAI-compatible client ([#1797](https://github.com/NVIDIA-NeMo/Guardrails/issues/1797))
+- *(llm)* Add prompt injection detection with configurable sensitivity levels ([#1979](https://github.com/NVIDIA-NeMo/Guardrails/issues/1979))
 - *(llm/frameworks)* Validate framework on registration ([#1863](https://github.com/NVIDIA-NeMo/Guardrails/issues/1863))
 - *(types)* Add framework-agnostic LLM type system ([#1745](https://github.com/NVIDIA-NeMo/Guardrails/issues/1745))
 - *(compat)* Transitional compat layer to migrate from 0.21 to 0.22+ ([#1841](https://github.com/NVIDIA-NeMo/Guardrails/issues/1841))

diff --git a/nemoguardrails/__init__.py b/nemoguardrails/__init__.py
@@ -64,6 +64,7 @@
     set_default_framework,
 )
 from nemoguardrails.llm.providers import register_provider  # noqa: E402
+from nemoguardrails.rails.llm.injections import PromptInjectionDetectedError  # noqa: E402
 from nemoguardrails.types import (  # noqa: E402
     ChatMessage,
     FinishReason,
@@ -92,6 +93,7 @@
     "ToolCall",
     "ToolCallFunction",
     "UsageInfo",
+    "PromptInjectionDetectedError",
     "get_default_framework",
     "register_framework",
     "register_provider",

diff --git a/nemoguardrails/guardrails/guardrails.py b/nemoguardrails/guardrails/guardrails.py
@@ -37,6 +37,7 @@
 from nemoguardrails.guardrails.iorails import IORails
 from nemoguardrails.logging.explain import ExplainInfo
 from nemoguardrails.rails.llm.config import RailsConfig
+from nemoguardrails.rails.llm.injections import PromptInjectionDetectedError, validate_prompt_safety
 from nemoguardrails.rails.llm.llmrails import LLMRails
 from nemoguardrails.rails.llm.options import GenerationResponse, RailsResult, RailType
 from nemoguardrails.types import LLMModel
@@ -210,6 +211,17 @@ def generate(
         """Generate an LLM response synchronously with guardrails applied.
         Supported in both IORails and LLMRails
         """
+        # Validate input for prompt injection attempts if enabled
+        if self.config.injection_detection_enabled:
+            try:
+                validate_prompt_safety(
+                    prompt=prompt,
+                    messages=messages,
+                    sensitivity=self.config.injection_detection_sensitivity,
+                )
+            except PromptInjectionDetectedError as e:
+                log.warning(f"Prompt injection attempt blocked: {e}")
+                raise
 
         generate_messages = self._convert_to_messages(prompt, messages)
         return self.rails_engine.generate(messages=generate_messages, **kwargs)
@@ -238,6 +250,18 @@ async def generate_async(
         """Generate an LLM response asynchronously with guardrails applied.
         Supported by both LLMRails and IORails
         """
+        # Validate input for prompt injection attempts if enabled
+        if self.config.injection_detection_enabled:
+            try:
+                validate_prompt_safety(
+                    prompt=prompt,
+                    messages=messages,
+                    sensitivity=self.config.injection_detection_sensitivity,
+                )
+            except PromptInjectionDetectedError as e:
+                log.warning(f"Prompt injection attempt blocked: {e}")
+                raise
+
         await self._ensure_started()
 
         generate_messages = self._convert_to_messages(prompt, messages)
@@ -247,6 +271,17 @@ def stream_async(
         self, prompt: str | None = None, messages: LLMMessages | None = None, **kwargs
     ) -> AsyncIterator[str | dict]:
         """Generate an LLM response asynchronously with streaming support."""
+        # Validate input for prompt injection attempts if enabled
+        if self.config.injection_detection_enabled:
+            try:
+                validate_prompt_safety(
+                    prompt=prompt,
+                    messages=messages,
+                    sensitivity=self.config.injection_detection_sensitivity,
+                )
+            except PromptInjectionDetectedError as e:
+                log.warning(f"Prompt injection attempt blocked: {e}")
+                raise
 
         stream_messages = self._convert_to_messages(prompt, messages)
 
@@ -320,6 +355,9 @@ async def generate_events_async(self, events: List[dict]) -> List[dict]:
         """Generate the next events based on the provided history.
         Only supported for LLMRails.
         """
+        if self.config.injection_detection_enabled:
+            self._scan_events_for_injection(events)
+
         if isinstance(self.rails_engine, IORails):
             raise NotImplementedError("IORails doesn't support generate_events_async()")
 
@@ -330,12 +368,41 @@ def generate_events(self, events: List[dict]) -> List[dict]:
         """Synchronous version of generate_events_async.
         Only supported for LLMRails.
         """
+        if self.config.injection_detection_enabled:
+            self._scan_events_for_injection(events)
+
         if isinstance(self.rails_engine, IORails):
             raise NotImplementedError("IORails doesn't support generate_events()")
 
         llmrails = cast(LLMRails, self.rails_engine)
         return llmrails.generate_events(events)
 
+    def _scan_events_for_injection(self, events: List[dict]) -> None:
+        """Raise PromptInjectionDetectedError if a user-input event fails the safety check.
+
+        Scans UserMessage ``text`` and UtteranceUserActionFinished ``final_transcript``.
+        """
+        for evt in events:
+            if not isinstance(evt, dict):
+                continue
+            kind = evt.get("type", "")
+            if kind == "UserMessage":
+                user_text = evt.get("text")
+            elif kind == "UtteranceUserActionFinished":
+                user_text = evt.get("final_transcript")
+            else:
+                continue
+            if not (user_text and isinstance(user_text, str)):
+                continue  # missing, empty, or non-string payload: nothing to scan
+            try:
+                validate_prompt_safety(
+                    prompt=user_text,
+                    sensitivity=self.config.injection_detection_sensitivity,
+                )
+            except PromptInjectionDetectedError as blocked:
+                log.warning(f"Prompt injection attempt blocked: {blocked}")
+                raise
+
     async def process_events_async(
         self,
         events: List[dict],
@@ -345,6 +412,9 @@ async def process_events_async(
         """Process a sequence of events in a given state.
         Only supported for LLMRails.
         """
+        if self.config.injection_detection_enabled:
+            self._scan_events_for_injection(events)
+
         if isinstance(self.rails_engine, IORails):
             raise NotImplementedError("IORails doesn't support process_events_async()")
 
@@ -360,6 +430,9 @@ def process_events(
         """Synchronous version of process_events_async.
         Only supported for LLMRails.
         """
+        if self.config.injection_detection_enabled:
+            self._scan_events_for_injection(events)
+
         if isinstance(self.rails_engine, IORails):
             raise NotImplementedError("IORails doesn't support process_events()")
 
@@ -374,6 +447,17 @@ async def check_async(
         """Run rails on messages based on their content (asynchronous).
         Only supported for LLMRails.
         """
+        # Validate input for prompt injection attempts if enabled
+        if self.config.injection_detection_enabled:
+            try:
+                validate_prompt_safety(
+                    messages=messages,
+                    sensitivity=self.config.injection_detection_sensitivity,
+                )
+            except PromptInjectionDetectedError as e:
+                log.warning(f"Prompt injection attempt blocked: {e}")
+                raise
+
         if isinstance(self.rails_engine, IORails):
             raise NotImplementedError("IORails doesn't support check_async()")
 
@@ -388,6 +472,17 @@ def check(
         """Synchronous version of check_async.
         Only supported for LLMRails.
         """
+        # Validate input for prompt injection attempts if enabled
+        if self.config.injection_detection_enabled:
+            try:
+                validate_prompt_safety(
+                    messages=messages,
+                    sensitivity=self.config.injection_detection_sensitivity,
+                )
+            except PromptInjectionDetectedError as e:
+                log.warning(f"Prompt injection attempt blocked: {e}")
+                raise
+
         if isinstance(self.rails_engine, IORails):
             raise NotImplementedError("IORails doesn't support check()")
 

diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py
@@ -1805,6 +1805,20 @@ class RailsConfig(BaseModel):
         description="Configuration for OTEL metrics emission (independent of tracing).",
     )
 
+    injection_detection_enabled: bool = Field(
+        default=True,
+        description="Whether to enable prompt injection detection. When disabled, no injection checks are performed.",
+    )
+
+    injection_detection_sensitivity: Literal["low", "medium", "high"] = Field(
+        default="medium",
+        description="Sensitivity level for prompt injection detection. "
+        "'low': catches critical patterns only, "
+        "'medium': catches moderate and critical patterns, "
+        "'high': catches all patterns including advanced techniques. "
+        "Use 'low' to reduce false positives in coding/developer-facing contexts.",
+    )
+
     @root_validator(pre=True)
     def check_model_exists_for_input_rails(cls, values):
         """Make sure we have a model for each input rail where one is provided using $model=<model_type>"""