NVIDIA · sidsingh-nvidia · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026
@@ -2,11 +2,15 @@
 from megatron.core.tokenizers.text.parsers.deepseek_r1_reasoning_parser import (
     DeepSeekR1ReasoningParser,
 )
+from megatron.core.tokenizers.text.parsers.nemotron_v3_reasoning_parser import (
+    NemotronV3ReasoningParser,
+)
 from megatron.core.tokenizers.text.parsers.qwen3_coder_tool_parser import Qwen3CoderToolParser
 
 PARSER_MAPPING = {
     "deepseek-r1-reasoning": DeepSeekR1ReasoningParser,
     "qwen3-coder-tool": Qwen3CoderToolParser,
+    "nemotron-v3-reasoning": NemotronV3ReasoningParser,  # one key for Nemotron 3 Super and Ultra
 }
 
 __all__ = ["PARSER_MAPPING"]
@@ -0,0 +1,54 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+from megatron.core.tokenizers.text.parsers.deepseek_r1_reasoning_parser import (
+    DeepSeekR1ReasoningParser,
+)
+
+
+class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
+    """Parser for NVIDIA Nemotron 3 (Super, Ultra) reasoning output.
+
+    Behaves like `DeepSeekR1ReasoningParser`, except when reasoning is disabled
+    via `enable_thinking=False`, or the caller passes `force_nonempty_content=True`:
+    in that case, if no content would otherwise be returned (either because
+    `</think>` never closes, e.g. reasoning exceeded the max length, or because
+    it closes with nothing following it), the reasoning text is returned as
+    content instead of being discarded, so callers always get a non-empty
+    response.
+    """
+
+    @staticmethod
+    def parse(text: str, **kwargs) -> tuple[str, dict[str, str]]:
+        """Extract reasoning content delimited by `<think>...</think>` tags.
+
+        Args:
+            text (str): The text to parse.
+            enable_thinking (bool, optional): Whether reasoning is enabled for
+                this request. When `False`, reasoning is surfaced as content
+                rather than discarded if there would otherwise be no content.
+            force_nonempty_content (bool, optional): When `True`, reasoning is
+                surfaced as content rather than discarded if there would
+                otherwise be no content.
+
+        Returns:
+            tuple[str, dict[str, str]]: A tuple containing the unprocessed text
+            and a dictionary with the extracted reasoning content.
+        """
+        # Discard anything before the first `<think>`; if there is none, keep the whole text.
+        before, think_open, after = text.partition("<think>")
+        remaining = after if think_open else before
+
+        if "</think>" not in remaining:
+            # No closing tag: treat the remaining text as unterminated reasoning.
+            reasoning_content, content = remaining, ""
+        else:
+            reasoning_content, _, content = remaining.partition("</think>")
+
+        if (  # `is False` / `is True`: an omitted kwarg (None) never triggers the swap
+            content == ""  # strict test: whitespace-only text after `</think>` counts as content
+            and reasoning_content
+            and (kwargs.get("enable_thinking") is False or kwargs.get("force_nonempty_content") is True)
+        ):
+            content, reasoning_content = reasoning_content, ""  # surface reasoning as content
+
+        info = {"reasoning": reasoning_content} if reasoning_content else {}  # no key if empty
+        return content, info
diff --git a/tests/unit_tests/tokenizers/test_text_parsers.py b/tests/unit_tests/tokenizers/test_text_parsers.py
@@ -0,0 +1,90 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+"""Parity tests for the `<think>`/`</think>` reasoning parsers.
+
+Ground truth for `NemotronV3ReasoningParser` is derived from vLLM's actual
+implementation (not reimplemented from memory):
+
+- Base extraction: `BaseThinkingReasoningParser.extract_reasoning` in
+  `vllm/reasoning/basic_parsers.py` (used unmodified by `DeepSeekR1ReasoningParser`
+  for non-streaming extraction). Notably `final_content = content or None`, so an
+  empty string after a closing `</think>` collapses to `None`, same as a missing
+  closing tag entirely.
+- Override: `SuperV3ReasoningParser`/`UltraV3ReasoningParser.extract_reasoning` in
+  `super_v3_reasoning_parser.py`/`ultra_v3_reasoning_parser.py` (from
+  huggingface.co/nvidia/NVIDIA-Nemotron-3-{Super,Ultra}-*), which swaps all text
+  into content when `final_content is None` and either `enable_thinking is False`
+  or `force_nonempty_content is True`.
+
+vLLM's `extract_reasoning` returns `(reasoning, content)` with `None` as the
+"absent" sentinel; Megatron's `parse` returns `(content, info)` with `""` as the
+"absent" sentinel and omits the `"reasoning"` key from `info` entirely when
+reasoning is empty. Expected values below are translated accordingly.
+"""
+
+import pytest
+
+from megatron.core.tokenizers.text.parsers import PARSER_MAPPING
+from megatron.core.tokenizers.text.parsers.deepseek_r1_reasoning_parser import (
+    DeepSeekR1ReasoningParser,
+)
+from megatron.core.tokenizers.text.parsers.nemotron_v3_reasoning_parser import (
+    NemotronV3ReasoningParser,
+)
+
+# Each case: (text, parse kwargs, expected_content, expected_info); `{}` means no reasoning key.
+NEMOTRON_V3_CASES = [
+    # No chat_template_kwargs override: behaves exactly like DeepSeekR1ReasoningParser.
+    ("<think>hello", {}, "", {"reasoning": "hello"}),
+    ("<think>hello</think>world", {}, "world", {"reasoning": "hello"}),
+    # Closing tag present but nothing follows it: vLLM's `content or None` treats
+    # this the same as a missing closing tag, so it is empty here too.
+    ("<think>hello</think>", {}, "", {"reasoning": "hello"}),
+    # No `<think>` tag at all: vLLM assumes the whole string is reasoning.
+    ("just an answer", {}, "", {"reasoning": "just an answer"}),
+    # enable_thinking=False surfaces would-be-empty content as the reasoning text,
+    # for both the "unterminated" and "closes with nothing following" cases.
+    ("<think>hello", {"enable_thinking": False}, "hello", {}),
+    ("<think>hello</think>", {"enable_thinking": False}, "hello", {}),
+    # force_nonempty_content=True has the same effect as enable_thinking=False.
+    ("<think>hello</think>", {"force_nonempty_content": True}, "hello", {}),
+    ("<think>hello", {"force_nonempty_content": True}, "hello", {}),
+    # The override only fires when there would otherwise be no content.
+    (
+        "<think>hello</think>world",
+        {"enable_thinking": False},
+        "world",
+        {"reasoning": "hello"},
+    ),
+    # Text preceding `<think>` is discarded, override still applies past it.
+    ("prefix<think>hello</think>", {"enable_thinking": False}, "hello", {}),
+    # enable_thinking=True (or omitted) must not trigger the override.
+    ("<think>hello</think>", {"enable_thinking": True}, "", {"reasoning": "hello"}),
+]
+
+
+@pytest.mark.parametrize("text,kwargs,expected_content,expected_info", NEMOTRON_V3_CASES)
+def test_nemotron_v3_reasoning_parser_matches_vllm(text, kwargs, expected_content, expected_info):
+    content, info = NemotronV3ReasoningParser.parse(text, **kwargs)  # static call, no instance
+    assert content == expected_content
+    assert info == expected_info  # whole-dict equality, so an unexpected "reasoning" key fails
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "<think>hello",  # unterminated reasoning
+        "<think>hello</think>world",  # reasoning followed by content
+        "<think>hello</think>",  # reasoning closed with nothing after it
+        "just an answer",  # no tags at all
+    ],
+)
+def test_nemotron_v3_reasoning_parser_without_override_matches_deepseek_r1(text):
+    """With no `enable_thinking`/`force_nonempty_content` kwargs, the Nemotron 3
+    parser must be observably identical to the DeepSeek R1 parser it extends."""
+    assert NemotronV3ReasoningParser.parse(text) == DeepSeekR1ReasoningParser.parse(text)
+
+
+def test_parser_mapping_registers_nemotron_v3_reasoning():
+    """Super and Ultra share identical reasoning-extraction logic upstream, so
+    both models are served by a single consolidated parser and registry key."""
+    assert PARSER_MAPPING["nemotron-v3-reasoning"] is NemotronV3ReasoningParser  # same class object