DataDog · vitor-de-araujo · Apr 1, 2025 · Mar 26, 2025 · Mar 26, 2025 · Mar 26, 2025
@@ -4,6 +4,7 @@
 import pytest
 
 from ddtrace.contrib.internal.pytest._retry_utils import RetryOutcomes
+from ddtrace.contrib.internal.pytest._retry_utils import RetryTestReport
 from ddtrace.contrib.internal.pytest._retry_utils import _get_outcome_from_retry
 from ddtrace.contrib.internal.pytest._retry_utils import _get_retry_attempt_string
 from ddtrace.contrib.internal.pytest._retry_utils import set_retry_num
@@ -79,13 +80,14 @@ def atr_handle_retries(
         return
 
     atr_outcome = _atr_do_retries(item, outcomes)
+    longrepr = InternalTest.stash_get(test_id, "failure_longrepr")
 
-    final_report = pytest_TestReport(
+    final_report = RetryTestReport(
         nodeid=item.nodeid,
         location=item.location,
         keywords=item.keywords,
         when="call",
-        longrepr=None,
+        longrepr=longrepr,
         outcome=final_outcomes[atr_outcome],
     )
     item.ihook.pytest_runtest_logreport(report=final_report)

@@ -4,6 +4,7 @@
 import pytest
 
 from ddtrace.contrib.internal.pytest._retry_utils import RetryOutcomes
+from ddtrace.contrib.internal.pytest._retry_utils import RetryTestReport
 from ddtrace.contrib.internal.pytest._retry_utils import _get_outcome_from_retry
 from ddtrace.contrib.internal.pytest._retry_utils import _get_retry_attempt_string
 from ddtrace.contrib.internal.pytest._retry_utils import set_retry_num
@@ -59,13 +60,14 @@ def attempt_to_fix_handle_retries(
         return
 
     retries_outcome = _do_retries(item, outcomes)
+    longrepr = InternalTest.stash_get(test_id, "failure_longrepr")
 
-    final_report = pytest_TestReport(
+    final_report = RetryTestReport(
         nodeid=item.nodeid,
         location=item.location,
         keywords=item.keywords,
         when="call",
-        longrepr=None,
+        longrepr=longrepr,
         outcome=final_outcomes[retries_outcome],
     )
 

@@ -4,6 +4,7 @@
 import pytest
 
 from ddtrace.contrib.internal.pytest._retry_utils import RetryOutcomes
+from ddtrace.contrib.internal.pytest._retry_utils import RetryTestReport
 from ddtrace.contrib.internal.pytest._retry_utils import _get_outcome_from_retry
 from ddtrace.contrib.internal.pytest._retry_utils import _get_retry_attempt_string
 from ddtrace.contrib.internal.pytest._retry_utils import set_retry_num
@@ -85,13 +86,14 @@ def efd_handle_retries(
             InternalTest.mark_skip(test_id)
 
     efd_outcome = _efd_do_retries(item)
+    longrepr = InternalTest.stash_get(test_id, "failure_longrepr")
 
-    final_report = pytest_TestReport(
+    final_report = RetryTestReport(
         nodeid=item.nodeid,
         location=item.location,
         keywords=item.keywords,
         when="call",
-        longrepr=None,
+        longrepr=longrepr,
         outcome=_FINAL_OUTCOMES[efd_outcome],
     )
     item.ihook.pytest_runtest_logreport(report=final_report)

@@ -528,6 +528,9 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome
         # (see <https://github.com/pytest-dev/pytest/blob/8.3.x/src/_pytest/main.py#L654>).
         original_result.outcome = OUTCOME_QUARANTINED
 
+    if original_result.failed or original_result.skipped:
+        InternalTest.stash_set(test_id, "failure_longrepr", original_result.longrepr)
+
     # ATR and EFD retry tests only if their teardown succeeded to ensure the best chance the retry will succeed
     # NOTE: this mutates the original result's outcome
     if InternalTest.stash_get(test_id, "setup_failed") or InternalTest.stash_get(test_id, "teardown_failed"):

@@ -8,6 +8,7 @@
 from _pytest.runner import CallInfo
 import pytest
 
+from ddtrace.contrib.internal.pytest._types import pytest_TestReport
 from ddtrace.contrib.internal.pytest._types import tmppath_result_key
 from ddtrace.contrib.internal.pytest._utils import _TestOutcome
 from ddtrace.ext.test_visibility.api import TestExcInfo
@@ -128,3 +129,33 @@ def _retry_run_when(item, when, outcomes: RetryOutcomes) -> t.Tuple[CallInfo, _p
     if when == "call" or "passed" not in report.outcome:
         item.ihook.pytest_runtest_logreport(report=report)
     return call, report
+
+
+class RetryTestReport(pytest_TestReport):
+    """
+    A RetryTestReport behaves just like a normal pytest TestReport, except that the the failed/passed/skipped
+    properties are aware of retry final states (dd_efd_final_*, etc). This affects the test counts in JUnit XML output,
+    for instance.
+
+    The object should be initialized with the `longrepr` of the _initial_ test attempt. A `longrepr` set to `None` means
+    the initial attempt either succeeded (which means it was already counted by pytest) or was quarantined (which means
+    we should not count it at all), so we don't need to count it here.
+    """
+
+    @property
+    def failed(self):
+        if self.longrepr is None:
+            return False
+        return "final_failed" in self.outcome
+
+    @property
+    def passed(self):
+        if self.longrepr is None:
+            return False
+        return "final_passed" in self.outcome or "final_flaky" in self.outcome
+
+    @property
+    def skipped(self):
+        if self.longrepr is None:
+            return False
+        return "final_skipped" in self.outcome
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    CI Visibility: This fix resolves an issue where JUnit XML output would not count tests retried by Early Flake
+    Detection, Auto Test Retries, and Attempt-to-Fix.
@@ -7,6 +7,7 @@
 - The session object is patched to never be a faulty session, by default.
 """
 from unittest import mock
+from xml.etree import ElementTree
 
 import pytest
 
@@ -315,3 +316,22 @@ def test_pytest_atr_does_not_retry_failed_setup_or_teardown(self):
 
         assert rec.ret == 1
         assert len(spans) == 5
+
+    def test_pytest_atr_junit_xml(self):
+        self.testdir.makepyfile(test_pass=_TEST_PASS_CONTENT)
+        self.testdir.makepyfile(test_fail=_TEST_FAIL_CONTENT)
+        self.testdir.makepyfile(test_errors=_TEST_ERRORS_CONTENT)
+        self.testdir.makepyfile(test_pass_on_retries=_TEST_PASS_ON_RETRIES_CONTENT)
+        self.testdir.makepyfile(test_skip=_TEST_SKIP_CONTENT)
+
+        rec = self.inline_run("--ddtrace", "--junit-xml=out.xml")
+        assert rec.ret == 1
+
+        test_suite = ElementTree.parse(f"{self.testdir}/out.xml").find("testsuite")
+
+        # There are 15 tests, but we get 16 in the JUnit XML output, because a test that passes during call but fails
+        # during teardown is counted twice. This is a bug in pytest, not ddtrace.
+        assert test_suite.attrib["tests"] == "16"
+        assert test_suite.attrib["failures"] == "4"
+        assert test_suite.attrib["skipped"] == "4"
+        assert test_suite.attrib["errors"] == "2"
@@ -7,6 +7,7 @@
 - The session object is patched to never be a faulty session, by default.
 """
 from unittest import mock
+from xml.etree import ElementTree
 
 import pytest
 
@@ -285,3 +286,17 @@ def test_pytest_efd_does_not_retry_failed_teardown(self):
         assert fails_teardown_spans[0].get_tag("test.is_retry") != "true"
         assert rec.ret == 1
         assert len(spans) == 7
+
+    def test_pytest_efd_junit_xml(self):
+        self.testdir.makepyfile(test_known_pass=_TEST_KNOWN_PASS_CONTENT)
+        self.testdir.makepyfile(test_known_fail=_TEST_KNOWN_FAIL_CONTENT)
+        self.testdir.makepyfile(test_new_pass=_TEST_NEW_PASS_CONTENT)
+        self.testdir.makepyfile(test_new_fail=_TEST_NEW_FAIL_CONTENT)
+        self.testdir.makepyfile(test_new_flaky=_TEST_NEW_FLAKY_CONTENT)
+
+        rec = self.inline_run("--ddtrace", "--junit-xml=out.xml")
+        assert rec.ret == 1
+
+        test_suite = ElementTree.parse(f"{self.testdir}/out.xml").find("testsuite")
+        assert test_suite.attrib["tests"] == "7"
+        assert test_suite.attrib["failures"] == "3"