Skip to content

fix(ci_visibility): count failed/skipped tests in JUnit XML when retries are enabled [backport 3.3] #12898

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions ddtrace/contrib/internal/pytest/_atr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from ddtrace.contrib.internal.pytest._retry_utils import RetryOutcomes
from ddtrace.contrib.internal.pytest._retry_utils import RetryTestReport
from ddtrace.contrib.internal.pytest._retry_utils import _get_outcome_from_retry
from ddtrace.contrib.internal.pytest._retry_utils import _get_retry_attempt_string
from ddtrace.contrib.internal.pytest._retry_utils import set_retry_num
Expand Down Expand Up @@ -79,13 +80,14 @@ def atr_handle_retries(
return

atr_outcome = _atr_do_retries(item, outcomes)
longrepr = InternalTest.stash_get(test_id, "failure_longrepr")

final_report = pytest_TestReport(
final_report = RetryTestReport(
nodeid=item.nodeid,
location=item.location,
keywords=item.keywords,
when="call",
longrepr=None,
longrepr=longrepr,
outcome=final_outcomes[atr_outcome],
)
item.ihook.pytest_runtest_logreport(report=final_report)
Expand Down
6 changes: 4 additions & 2 deletions ddtrace/contrib/internal/pytest/_attempt_to_fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from ddtrace.contrib.internal.pytest._retry_utils import RetryOutcomes
from ddtrace.contrib.internal.pytest._retry_utils import RetryTestReport
from ddtrace.contrib.internal.pytest._retry_utils import _get_outcome_from_retry
from ddtrace.contrib.internal.pytest._retry_utils import _get_retry_attempt_string
from ddtrace.contrib.internal.pytest._retry_utils import set_retry_num
Expand Down Expand Up @@ -59,13 +60,14 @@ def attempt_to_fix_handle_retries(
return

retries_outcome = _do_retries(item, outcomes)
longrepr = InternalTest.stash_get(test_id, "failure_longrepr")

final_report = pytest_TestReport(
final_report = RetryTestReport(
nodeid=item.nodeid,
location=item.location,
keywords=item.keywords,
when="call",
longrepr=None,
longrepr=longrepr,
outcome=final_outcomes[retries_outcome],
)

Expand Down
6 changes: 4 additions & 2 deletions ddtrace/contrib/internal/pytest/_efd_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from ddtrace.contrib.internal.pytest._retry_utils import RetryOutcomes
from ddtrace.contrib.internal.pytest._retry_utils import RetryTestReport
from ddtrace.contrib.internal.pytest._retry_utils import _get_outcome_from_retry
from ddtrace.contrib.internal.pytest._retry_utils import _get_retry_attempt_string
from ddtrace.contrib.internal.pytest._retry_utils import set_retry_num
Expand Down Expand Up @@ -85,13 +86,14 @@ def efd_handle_retries(
InternalTest.mark_skip(test_id)

efd_outcome = _efd_do_retries(item)
longrepr = InternalTest.stash_get(test_id, "failure_longrepr")

final_report = pytest_TestReport(
final_report = RetryTestReport(
nodeid=item.nodeid,
location=item.location,
keywords=item.keywords,
when="call",
longrepr=None,
longrepr=longrepr,
outcome=_FINAL_OUTCOMES[efd_outcome],
)
item.ihook.pytest_runtest_logreport(report=final_report)
Expand Down
3 changes: 3 additions & 0 deletions ddtrace/contrib/internal/pytest/_plugin_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,9 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome
# (see <https://github.com/pytest-dev/pytest/blob/8.3.x/src/_pytest/main.py#L654>).
original_result.outcome = OUTCOME_QUARANTINED

if original_result.failed or original_result.skipped:
InternalTest.stash_set(test_id, "failure_longrepr", original_result.longrepr)

# ATR and EFD retry tests only if their teardown succeeded to ensure the best chance the retry will succeed
# NOTE: this mutates the original result's outcome
if InternalTest.stash_get(test_id, "setup_failed") or InternalTest.stash_get(test_id, "teardown_failed"):
Expand Down
31 changes: 31 additions & 0 deletions ddtrace/contrib/internal/pytest/_retry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from _pytest.runner import CallInfo
import pytest

from ddtrace.contrib.internal.pytest._types import pytest_TestReport
from ddtrace.contrib.internal.pytest._types import tmppath_result_key
from ddtrace.contrib.internal.pytest._utils import _TestOutcome
from ddtrace.ext.test_visibility.api import TestExcInfo
Expand Down Expand Up @@ -128,3 +129,33 @@ def _retry_run_when(item, when, outcomes: RetryOutcomes) -> t.Tuple[CallInfo, _p
if when == "call" or "passed" not in report.outcome:
item.ihook.pytest_runtest_logreport(report=report)
return call, report


class RetryTestReport(pytest_TestReport):
"""
A RetryTestReport behaves just like a normal pytest TestReport, except that the the failed/passed/skipped
properties are aware of retry final states (dd_efd_final_*, etc). This affects the test counts in JUnit XML output,
for instance.

The object should be initialized with the `longrepr` of the _initial_ test attempt. A `longrepr` set to `None` means
the initial attempt either succeeded (which means it was already counted by pytest) or was quarantined (which means
we should not count it at all), so we don't need to count it here.
"""

@property
def failed(self):
if self.longrepr is None:
return False
return "final_failed" in self.outcome

@property
def passed(self):
if self.longrepr is None:
return False
return "final_passed" in self.outcome or "final_flaky" in self.outcome

@property
def skipped(self):
if self.longrepr is None:
return False
return "final_skipped" in self.outcome
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
fixes:
- |
CI Visibility: This fix resolves an issue where JUnit XML output would not count tests retried by Early Flake
Detection, Auto Test Retries, and Attempt-to-Fix.
20 changes: 20 additions & 0 deletions tests/contrib/pytest/test_pytest_atr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- The session object is patched to never be a faulty session, by default.
"""
from unittest import mock
from xml.etree import ElementTree

import pytest

Expand Down Expand Up @@ -315,3 +316,22 @@ def test_pytest_atr_does_not_retry_failed_setup_or_teardown(self):

assert rec.ret == 1
assert len(spans) == 5

def test_pytest_atr_junit_xml(self):
self.testdir.makepyfile(test_pass=_TEST_PASS_CONTENT)
self.testdir.makepyfile(test_fail=_TEST_FAIL_CONTENT)
self.testdir.makepyfile(test_errors=_TEST_ERRORS_CONTENT)
self.testdir.makepyfile(test_pass_on_retries=_TEST_PASS_ON_RETRIES_CONTENT)
self.testdir.makepyfile(test_skip=_TEST_SKIP_CONTENT)

rec = self.inline_run("--ddtrace", "--junit-xml=out.xml")
assert rec.ret == 1

test_suite = ElementTree.parse(f"{self.testdir}/out.xml").find("testsuite")

# There are 15 tests, but we get 16 in the JUnit XML output, because a test that passes during call but fails
# during teardown is counted twice. This is a bug in pytest, not ddtrace.
assert test_suite.attrib["tests"] == "16"
assert test_suite.attrib["failures"] == "4"
assert test_suite.attrib["skipped"] == "4"
assert test_suite.attrib["errors"] == "2"
15 changes: 15 additions & 0 deletions tests/contrib/pytest/test_pytest_efd.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- The session object is patched to never be a faulty session, by default.
"""
from unittest import mock
from xml.etree import ElementTree

import pytest

Expand Down Expand Up @@ -285,3 +286,17 @@ def test_pytest_efd_does_not_retry_failed_teardown(self):
assert fails_teardown_spans[0].get_tag("test.is_retry") != "true"
assert rec.ret == 1
assert len(spans) == 7

def test_pytest_efd_junit_xml(self):
self.testdir.makepyfile(test_known_pass=_TEST_KNOWN_PASS_CONTENT)
self.testdir.makepyfile(test_known_fail=_TEST_KNOWN_FAIL_CONTENT)
self.testdir.makepyfile(test_new_pass=_TEST_NEW_PASS_CONTENT)
self.testdir.makepyfile(test_new_fail=_TEST_NEW_FAIL_CONTENT)
self.testdir.makepyfile(test_new_flaky=_TEST_NEW_FLAKY_CONTENT)

rec = self.inline_run("--ddtrace", "--junit-xml=out.xml")
assert rec.ret == 1

test_suite = ElementTree.parse(f"{self.testdir}/out.xml").find("testsuite")
assert test_suite.attrib["tests"] == "7"
assert test_suite.attrib["failures"] == "3"
Loading