Skip to content

Commit aa1f57b

Browse files
authored
[flakiness] pytest - use rerunfailures for retries (#391)
# Summary

This pull request improves the reliability of several end-to-end and integration tests in the MongoDB Kubernetes test suite by replacing custom retry logic with standardized pytest flakiness handling. The changes primarily remove manual retry loops from test cases and instead use the `pytest.mark.flaky` decorator (provided by the `pytest-rerunfailures` plugin) to automatically rerun flaky tests, making the code cleaner and more maintainable.

**Test stability improvements:**

* Added `pytest.mark.flaky` decorators to multiple test cases to automatically rerun tests on failure, replacing custom retry logic.
* Replaced only the simple try/except retry loops, keeping the total wait time the same.
* Added this decorator to `e2e_replica_set` and `e2e_replica_set_pv`, which are racy/flaky.

**Dependency and import adjustments:**

* Added missing `pytest` imports to test files to support the use of the `flaky` marker.

## Proof of Work

- Green CI

## Checklist

- [x] Have you linked a Jira ticket and/or is the ticket in the title?
- [x] Have you checked whether your Jira ticket required DOCSP changes?
- [x] Have you added a changelog file?
  - use the `skip-changelog` label if not needed
  - refer to the [Changelog files and Release Notes](https://github.com/mongodb/mongodb-kubernetes/blob/master/CONTRIBUTING.md#changelog-files-and-release-notes) section in CONTRIBUTING.md for more details
1 parent 1ad24cf commit aa1f57b

File tree

8 files changed

+18
-53
lines changed

8 files changed

+18
-53
lines changed

docker/mongodb-kubernetes-tests/tests/multicluster/multi_cluster_backup_restore.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import kubernetes
66
import kubernetes.client
77
import pymongo
8+
import pytest
89
from kubernetes import client
910
from kubetester import (
1011
create_or_update_configmap,
@@ -474,17 +475,9 @@ def test_mongodb_multi_one_running_state(self, mongodb_multi_one: MongoDBMulti):
474475

475476
@skip_if_local
476477
@mark.e2e_multi_cluster_backup_restore
478+
@pytest.mark.flaky(reruns=100, reruns_delay=6)
477479
def test_add_test_data(self, mongodb_multi_one_collection):
478-
max_attempts = 100
479-
while max_attempts > 0:
480-
try:
481-
mongodb_multi_one_collection.insert_one(TEST_DATA)
482-
return
483-
except Exception as e:
484-
print(e)
485-
max_attempts -= 1
486-
time.sleep(6)
487-
raise Exception("❌ Failed to insert test data after multiple attempts")
480+
mongodb_multi_one_collection.insert_one(TEST_DATA)
488481

489482
@mark.e2e_multi_cluster_backup_restore
490483
def test_mdb_backed_up(self, project_one: OMTester):

docker/mongodb-kubernetes-tests/tests/multicluster/multi_cluster_dr_connect.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,18 +54,9 @@ def test_replica_set_is_reachable(mongodb_multi: MongoDBMulti):
5454

5555

5656
@pytest.mark.e2e_multi_cluster_dr
57+
@pytest.mark.flaky(reruns=100, reruns_delay=6)
5758
def test_add_test_data(mongodb_multi_collection):
58-
# TODO: remove this retry mechanism, for some reason the resource exits the running state and then
59-
# enters it later. The subsequent test fails because the resource is not actually
60-
max_attempts = 100
61-
while max_attempts > 0:
62-
try:
63-
mongodb_multi_collection.insert_one(TEST_DATA)
64-
return
65-
except Exception as e:
66-
print(e)
67-
max_attempts -= 1
68-
time.sleep(6)
59+
mongodb_multi_collection.insert_one(TEST_DATA)
6960

7061

7162
@pytest.mark.e2e_multi_cluster_dr

docker/mongodb-kubernetes-tests/tests/multicluster_appdb/multicluster_appdb_s3_based_backup_restore.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import kubernetes.client
55
import pymongo
6+
import pytest
67
from kubetester import create_or_update_configmap, try_load
78
from kubetester.kubetester import ensure_ent_version
89
from kubetester.kubetester import fixture as yaml_fixture
@@ -203,16 +204,9 @@ def test_mongodb_multi_one_running_state(self, mongodb_multi_one: MongoDBMulti):
203204
# we might fail connection in the beginning since we set a custom dns in coredns
204205
mongodb_multi_one.assert_reaches_phase(Phase.Running, ignore_errors=True, timeout=600)
205206

207+
@pytest.mark.flaky(reruns=100, reruns_delay=6)
206208
def test_add_test_data(self, mongodb_multi_one_collection):
207-
max_attempts = 100
208-
while max_attempts > 0:
209-
try:
210-
mongodb_multi_one_collection.insert_one(TEST_DATA)
211-
return
212-
except Exception as e:
213-
print(e)
214-
max_attempts -= 1
215-
time.sleep(6)
209+
mongodb_multi_one_collection.insert_one(TEST_DATA)
216210

217211
def test_mdb_backed_up(self, project_one: OMTester):
218212
project_one.wait_until_backup_snapshots_are_ready(expected_count=1)

docker/mongodb-kubernetes-tests/tests/multicluster_om/multicluster_om_appdb_no_mesh.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -557,16 +557,9 @@ def test_create_mongodb_multi(server_certs: str, mongodb_multi: MongoDBMulti):
557557

558558
@skip_if_local
559559
@mark.e2e_multi_cluster_om_appdb_no_mesh
560+
@pytest.mark.flaky(reruns=100, reruns_delay=6)
560561
def test_add_test_data(mongodb_multi_collection):
561-
max_attempts = 100
562-
while max_attempts > 0:
563-
try:
564-
mongodb_multi_collection.insert_one(TEST_DATA)
565-
return
566-
except Exception as e:
567-
print(e)
568-
max_attempts -= 1
569-
time.sleep(6)
562+
mongodb_multi_collection.insert_one(TEST_DATA)
570563

571564

572565
@mark.e2e_multi_cluster_om_appdb_no_mesh

docker/mongodb-kubernetes-tests/tests/replicaset/replica_set.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ def test_replica_set_sts_exists(self):
117117
sts = self.appsv1.read_namespaced_stateful_set(RESOURCE_NAME, self.namespace)
118118
assert sts
119119

120+
@pytest.mark.flaky(reruns=15, reruns_delay=5)
120121
def test_sts_creation(self):
121122
sts = self.appsv1.read_namespaced_stateful_set(RESOURCE_NAME, self.namespace)
122123

docker/mongodb-kubernetes-tests/tests/replicaset/replica_set_liveness_probe.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,26 +25,17 @@ def _get_pods(podname_template: str, qty: int = 3):
2525

2626
@skip_if_static_containers
2727
@pytest.mark.e2e_replica_set_liveness_probe
28+
@pytest.mark.flaky(reruns=10, reruns_delay=30)
2829
def test_pods_are_running(replica_set: MongoDB, namespace: str):
2930
corev1_client = client.CoreV1Api()
3031
running_pods: Set[str] = set()
31-
tries = 10
3232
# Wait for all the pods to be running
3333
# We can't wait for the replica set to be running
3434
# as it will never get to it (mongod is not starting)
35-
while tries:
36-
if len(running_pods) == 3:
37-
break
38-
for podname in _get_pods("my-replica-set-{}", 3):
39-
try:
40-
pod = corev1_client.read_namespaced_pod(podname, namespace)
41-
if pod.status.phase == "Running":
42-
running_pods.add(podname)
43-
except:
44-
# Pod not found, will retry
45-
pass
46-
tries -= 1
47-
time.sleep(30)
35+
for podname in _get_pods("my-replica-set-{}", 3):
36+
pod = corev1_client.read_namespaced_pod(podname, namespace)
37+
if pod.status.phase == "Running":
38+
running_pods.add(podname)
4839
assert len(running_pods) == 3
4940

5041

docker/mongodb-kubernetes-tests/tests/replicaset/replica_set_pv.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def test_replica_set_sts_exists(self):
2222
sts = self.appsv1.read_namespaced_stateful_set("rs001-pv", self.namespace)
2323
assert sts
2424

25+
@pytest.mark.flaky(reruns=15, reruns_delay=5)
2526
def test_sts_creation(self):
2627
sts = self.appsv1.read_namespaced_stateful_set("rs001-pv", self.namespace)
2728

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,4 @@ types-python-dateutil==2.9.0.20250809
4949
pipupgrade==1.12.0
5050
pytest-cov==6.2.1
5151
pytest-socket==0.7.0
52+
pytest-rerunfailures==16.0

0 commit comments

Comments
 (0)