From 4ad580038829dd51f91f02ea97a40594f96a3d3a Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 6 Feb 2025 18:21:04 -0600 Subject: [PATCH] Fix Flaky Test SpecificClusterManagerNodesIT.testElectOnlyBetweenClusterManagerNodes (#16021) (#17265) (cherry picked from commit 852011aac6ed5faecc38e3d285789a630a34168d) Signed-off-by: kkewwei Signed-off-by: kkewwei Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .../cluster/SpecificClusterManagerNodesIT.java | 18 ++++++++++++------ .../opensearch/test/InternalTestCluster.java | 12 ++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/SpecificClusterManagerNodesIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/SpecificClusterManagerNodesIT.java index 713873bb222e2..8e3426b9cae26 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/SpecificClusterManagerNodesIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/SpecificClusterManagerNodesIT.java @@ -44,6 +44,7 @@ import org.opensearch.test.OpenSearchIntegTestCase.Scope; import java.io.IOException; +import java.util.function.Supplier; import static org.opensearch.test.NodeRoles.clusterManagerNode; import static org.opensearch.test.NodeRoles.dataOnlyNode; @@ -254,9 +255,9 @@ public void testElectOnlyBetweenClusterManagerNodes() throws Exception { logger.info("--> closing cluster-manager node (1)"); client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(clusterManagerNodeName)).get(); // removing the cluster-manager from the voting configuration immediately triggers the cluster-manager to step down - assertBusy(() -> { - assertThat( - internalCluster().nonClusterManagerClient() + Supplier getClusterManagerIfElected = () -> { + try { + return internalCluster().nonClusterManagerClient() .admin() .cluster() .prepareState() @@ -265,9 +266,14 @@ public void testElectOnlyBetweenClusterManagerNodes() throws Exception { .getState() .nodes() .getClusterManagerNode() - .getName(), - equalTo(nextClusterManagerEligableNodeName) - ); + .getName(); + } catch (ClusterManagerNotDiscoveredException e) { + logger.debug("failed to get cluster-manager name", e); + return null; + } + }; + assertBusy(() -> { + assertThat(getClusterManagerIfElected.get(), equalTo(nextClusterManagerEligableNodeName)); assertThat( internalCluster().clusterManagerClient() .admin() diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 0c68108434110..b77f0b3ed599f 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -89,6 +89,7 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.indices.breaker.CircuitBreakerService; import org.opensearch.core.util.FileSystemUtils; +import org.opensearch.discovery.ClusterManagerNotDiscoveredException; import org.opensearch.env.Environment; import org.opensearch.env.NodeEnvironment; import org.opensearch.env.ShardLockObtainFailedException; @@ -2155,13 +2156,12 @@ public String getClusterManagerName() { * in the viaNode parameter. If viaNode isn't specified a random node will be picked to the send the request to. */ public String getClusterManagerName(@Nullable String viaNode) { - try { - Client client = viaNode != null ? client(viaNode) : client(); - return client.admin().cluster().prepareState().get().getState().nodes().getClusterManagerNode().getName(); - } catch (Exception e) { - logger.warn("Can't fetch cluster state", e); - throw new RuntimeException("Can't get cluster-manager node " + e.getMessage(), e); + Client client = viaNode != null ? client(viaNode) : client(); + DiscoveryNode clusterManagerNode = client.admin().cluster().prepareState().get().getState().nodes().getClusterManagerNode(); + if (clusterManagerNode == null) { + throw new ClusterManagerNotDiscoveredException("Cluster manager node not discovered"); } + return clusterManagerNode.getName(); } /**