From a8ccd6b41740b36d32cece552c712a5318baf8f9 Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Thu, 6 Feb 2025 12:30:11 -0500 Subject: [PATCH] logictest: raise TxnLivenessThreshold for multitenant configs Logic tests are flaky due to overload when running in multitenant mode. This patch increases the threshold for transaction heartbeat timeouts, which will make it less likely for foreground operations to be aborted by background jobs like the span config reconciler or the job registry loop to reclaim jobs from dead sessions. Release note: None --- pkg/sql/logictest/BUILD.bazel | 1 + pkg/sql/logictest/logic.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/pkg/sql/logictest/BUILD.bazel b/pkg/sql/logictest/BUILD.bazel index b6f964479cd6..827815a42a14 100644 --- a/pkg/sql/logictest/BUILD.bazel +++ b/pkg/sql/logictest/BUILD.bazel @@ -62,6 +62,7 @@ go_library( "//pkg/kv/kvclient/rangefeed", "//pkg/kv/kvserver", "//pkg/kv/kvserver/kvserverbase", + "//pkg/kv/kvserver/txnwait", "//pkg/multitenant/tenantcapabilities", "//pkg/security/username", "//pkg/server", diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index 2792388e0a68..b7456fac7a2f 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -41,6 +41,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangefeed" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait" "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities" "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/server" @@ -1708,6 +1709,21 @@ func (t *logicTest) newCluster( t.Fatal(err) } } + if _, err := conn.Exec( + "RESET CLUSTER SETTING kv.closed_timestamp.target_duration", + ); err != nil { + t.Fatal(err) + } + if _, err := conn.Exec( + "RESET CLUSTER SETTING 
kv.closed_timestamp.side_transport_interval", + ); err != nil { + t.Fatal(err) + } + if _, err := conn.Exec( + "RESET CLUSTER SETTING kv.rangefeed.closed_timestamp_refresh_interval", + ); err != nil { + t.Fatal(err) + } } capabilities := toa.capabilities @@ -4373,6 +4389,18 @@ func RunLogicTest( if *printErrorSummary { defer lt.printErrorSummary() } + if config.UseSecondaryTenant == logictestbase.Always { + // Under multitenant configs running in EngFlow, we have seen that logic + // tests can be flaky due to an overload condition where schema change + // transactions do not heartbeat quickly enough. This allows background jobs + // such as the spanconfig reconciler or the job registry "remove claims from + // dead sessions" loop to abort those transactions. + // See https://github.com/cockroachdb/cockroach/pull/140400#issuecomment-2634346278 + // and https://github.com/cockroachdb/cockroach/issues/140494#issuecomment-2640208187 + // for a detailed analysis of this issue. + cleanup := txnwait.TestingOverrideTxnLivenessThreshold(30 * time.Second) + defer cleanup() + } // Each test needs a copy because of Parallel serverArgsCopy := serverArgs serverArgsCopy.ForceProductionValues = serverArgs.ForceProductionValues || nonMetamorphicBatchSizes