From 2f459a02964d2eab27993eab183ef6df86171093 Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Thu, 6 Feb 2025 17:30:11 +0000 Subject: [PATCH] logictest: raise TxnLivenessThreshold for multitenant configs Logic tests are flaky due to overload when running in multitenant mode. This patch increases the threshold for transaction heartbeat timeouts, which will make it less likely for foreground operations to be aborted by background jobs like the span config reconciler or the job registry loop to reclaim jobs from dead sessions. Release note: None --- pkg/sql/logictest/BUILD.bazel | 1 + pkg/sql/logictest/logic.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/pkg/sql/logictest/BUILD.bazel b/pkg/sql/logictest/BUILD.bazel index b6f964479cd6..827815a42a14 100644 --- a/pkg/sql/logictest/BUILD.bazel +++ b/pkg/sql/logictest/BUILD.bazel @@ -62,6 +62,7 @@ go_library( "//pkg/kv/kvclient/rangefeed", "//pkg/kv/kvserver", "//pkg/kv/kvserver/kvserverbase", + "//pkg/kv/kvserver/txnwait", "//pkg/multitenant/tenantcapabilities", "//pkg/security/username", "//pkg/server", diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index 89ca74238dee..55d2569c925c 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -41,6 +41,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangefeed" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait" "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities" "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/server" @@ -1709,6 +1710,21 @@ func (t *logicTest) newCluster( t.Fatal(err) } } + if _, err := conn.Exec( + "RESET CLUSTER SETTING kv.closed_timestamp.target_duration", + ); err != nil { + t.Fatal(err) + } + if _, err := conn.Exec( + "RESET CLUSTER SETTING 
kv.closed_timestamp.side_transport_interval", + ); err != nil { + t.Fatal(err) + } + if _, err := conn.Exec( + "RESET CLUSTER SETTING kv.rangefeed.closed_timestamp_refresh_interval", + ); err != nil { + t.Fatal(err) + } } capabilities := toa.capabilities @@ -4374,6 +4390,18 @@ func RunLogicTest( if *printErrorSummary { defer lt.printErrorSummary() } + if config.UseSecondaryTenant == logictestbase.Always { + // Under multitenant configs running in EngFlow, we have seen that logic + // tests can be flaky due to an overload condition where schema change + // transactions do not heartbeat quickly enough. This allows background jobs + // such as the spanconfig reconciler or the job registry "remove claims from + // dead sessions" loop to abort those transactions. + // See https://github.com/cockroachdb/cockroach/pull/140400#issuecomment-2634346278 + // and https://github.com/cockroachdb/cockroach/issues/140494#issuecomment-2640208187 + // for a detailed analysis of this issue. + cleanup := txnwait.TestingOverrideTxnLivenessThreshold(30 * time.Second) + defer cleanup() + } // Each test needs a copy because of Parallel serverArgsCopy := serverArgs serverArgsCopy.ForceProductionValues = serverArgs.ForceProductionValues || nonMetamorphicBatchSizes