Skip to content

Commit 215d16b

Browse files
craig[bot]rafiss
andcommitted
Merge #140607
140607: logictest: raise TxnLivenessThreshold for multitenant configs r=rafiss a=rafiss Logic tests are flaky due to overload when running in multitenant mode. This patch increases the threshold for transaction heartbeat timeouts, which will make it less likely for foreground operations to be aborted by background jobs like the span config reconciler or the job registry loop to reclaim jobs from dead sessions. fixes #140597 fixes #140494 fixes #140578 fixes #140577 fixes #140572 fixes #140574 fixes #139890 fixes #140565 fixes #140542 fixes #140543 fixes #140545 fixes #140544 fixes #140546 fixes #140560 fixes #140552 fixes #140541 fixes #140553 fixes #139990 fixes #139776 fixes #140072 fixes #139761 fixes #140161 fixes #140063 fixes #140062 fixes #140011 Release note: None Co-authored-by: Rafi Shamim <[email protected]>
2 parents 15a3216 + a8ccd6b commit 215d16b

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

pkg/sql/logictest/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ go_library(
6262
"//pkg/kv/kvclient/rangefeed",
6363
"//pkg/kv/kvserver",
6464
"//pkg/kv/kvserver/kvserverbase",
65+
"//pkg/kv/kvserver/txnwait",
6566
"//pkg/multitenant/tenantcapabilities",
6667
"//pkg/security/username",
6768
"//pkg/server",

pkg/sql/logictest/logic.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
"github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangefeed"
4242
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
4343
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
44+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
4445
"github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities"
4546
"github.com/cockroachdb/cockroach/pkg/security/username"
4647
"github.com/cockroachdb/cockroach/pkg/server"
@@ -1708,6 +1709,21 @@ func (t *logicTest) newCluster(
17081709
t.Fatal(err)
17091710
}
17101711
}
1712+
if _, err := conn.Exec(
1713+
"RESET CLUSTER SETTING kv.closed_timestamp.target_duration",
1714+
); err != nil {
1715+
t.Fatal(err)
1716+
}
1717+
if _, err := conn.Exec(
1718+
"RESET CLUSTER SETTING kv.closed_timestamp.side_transport_interval",
1719+
); err != nil {
1720+
t.Fatal(err)
1721+
}
1722+
if _, err := conn.Exec(
1723+
"RESET CLUSTER SETTING kv.rangefeed.closed_timestamp_refresh_interval",
1724+
); err != nil {
1725+
t.Fatal(err)
1726+
}
17111727
}
17121728

17131729
capabilities := toa.capabilities
@@ -4373,6 +4389,18 @@ func RunLogicTest(
43734389
if *printErrorSummary {
43744390
defer lt.printErrorSummary()
43754391
}
4392+
if config.UseSecondaryTenant == logictestbase.Always {
4393+
// Under multitenant configs running in EngFlow, we have seen that logic
4394+
// tests can be flaky due to an overload condition where schema change
4395+
// transactions do not heartbeat quickly enough. This allows background jobs
4396+
// such as the spanconfig reconciler or the job registry "remove claims from
4397+
// dead sessions" loop to abort those foreground transactions.
4398+
// See https://github.com/cockroachdb/cockroach/pull/140400#issuecomment-2634346278
4399+
// and https://github.com/cockroachdb/cockroach/issues/140494#issuecomment-2640208187
4400+
// for a detailed analysis of this issue.
4401+
cleanup := txnwait.TestingOverrideTxnLivenessThreshold(30 * time.Second)
4402+
defer cleanup()
4403+
}
43764404
// Each test needs a copy because of Parallel
43774405
serverArgsCopy := serverArgs
43784406
serverArgsCopy.ForceProductionValues = serverArgs.ForceProductionValues || nonMetamorphicBatchSizes

0 commit comments

Comments
 (0)