From 5c5ffc5bde79c6f82d4b5ed761fd06ee312be90f Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Mon, 3 Feb 2025 14:19:06 -0500 Subject: [PATCH] spanconfigreconciler: use fixed timestamp when reading descriptors Recently when we changed the default value of autocommit_before_ddl to true, we found that the chance of hitting a retry error while running schema changes dramatically increased. The reason was because the backgound span reconciler would need locks for the same keys that were being modified by the schema change job itself -- most notably, the descriptor table and descriptor ID sequence. This patch addresses the issue by making the spanconfig reconciler use the checkpoint timestamp from the rangefeed as the fixed timestamp for the transaction that reads the descriptors whose spans are being reconciled. This defensive measure helps us avoid any possibility of contention caused by this background job's transaction. This allows us to re-enable the autocommit setting for logictests that run in multitenancy. Release note: None --- pkg/spanconfig/spanconfigreconciler/reconciler.go | 10 ++++++++++ pkg/sql/logictest/logic.go | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/spanconfig/spanconfigreconciler/reconciler.go b/pkg/spanconfig/spanconfigreconciler/reconciler.go index 122cd92fcaba..f5f27ae151fe 100644 --- a/pkg/spanconfig/spanconfigreconciler/reconciler.go +++ b/pkg/spanconfig/spanconfigreconciler/reconciler.go @@ -522,6 +522,16 @@ func (r *incrementalReconciler) reconcile( ) error { var err error + // Using a fixed timestamp prevents this background job from contending + // with foreground schema change traffic. Schema changes modify system + // objects like system.descriptor, system.descriptor_id_seq, and + // system.span_count. The spanconfig reconciler needs to read these + // objects also. A fixed timestamp is a defensive measure to help + // avoid contention caused by this background job. + err = txn.KV().SetFixedTimestamp(ctx, checkpoint) + if err != nil { + return err + } // TODO(irfansharif): Instead of these filter methods for missing // tables and system targets that live on the Reconciler, we could // move this to the SQLTranslator instead, now that the SQLTranslator diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index 60d9b8495ae5..2792388e0a68 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -1683,16 +1683,6 @@ func (t *logicTest) newCluster( tenantID := serverutils.TestTenantID() conn := t.cluster.SystemLayer(0).SQLConn(t.rootT) - // TODO(rafi): Remove this setting. We're adding it since the 3node-tenant - // config seem to be flaky with autcommit_before_ddl = true. Disable that - // setting for multitenant configs while the issue is being investigated. - if _, err := conn.Exec( - "ALTER TENANT [$1] SET CLUSTER SETTING sql.defaults.autocommit_before_ddl.enabled = false", - tenantID.ToUint64(), - ); err != nil { - t.Fatal(err) - } - clusterSettings := toa.clusterSettings if len(clusterSettings) > 0 { // We reduce the closed timestamp duration on the host tenant so that the