Skip to content

Commit 33ad7b2

Browse files
craig[bot]stevendannarickystewart
committed
114464: backupccl: allow online restore to assume exclusive end keys r=dt a=stevendanna Currently, we only support non-revision-history restores for online restore. As a result, we can assume file span end keys are exclusive. We were implicitly assuming this and throwing away files that didn't overlap with the restore span entry. Epic: none Release note: None 118950: workflows: shut down bazel server if we're asked to cancel the build r=rail a=rickystewart This is implemented as a [SIGINT](https://docs.github.com/en/actions/managing-workflow-runs/canceling-a-workflow#steps-github-takes-to-cancel-a-workflow-run). However, the build is likely to just continue running in the background because this may not be enough to tear Bazel down. This could cause steps that need to happen later to stall indefinitely. We just shut down the server directly in this case. Epic: CRDB-8308 Release note: None Co-authored-by: Steven Danna <[email protected]> Co-authored-by: Ricky Stewart <[email protected]>
3 parents 7f82808 + 22f977a + ec68ac9 commit 33ad7b2

File tree

9 files changed

+68
-18
lines changed

9 files changed

+68
-18
lines changed

.github/workflows/experimental-github-actions-testing.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
ref: ${{ github.event.pull_request.head.sha }}
1818
- run: ./build/github/get-engflow-keys.sh
1919
- name: run tests
20-
run: bazel test //pkg:all_tests //pkg/ui:lint //pkg/ui:test --config crosslinux --jobs 300 --remote_download_minimal --bes_keywords=github_pr_number=${{ github.event.pull_request.number }} --config=use_ci_timeouts --build_event_binary_file=bes.bin $(./build/github/engflow-args.sh) --bes_keywords ci-unit-test
20+
run: ./build/github/run-bazel.sh test //pkg:all_tests //pkg/ui:lint //pkg/ui:test --config crosslinux --jobs 300 --remote_download_minimal --bes_keywords=github_pr_number=${{ github.event.pull_request.number }} --config=use_ci_timeouts --build_event_binary_file=bes.bin $(./build/github/engflow-args.sh) --bes_keywords ci-unit-test
2121
- name: upload test results
2222
run: ./build/github/summarize-build.sh bes.bin
2323
if: always()

build/github/run-bazel.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Any arguments passed to this script will be directly passed to Bazel. In
2+
# addition to just calling Bazel, this script has the property that the Bazel
3+
# server will be killed if we receive an interrupt.
4+
5+
bazel_shutdown() {
6+
bazel shutdown
7+
}
8+
9+
trap bazel_shutdown EXIT
10+
11+
bazel "$@"

pkg/ccl/backupccl/bench_covering_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ func BenchmarkRestoreEntryCover(b *testing.B) {
111111
layerToBackupManifestFileIterFactory,
112112
nil,
113113
filter,
114+
&inclusiveEndKeyComparator{},
114115
spanCh)
115116
})
116117

pkg/ccl/backupccl/generative_split_and_scatter_processor.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,13 +476,20 @@ func runGenerativeSplitAndScatter(
476476
return errors.Wrap(err, "failed to make span covering filter")
477477
}
478478
defer filter.close()
479+
480+
var fsc fileSpanComparator = &inclusiveEndKeyComparator{}
481+
if spec.ExclusiveFileSpanComparison {
482+
fsc = &exclusiveEndKeyComparator{}
483+
}
484+
479485
return errors.Wrap(generateAndSendImportSpans(
480486
ctx,
481487
spec.Spans,
482488
backups,
483489
layerToFileIterFactory,
484490
backupLocalityMap,
485491
filter,
492+
fsc,
486493
restoreSpanEntriesCh,
487494
), "generating and sending import spans")
488495
})

pkg/ccl/backupccl/restore_job.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,14 @@ func restore(
344344
return roachpb.RowCount{}, err
345345
}
346346

347+
var fsc fileSpanComparator = &inclusiveEndKeyComparator{}
348+
// We know we can use exclusive comparison for
349+
// ExperimentalOnline since we don't support revision history
350+
// backups for ExperimentalOnline.
351+
if details.ExperimentalOnline {
352+
fsc = &exclusiveEndKeyComparator{}
353+
}
354+
347355
countSpansCh := make(chan execinfrapb.RestoreSpanEntry, 1000)
348356
genSpan := func(ctx context.Context, spanCh chan execinfrapb.RestoreSpanEntry) error {
349357
defer close(spanCh)
@@ -354,6 +362,7 @@ func restore(
354362
layerToIterFactory,
355363
backupLocalityMap,
356364
filter,
365+
fsc,
357366
spanCh,
358367
), "generate and send import spans")
359368
}

pkg/ccl/backupccl/restore_online.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,16 @@ func sendAddRemoteSSTWorker(
173173
firstSplitDone := false
174174
for _, file := range entry.Files {
175175
restoringSubspan := file.BackupFileEntrySpan.Intersect(entry.Span)
176-
log.Infof(ctx, "Experimental restore: sending span %s of file (path: %s, span: %s), with intersecting subspan %s",
177-
file.BackupFileEntrySpan, file.Path, file.BackupFileEntrySpan, restoringSubspan)
178-
179176
if !restoringSubspan.Valid() {
180-
log.Warningf(ctx, "backup file does not intersect with the restoring span")
181-
continue
177+
return errors.AssertionFailedf("file %s with span %s has no overlap with restore span %s",
178+
file.Path,
179+
file.BackupFileEntrySpan,
180+
entry.Span,
181+
)
182182
}
183183

184+
log.Infof(ctx, "experimental restore: sending span %s of file %s (file span: %s) as part of restore span %s",
185+
restoringSubspan, file.Path, file.BackupFileEntrySpan, entry.Span)
184186
file.BackupFileEntrySpan = restoringSubspan
185187
if !firstSplitDone {
186188
expiration := execCtx.ExecCfg().Clock.Now().AddDuration(time.Hour)

pkg/ccl/backupccl/restore_span_covering.go

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -235,10 +235,9 @@ func (f spanCoveringFilter) close() {
235235
// trimmed/removed based on the lowWaterMark before the covering for them is
236236
// generated. These spans are generated one at a time and then sent to spanCh.
237237
//
238-
// Note that because of https://github.com/cockroachdb/cockroach/issues/101963,
239-
// the spans of files are end key _inclusive_. Because the current definition
240-
// of spans are all end key _exclusive_, we work around this by assuming that
241-
// the end key of each file span is actually the next key of the end key.
238+
// Spans are considered for inclusion using the fileSpanComparator. Note that
239+
// because of https://github.com/cockroachdb/cockroach/issues/101963, most
240+
// backups use a comparator that treats a file's end key as _inclusive_.
242241
//
243242
// Consider a chain of backups with files f1, f2… which cover spans as follows:
244243
//
@@ -279,6 +278,7 @@ func generateAndSendImportSpans(
279278
layerToBackupManifestFileIterFactory backupinfo.LayerToBackupManifestFileIterFactory,
280279
backupLocalityMap map[int]storeByLocalityKV,
281280
filter spanCoveringFilter,
281+
fsc fileSpanComparator,
282282
spanCh chan execinfrapb.RestoreSpanEntry,
283283
) error {
284284

@@ -378,7 +378,7 @@ func generateAndSendImportSpans(
378378
coverSpan.EndKey = span.EndKey
379379
}
380380

381-
newFilesByLayer, err := getNewIntersectingFilesByLayer(coverSpan, layersCoveredLater, fileIterByLayer)
381+
newFilesByLayer, err := getNewIntersectingFilesByLayer(coverSpan, layersCoveredLater, fileIterByLayer, fsc)
382382
if err != nil {
383383
return err
384384
}
@@ -405,7 +405,7 @@ func generateAndSendImportSpans(
405405
sz = 16 << 20
406406
}
407407

408-
if inclusiveOverlap(coverSpan, file.Span) {
408+
if fsc.overlaps(coverSpan, file.Span) {
409409
covSize += sz
410410
filesByLayer[layer] = append(filesByLayer[layer], file)
411411
}
@@ -608,6 +608,7 @@ func getNewIntersectingFilesByLayer(
608608
span roachpb.Span,
609609
layersCoveredLater map[int]bool,
610610
fileIters []bulk.Iterator[*backuppb.BackupManifest_File],
611+
fsc fileSpanComparator,
611612
) ([][]*backuppb.BackupManifest_File, error) {
612613
var files [][]*backuppb.BackupManifest_File
613614

@@ -629,7 +630,7 @@ func getNewIntersectingFilesByLayer(
629630
// inclusive. Because roachpb.Span and its associated operations
630631
// are end key exclusive, we work around this by replacing the
631632
// end key with its next value in order to include the end key.
632-
if inclusiveOverlap(span, f.Span) {
633+
if fsc.overlaps(span, f.Span) {
633634
layerFiles = append(layerFiles, f)
634635
}
635636

@@ -644,8 +645,23 @@ func getNewIntersectingFilesByLayer(
644645
return files, nil
645646
}
646647

647-
// inclusiveOverlap returns true if sp, which is end key exclusive, overlaps
648-
// isp, which is end key inclusive.
649-
func inclusiveOverlap(sp roachpb.Span, isp roachpb.Span) bool {
650-
return sp.Overlaps(isp) || sp.ContainsKey(isp.EndKey)
648+
type fileSpanComparator interface {
649+
overlaps(targetSpan, fileSpan roachpb.Span) bool
650+
}
651+
652+
// inclusiveEndKeyComparator assumes that file spans have inclusive
653+
// end keys.
654+
type inclusiveEndKeyComparator struct{}
655+
656+
func (*inclusiveEndKeyComparator) overlaps(targetSpan, inclusiveEndKeyFileSpan roachpb.Span) bool {
657+
// TODO(ssd): Could ContainsKey here be replaced with targetSpan.Key.Equal(inclusiveEndKeyFileSpan.EndKey)?
658+
return targetSpan.Overlaps(inclusiveEndKeyFileSpan) || targetSpan.ContainsKey(inclusiveEndKeyFileSpan.EndKey)
659+
}
660+
661+
// exclusiveEndKeyComparator assumes that file spans have exclusive
662+
// end keys.
663+
type exclusiveEndKeyComparator struct{}
664+
665+
func (*exclusiveEndKeyComparator) overlaps(targetSpan, inclusiveEndKeyFileSpan roachpb.Span) bool {
666+
return targetSpan.Overlaps(inclusiveEndKeyFileSpan)
651667
}

pkg/ccl/backupccl/restore_span_covering_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ func makeImportSpans(
301301
layerToIterFactory,
302302
nil,
303303
filter,
304+
&inclusiveEndKeyComparator{},
304305
spanCh)
305306
close(spanCh)
306307

pkg/sql/execinfrapb/processors_bulk_io.proto

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ message RestoreFileSpec {
323323
optional roachpb.RowCount backup_file_entry_counts = 6 [(gogoproto.nullable) = false];
324324
optional uint64 backing_file_size = 7 [(gogoproto.nullable) = false];
325325
optional uint64 approximate_physical_size = 8 [(gogoproto.nullable) = false];
326-
326+
327327
// NEXT ID: 9.
328328
}
329329

@@ -447,6 +447,9 @@ message GenerativeSplitAndScatterSpec {
447447
optional int64 job_id = 18 [(gogoproto.nullable) = false, (gogoproto.customname) = "JobID"];
448448
optional bool use_frontier_checkpointing = 20 [(gogoproto.nullable) = false];
449449
repeated jobs.jobspb.RestoreProgress.FrontierEntry checkpointed_spans = 21 [(gogoproto.nullable) = false];
450+
// ExclusiveFileSpanComparison is true if the backup can safely use
451+
// exclusive file span comparison.
452+
optional bool exclusive_file_span_comparison = 22 [(gogoproto.nullable) = false];
450453

451454
reserved 19;
452455
}

0 commit comments

Comments
 (0)