@@ -6,15 +6,18 @@ package wal
66
77import (
88 "cmp"
9+ crand "crypto/rand"
910 "fmt"
1011 "io"
11- "math/rand/v2"
12+ mathrand "math/rand/v2"
1213 "os"
1314 "slices"
15+ "strings"
1416 "sync"
1517 "time"
1618
1719 "github.com/cockroachdb/errors"
20+ "github.com/cockroachdb/errors/oserror"
1821 "github.com/cockroachdb/pebble/internal/base"
1922 "github.com/cockroachdb/pebble/internal/invariants"
2023 "github.com/cockroachdb/pebble/vfs"
@@ -52,6 +55,71 @@ const probeHistoryLength = 128
5255// Large value.
5356const failedProbeDuration = 24 * 60 * 60 * time .Second
5457
// generateStableIdentifierForTesting is a test-only override. While it holds
// a non-empty string, generateStableIdentifier returns that string verbatim
// instead of generating a random identifier. The zero value (empty string)
// leaves the override disabled.
var generateStableIdentifierForTesting string
61+
62+ // SetGenerateStableIdentifierForTesting sets a constant identifier for testing.
63+ // This should only be used in tests to avoid flaky behavior.
64+ func SetGenerateStableIdentifierForTesting (identifier string ) {
65+ generateStableIdentifierForTesting = identifier
66+ }
67+
68+ // ResetGenerateStableIdentifierForTesting resets the testing override.
69+ // This should only be used in tests.
70+ func ResetGenerateStableIdentifierForTesting () {
71+ generateStableIdentifierForTesting = ""
72+ }
73+
74+ // generateStableIdentifier generates a random hex string from 16 bytes.
75+ func generateStableIdentifier () (string , error ) {
76+ // For testing, return a constant value if set.
77+ if generateStableIdentifierForTesting != "" {
78+ return generateStableIdentifierForTesting , nil
79+ }
80+
81+ var uuid [16 ]byte
82+ if _ , err := crand .Read (uuid [:]); err != nil {
83+ return "" , err
84+ }
85+ return fmt .Sprintf ("%x" , uuid ), nil
86+ }
87+
88+ // readSecondaryIdentifier reads the identifier from the secondary directory.
89+ func readSecondaryIdentifier (fs vfs.FS , identifierFile string ) (string , error ) {
90+ f , err := fs .Open (identifierFile )
91+ if err != nil {
92+ if oserror .IsNotExist (err ) {
93+ return "" , nil
94+ }
95+ return "" , err
96+ }
97+ defer f .Close ()
98+
99+ data , err := io .ReadAll (f )
100+ if err != nil {
101+ return "" , err
102+ }
103+
104+ // Trim whitespace and return the identifier.
105+ return strings .TrimSpace (string (data )), nil
106+ }
107+
108+ // writeSecondaryIdentifier writes the identifier to the secondary directory.
109+ func writeSecondaryIdentifier (fs vfs.FS , identifierFile string , identifier string ) error {
110+ f , err := fs .Create (identifierFile , "pebble-wal" )
111+ if err != nil {
112+ return err
113+ }
114+
115+ if _ , err := io .WriteString (f , identifier ); err != nil {
116+ f .Close ()
117+ return err
118+ }
119+
120+ return errors .CombineErrors (f .Sync (), f .Close ())
121+ }
122+
55123// init takes a stopper in order to connect the dirProber's long-running
56124// goroutines with the stopper's wait group, but the dirProber has its own
57125// stop() method that should be invoked to trigger the shutdown.
@@ -73,7 +141,7 @@ func (p *dirProber) init(
73141 }
74142 // Random bytes for writing, to defeat any FS compression optimization.
75143 for i := range p .buf {
76- p .buf [i ] = byte (rand .Uint32 ())
144+ p .buf [i ] = byte (mathrand .Uint32 ())
77145 }
78146 // dirProber has an explicit stop() method instead of listening on
79147 // stopper.shouldQuiesce. This structure helps negotiate the shutdown
@@ -538,6 +606,46 @@ func (wm *failoverManager) init(o Options, initial Logs) error {
538606 return nil
539607}
540608
609+ // ValidateOrInitWALDir manages the secondary directory identifier for
610+ // failover validation. It ensures the correct secondary directory is mounted
611+ // by validating or generating a stable identifier.
612+ func ValidateOrInitWALDir (walDir Dir ) (Dir , error ) {
613+ identifierFile := walDir .FS .PathJoin (walDir .Dirname , "failover_identifier" )
614+ // If we have an identifier from the OPTIONS file, validate it matches what's
615+ // in the directory.
616+ if walDir .ID != "" {
617+ existingIdentifier , err := readSecondaryIdentifier (walDir .FS , identifierFile )
618+ if err != nil {
619+ return Dir {}, errors .Newf ("failed to read secondary identifier: %v" , err )
620+ }
621+ // Not the same identifier, wrong disk may be mounted.
622+ if existingIdentifier != walDir .ID {
623+ return Dir {}, errors .Newf ("secondary directory %q has identifier %q but expected %q - wrong disk may be mounted" ,
624+ walDir .Dirname , existingIdentifier , walDir .ID )
625+ }
626+ } else {
627+ // No identifier in OPTIONS file, check if one exists in the directory.
628+ existingIdentifier , err := readSecondaryIdentifier (walDir .FS , identifierFile )
629+ if err != nil {
630+ return Dir {}, errors .Newf ("failed to read secondary identifier: %v" , err )
631+ }
632+ if existingIdentifier == "" {
633+ // Generate a new identifier.
634+ identifier , err := generateStableIdentifier ()
635+ if err != nil {
636+ return Dir {}, errors .Newf ("failed to generate UUID: %v" , err )
637+ }
638+ if err := writeSecondaryIdentifier (walDir .FS , identifierFile , identifier ); err != nil {
639+ return Dir {}, errors .Newf ("failed to write secondary identifier: %v" , err )
640+ }
641+ walDir .ID = identifier
642+ } else {
643+ walDir .ID = existingIdentifier
644+ }
645+ }
646+ return walDir , nil
647+ }
648+
541649// List implements Manager.
542650func (wm * failoverManager ) List () Logs {
543651 wm .mu .Lock ()
@@ -843,6 +951,11 @@ func (wm *failoverManager) logCreator(
843951 return logFile , 0 , err
844952}
845953
954+ // Opts implements Manager.
955+ func (wm * failoverManager ) Opts () Options {
956+ return wm .opts
957+ }
958+
846959type stopper struct {
847960 quiescer chan struct {} // Closed when quiescing
848961 wg sync.WaitGroup
0 commit comments