@@ -11,6 +11,7 @@ import (
11
11
"context"
12
12
"fmt"
13
13
"io"
14
+ "io/ioutil"
14
15
"path"
15
16
"strings"
16
17
"sync"
@@ -54,6 +55,9 @@ const (
54
55
restoreCommandOption = "restore_command"
55
56
targetActionOption = "recovery_target_action"
56
57
promoteTargetAction = "promote"
58
+
59
+ // WAL parsing constants.
60
+ walNameLen = 24
57
61
)
58
62
59
63
var defaultRecoveryCfg = map [string ]string {
@@ -390,7 +394,7 @@ func (p *PhysicalInitial) checkSyncInstance(ctx context.Context) (string, error)
390
394
391
395
extractedDataStateAt , err := p .getLastXActReplayTimestamp (ctx , syncContainer .ID )
392
396
if err != nil {
393
- return "" , errors .Wrap (err , `failed to get last xact replay timestamp from the sync instance` )
397
+ return "" , errors .Wrap (err , `failed to get last replay timestamp from the sync instance` )
394
398
}
395
399
396
400
log .Msg ("Sync instance data state at: " , extractedDataStateAt )
@@ -550,6 +554,19 @@ func (p *PhysicalInitial) promoteInstance(ctx context.Context, clonePath string,
550
554
return errors .Wrap (err , "failed to start container" )
551
555
}
552
556
557
+ if syState .DSA == "" {
558
+ dsa , err := p .getDSAFromWAL (ctx , cfgManager .GetPgVersion (), promoteCont .ID , clonePath )
559
+ if err != nil {
560
+ log .Dbg ("cannot extract DSA form WAL files: " , err )
561
+ }
562
+
563
+ if dsa != "" {
564
+ log .Msg ("DataStateAt extracted from WAL files: " , dsa )
565
+
566
+ syState .DSA = dsa
567
+ }
568
+ }
569
+
553
570
log .Msg ("Starting PostgreSQL and waiting for readiness" )
554
571
log .Msg (fmt .Sprintf ("View logs using the command: %s %s" , tools .ViewLogsCmd , p .promoteContainerName ()))
555
572
@@ -623,6 +640,85 @@ func (p *PhysicalInitial) promoteInstance(ctx context.Context, clonePath string,
623
640
return nil
624
641
}
625
642
643
+ func (p * PhysicalInitial ) getDSAFromWAL (ctx context.Context , pgVersion float64 , containerID , cloneDir string ) (string , error ) {
644
+ log .Dbg (cloneDir )
645
+
646
+ infos , err := ioutil .ReadDir (path .Join (cloneDir , "pg_wal" ))
647
+ if err != nil {
648
+ return "" , errors .Wrap (err , "failed to read the pg_wal dir" )
649
+ }
650
+
651
+ // Walk in the reverse order.
652
+ for i := len (infos ) - 1 ; i >= 0 ; i -- {
653
+ fileName := infos [i ].Name ()
654
+ walFilePath := path .Join (cloneDir , "pg_wal" , fileName )
655
+
656
+ log .Dbg ("Look up into file: " , walFilePath )
657
+
658
+ if len (fileName ) != walNameLen {
659
+ continue
660
+ }
661
+
662
+ dateTime := p .parseWAL (ctx , containerID , pgVersion , walFilePath )
663
+ if dateTime != "" {
664
+ return dateTime , nil
665
+ }
666
+ }
667
+
668
+ log .Dbg ("no found dataStateAt in WAL files" )
669
+
670
+ return "" , nil
671
+ }
672
+
673
+ func (p * PhysicalInitial ) parseWAL (ctx context.Context , containerID string , pgVersion float64 , walFilePath string ) string {
674
+ cmd := fmt .Sprintf ("/usr/lib/postgresql/%g/bin/pg_waldump %s -r Transaction | tail -1" , pgVersion , walFilePath )
675
+
676
+ output , err := tools .ExecCommandWithOutput (ctx , p .dockerClient , containerID , types.ExecConfig {
677
+ Cmd : []string {"sh" , "-c" , cmd },
678
+ })
679
+ if err != nil {
680
+ log .Dbg ("failed to parse WAL: " , err )
681
+ return ""
682
+ }
683
+
684
+ if output == "" {
685
+ log .Dbg ("empty timestamp output given" )
686
+ return ""
687
+ }
688
+
689
+ log .Dbg ("Parse the line from a WAL file" , output )
690
+
691
+ return parseWALLine (output )
692
+ }
693
+
694
+ func parseWALLine (line string ) string {
695
+ const (
696
+ commitToken = "COMMIT"
697
+ tokenLen = len (commitToken )
698
+ layout = "2006-01-02 15:04:05.000000 MST"
699
+ )
700
+
701
+ commitIndex := strings .LastIndex (line , commitToken )
702
+ if commitIndex == - 1 {
703
+ log .Dbg ("timestamp not found" , line )
704
+ return ""
705
+ }
706
+
707
+ dateTimeString := strings .TrimSpace (line [commitIndex + tokenLen :])
708
+
709
+ if idx := strings .IndexByte (dateTimeString , ';' ); idx > 0 {
710
+ dateTimeString = dateTimeString [:idx ]
711
+ }
712
+
713
+ parsedDate , err := time .Parse (layout , dateTimeString )
714
+ if err != nil {
715
+ log .Dbg ("failed to parse WAL time: " , dateTimeString )
716
+ return ""
717
+ }
718
+
719
+ return parsedDate .Format (tools .DataStateAtFormat )
720
+ }
721
+
626
722
func buildRecoveryConfig (fileConfig , userRecoveryConfig map [string ]string ) map [string ]string {
627
723
recoveryConf := fileConfig
628
724
@@ -640,7 +736,7 @@ func buildRecoveryConfig(fileConfig, userRecoveryConfig map[string]string) map[s
640
736
}
641
737
642
738
func (p * PhysicalInitial ) markDSA (ctx context.Context , defaultDSA , containerID , dataDir string , pgVersion float64 ) error {
643
- extractedDataStateAt , err := p .extractDataStateAt (ctx , containerID , dataDir , pgVersion )
739
+ extractedDataStateAt , err := p .extractDataStateAt (ctx , containerID , dataDir , pgVersion , defaultDSA )
644
740
if err != nil {
645
741
if defaultDSA == "" {
646
742
return errors .Wrap (err , `failed to extract dataStateAt` )
@@ -748,26 +844,50 @@ func (p *PhysicalInitial) checkRecovery(ctx context.Context, containerID string)
748
844
return output , err
749
845
}
750
846
751
- func (p * PhysicalInitial ) extractDataStateAt (ctx context.Context , containerID , dataDir string , pgVersion float64 ) (string , error ) {
847
+ func (p * PhysicalInitial ) extractDataStateAt (ctx context.Context , containerID , dataDir string , pgVersion float64 ,
848
+ defaultDSA string ) (string , error ) {
752
849
output , err := p .getLastXActReplayTimestamp (ctx , containerID )
850
+ if err != nil {
851
+ log .Dbg ("unable to get last replay timestamp from the promotion container: " , err )
852
+ }
753
853
754
- if output == "" {
755
- log .Msg ("The last replay timestamp not found. Extract the last checkpoint timestamp" )
854
+ if output != "" && err == nil {
855
+ return output , nil
856
+ }
756
857
757
- response , err := pgtool .ReadControlData (ctx , p .dockerClient , containerID , dataDir , pgVersion )
758
- if err != nil {
759
- return "" , errors .Wrap (err , "failed to read control data" )
760
- }
858
+ if defaultDSA != "" {
859
+ log .Msg ("failed to extract dataStateAt. Use value from the sync instance: " , defaultDSA )
761
860
762
- defer response .Close ()
861
+ return defaultDSA , nil
862
+ }
763
863
764
- output , err = getCheckPointTimestamp ( ctx , response . Reader )
765
- if err != nil {
766
- return "" , errors . Wrap ( err , "failed to read control data" )
767
- }
864
+ // If the sync instance has not yet downloaded WAL when retrieving the default DSA, run it again.
865
+ dsa , err := p . getDSAFromWAL ( ctx , pgVersion , containerID , dataDir )
866
+ if err != nil {
867
+ log . Dbg ( "cannot extract DSA from WAL files in the promotion container: " , err )
768
868
}
769
869
770
- return output , err
870
+ if dsa != "" {
871
+ log .Msg ("Use dataStateAt value from the promotion WAL files: " , defaultDSA )
872
+
873
+ return dsa , nil
874
+ }
875
+
876
+ log .Msg ("The last replay timestamp and dataStateAt from the sync instance are not found. Extract the last checkpoint timestamp" )
877
+
878
+ response , err := pgtool .ReadControlData (ctx , p .dockerClient , containerID , dataDir , pgVersion )
879
+ if err != nil {
880
+ return "" , errors .Wrap (err , "failed to read control data" )
881
+ }
882
+
883
+ defer response .Close ()
884
+
885
+ output , err = getCheckPointTimestamp (ctx , response .Reader )
886
+ if err != nil {
887
+ return "" , errors .Wrap (err , "failed to read control data" )
888
+ }
889
+
890
+ return output , nil
771
891
}
772
892
773
893
func (p * PhysicalInitial ) getLastXActReplayTimestamp (ctx context.Context , containerID string ) (string , error ) {
0 commit comments