139
139
DAYS ,
140
140
)
141
141
142
+ PRIMARY_FORMATION = " --formation primary "
142
143
FIELD_DELIMITER = "-"
143
144
WAITING_POSTGRESQL_READY_COMMAND = ["pgtools" , "-a" ]
144
145
INIT_FINISH_MESSAGE = "init postgresql finish"
@@ -1781,7 +1782,7 @@ def delete_postgresql(
1781
1782
autofailover_switchover (meta , spec , patch , status , logger )
1782
1783
cmd = ["pgtools" , "-D" ]
1783
1784
output = exec_command (conn , cmd , logger , interrupt = False )
1784
- if output .find ("ERROR " ) != - 1 :
1785
+ if output .find ("drop auto_failover failed " ) != - 1 :
1785
1786
logger .error ("can't delete postgresql instance " + output )
1786
1787
else :
1787
1788
cmd = ["pgtools" , "-R" ]
@@ -2509,6 +2510,7 @@ async def create_cluster(
2509
2510
# wait a few seconds to prevent the pod not running
2510
2511
time .sleep (5 )
2511
2512
# cluster running
2513
+ update_number_sync_standbys (meta , spec , patch , status , logger )
2512
2514
set_cluster_status (meta , CLUSTER_CREATE_CLUSTER , CLUSTER_STATUS_RUN ,
2513
2515
logger )
2514
2516
except Exception as e :
@@ -2788,6 +2790,37 @@ async def timer_cluster(
2788
2790
await correct_postgresql_password (meta , spec , patch , status , logger )
2789
2791
await correct_keepalived (meta , spec , patch , status , logger )
2790
2792
2793
def update_number_sync_standbys(
    meta: kopf.Meta,
    spec: kopf.Spec,
    patch: kopf.Patch,
    status: kopf.Status,
    logger: logging.Logger,
) -> None:
    """Push the expected ``number-sync-standbys`` value to the autofailover node.

    The value is derived from the replica counts returned by
    ``get_replicas(spec)``:

    * when the read-only instances stream synchronously
      (``STREAMING_SYNC``), read-write and read-only replicas are both
      counted;
    * otherwise only the read-write replicas are counted.

    Two is subtracted from that count and the result is clamped at zero.
    The setting is only sent when the cluster has at least two PostgreSQL
    nodes (read-write + read-only).

    A failure reported by the command is logged, not raised.

    Args:
        meta: kopf metadata of the resource, forwarded to ``connections``.
        spec: kopf spec of the resource; read for replica counts and the
            read-only streaming mode.
        patch: kopf patch object, forwarded to ``connections``.
        status: kopf status object, forwarded to ``connections``.
        logger: logger used for progress and error messages.
    """
    # Only the two PostgreSQL replica counts are needed here.
    _, _, readwrite_replicas, readonly_replicas = get_replicas(spec)

    pg_nodes = readwrite_replicas + readonly_replicas
    if spec[POSTGRESQL][READONLYINSTANCE][STREAMING] == STREAMING_SYNC:
        number_sync = pg_nodes
    else:
        number_sync = readwrite_replicas
    expect_number = max(number_sync - 2, 0)

    if pg_nodes < 2:
        return

    autofailover_conns = connections(spec, meta, patch,
                                     get_field(AUTOFAILOVER), False,
                                     None, logger, None, status, False)
    try:
        cmd = [
            "pgtools", "-S",
            "' formation number-sync-standbys " + str(expect_number)
            + PRIMARY_FORMATION + "'",
        ]
        logger.info(f"set number-sync-standbys with cmd {cmd} ")
        output = exec_command(autofailover_conns.get_conns()[0], cmd, logger,
                              interrupt=False)
        if output.find(SUCCESS) == -1:
            logger.error(
                f"set number-sync-standbys failed {cmd} {output} ")
    finally:
        # Always release the connections, even if the command raised.
        autofailover_conns.free_conns()
2823
+
2791
2824
2792
2825
def update_streaming (
2793
2826
meta : kopf .Meta ,
@@ -2799,20 +2832,27 @@ def update_streaming(
2799
2832
FIELD : Tuple ,
2800
2833
OLD : Any ,
2801
2834
NEW : Any ,
2802
- ) -> None :
2835
+ ) -> bool :
2836
+ need_update_number_sync_standbys = False
2803
2837
if FIELD == DIFF_FIELD_STREAMING :
2804
2838
if AC != DIFF_CHANGE :
2805
2839
logger .error (DIFF_FIELD_STREAMING + " only support " + DIFF_CHANGE )
2806
2840
else :
2807
2841
#pg_autoctl set node replication-quorum 0 --pgdata /var/lib/postgresql/data/pg_data/
2808
2842
if NEW == STREAMING_SYNC :
2809
2843
quorum = 1
2844
+ need_update_number_sync_standbys = True
2810
2845
elif NEW == STREAMING_ASYNC :
2811
2846
quorum = 0
2847
+ # number-sync-standbys must be updated before the readonly nodes are switched to async
2848
+ logger .info ("waiting for update_cluster success on readonly treaming" )
2849
+ waiting_cluster_final_status (meta , spec , patch , status , logger )
2850
+ update_number_sync_standbys (meta , spec , patch , status , logger )
2812
2851
cmd = [
2813
2852
"pgtools" , "-S" ,
2814
2853
"'node replication-quorum " + str (quorum ) + "'"
2815
2854
]
2855
+ logger .info (f"set readonly streaming with cmd { cmd } " )
2816
2856
conns = connections (spec , meta , patch ,
2817
2857
get_field (POSTGRESQL , READONLYINSTANCE ), False ,
2818
2858
None , logger , None , status , False )
@@ -2823,6 +2863,8 @@ def update_streaming(
2823
2863
f"set readonly streaming failed { cmd } { output } " )
2824
2864
conns .free_conns ()
2825
2865
2866
+ return need_update_number_sync_standbys
2867
+
2826
2868
2827
2869
def postgresql_action (
2828
2870
meta : kopf .Meta ,
@@ -3077,7 +3119,8 @@ def update_replicas(
3077
3119
FIELD : Tuple ,
3078
3120
OLD : Any ,
3079
3121
NEW : Any ,
3080
- ) -> None :
3122
+ ) -> bool :
3123
+ need_update_number_sync_standbys = False
3081
3124
if FIELD == DIFF_FIELD_READWRITE_REPLICAS :
3082
3125
if AC != DIFF_CHANGE :
3083
3126
#raise kopf.TemporaryError("Exception when calling list_pod_for_all_namespaces: %s\n" % e)
@@ -3093,6 +3136,7 @@ def update_replicas(
3093
3136
meta , spec , patch , status , logger ,
3094
3137
get_field (POSTGRESQL , READWRITEINSTANCE ), None , [NEW , OLD ],
3095
3138
True )
3139
+ need_update_number_sync_standbys = True
3096
3140
3097
3141
if FIELD == DIFF_FIELD_READWRITE_MACHINES :
3098
3142
if AC != DIFF_CHANGE :
@@ -3109,8 +3153,10 @@ def update_replicas(
3109
3153
meta , spec , patch , status , logger ,
3110
3154
get_field (POSTGRESQL , READWRITEINSTANCE ),
3111
3155
[i for i in OLD if i not in NEW ], None , True )
3112
- delete_services (meta , spec , patch , status , logger )
3113
- create_services (meta , spec , patch , status , logger )
3156
+ delete_services (meta , spec , patch , status , logger )
3157
+ create_services (meta , spec , patch , status , logger )
3158
+
3159
+ need_update_number_sync_standbys = True
3114
3160
3115
3161
if FIELD == DIFF_FIELD_READONLY_REPLICAS :
3116
3162
if NEW > OLD :
@@ -3120,6 +3166,7 @@ def update_replicas(
3120
3166
delete_postgresql_readonly (meta , spec , patch , status , logger ,
3121
3167
get_field (POSTGRESQL , READONLYINSTANCE ),
3122
3168
None , [NEW , OLD ], True )
3169
+ need_update_number_sync_standbys = True
3123
3170
3124
3171
if FIELD == DIFF_FIELD_READONLY_MACHINES :
3125
3172
if OLD == None or (NEW != None and len (NEW ) > len (OLD )):
@@ -3140,6 +3187,10 @@ def update_replicas(
3140
3187
delete_services (meta , spec , patch , status , logger )
3141
3188
create_services (meta , spec , patch , status , logger )
3142
3189
3190
+ need_update_number_sync_standbys = True
3191
+
3192
+ return need_update_number_sync_standbys
3193
+
3143
3194
3144
3195
def delete_services (
3145
3196
meta : kopf .Meta ,
@@ -3563,6 +3614,7 @@ async def update_cluster(
3563
3614
logger .info ("check update_cluster params" )
3564
3615
check_param (spec , logger , create = False )
3565
3616
need_roll_update = False
3617
+ need_update_number_sync_standbys = False
3566
3618
3567
3619
for diff in diffs :
3568
3620
AC = diff [0 ]
@@ -3583,8 +3635,10 @@ async def update_cluster(
3583
3635
OLD = diff [2 ]
3584
3636
NEW = diff [3 ]
3585
3637
3586
- update_replicas (meta , spec , patch , status , logger , AC , FIELD , OLD ,
3638
+ return_update_number_sync_standbys = update_replicas (meta , spec , patch , status , logger , AC , FIELD , OLD ,
3587
3639
NEW )
3640
+ if need_update_number_sync_standbys == False and return_update_number_sync_standbys == True :
3641
+ need_update_number_sync_standbys = True
3588
3642
update_podspec_volume (meta , spec , patch , status , logger , AC , FIELD ,
3589
3643
OLD , NEW )
3590
3644
if FIELD [0 :len (DIFF_FIELD_SPEC_ANTIAFFINITY
@@ -3606,14 +3660,20 @@ async def update_cluster(
3606
3660
update_hbas (meta , spec , patch , status , logger , AC , FIELD , OLD , NEW )
3607
3661
update_users (meta , spec , patch , status , logger , AC , FIELD , OLD ,
3608
3662
NEW )
3609
- update_streaming (meta , spec , patch , status , logger , AC , FIELD , OLD ,
3663
+ return_update_number_sync_standbys = update_streaming (meta , spec , patch , status , logger , AC , FIELD , OLD ,
3610
3664
NEW )
3665
+ if need_update_number_sync_standbys == False and return_update_number_sync_standbys == True :
3666
+ need_update_number_sync_standbys = True
3611
3667
update_configs (meta , spec , patch , status , logger , AC , FIELD , OLD ,
3612
3668
NEW )
3613
3669
3614
3670
logger .info ("waiting for update_cluster success" )
3615
3671
waiting_cluster_final_status (meta , spec , patch , status , logger )
3616
3672
3673
+ # update number-sync-standbys only after waiting_cluster_final_status has returned
3674
+ if need_update_number_sync_standbys :
3675
+ update_number_sync_standbys (meta , spec , patch , status , logger )
3676
+
3617
3677
# wait a few seconds to prevent the pod not running
3618
3678
time .sleep (5 )
3619
3679
if spec [ACTION ] == ACTION_STOP :
0 commit comments