Skip to content

Commit ebdeb30

Browse files
ybyanboer
yb
authored and committed
do not update when the cluster is unhealthy
1 parent ab9bec4 commit ebdeb30

File tree

6 files changed

+100
-10
lines changed

6 files changed

+100
-10
lines changed

docs/index.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ entries:
33
postgres-operator:
44
- apiVersion: v2
55
appVersion: v1.1.1
6-
created: "2022-11-14T13:27:14.428332408+08:00"
6+
created: "2022-11-30T12:43:03.631056582+08:00"
77
description: A Helm chart for Kubernetes
8-
digest: 0e589d8472e5ab40c4286cc36ca96f773088b38916bcd753792711429991ba65
8+
digest: bf36d07d7d5392f495105b37e7c3de1da247b99b66fef0d0afd52203262ef05d
99
home: https://github.com/radondb/multi-platform-postgresql
1010
keywords:
1111
- operator
@@ -27,7 +27,7 @@ entries:
2727
version: v1.0.1
2828
- apiVersion: v2
2929
appVersion: v1.1.0
30-
created: "2022-11-14T13:27:14.427794075+08:00"
30+
created: "2022-11-30T12:43:03.630493625+08:00"
3131
description: A Helm chart for Kubernetes
3232
digest: 1f6593413ae373c5caa7b8c41a3cd3697fbb164ba417260a3fefa6d32a1852e8
3333
home: https://github.com/radondb/multi-platform-postgresql
@@ -49,4 +49,4 @@ entries:
4949
urls:
5050
- https://radondb.github.io/multi-platform-postgresql/postgres-operator-v1.0.0.tgz
5151
version: v1.0.0
52-
generated: "2022-11-14T13:27:14.427097554+08:00"
52+
generated: "2022-11-30T12:43:03.629722869+08:00"

docs/postgres-operator-v1.0.1.tgz

110 Bytes
Binary file not shown.

platforms/kubernetes/postgres-operator/deploy/postgres-operator.yaml.template

+19-2
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,26 @@ spec:
3333
name: State
3434
type: string
3535
priority: 0 # show in standard view
36+
- jsonPath: .spec.updatetoleration
37+
description: update disable when the cluster status is unhealthy
38+
name: Updatetoleration
39+
type: boolean
40+
priority: 1 # show in wide view
41+
- jsonPath: .spec.volume_type
42+
description: if volume type is local, rebuild pvc during rolling upgrade
43+
name: Volumetype
44+
type: string
45+
priority: 1 # show in wide view
3646
- jsonPath: .spec.autofailover.podspec.containers[0].image
3747
description: The autofailover image
3848
name: FailoverImage
3949
type: string
40-
priority: 1 # show in standard view
50+
priority: 1 # show in wide view
4151
- jsonPath: .spec.postgresql.readwriteinstance.podspec.containers[0].image
4252
description: The postgresql image
4353
name: PostgresqlImage
4454
type: string
45-
priority: 1 # show in standard view
55+
priority: 1 # show in wide view
4656
- jsonPath: .spec.postgresql.readwriteinstance.replicas
4757
description: The readwriteinstance nodes
4858
name: RWnodes
@@ -93,11 +103,18 @@ spec:
93103
enum:
94104
- true
95105
- false
106+
updatetoleration:
107+
type: boolean
108+
enum:
109+
- true
110+
- false
111+
default: false
96112
volume_type:
97113
type: string
98114
enum:
99115
- 'local'
100116
- 'cloud'
117+
default: 'local'
101118
antiaffinity:
102119
type: object
103120
x-kubernetes-preserve-unknown-fields: true

platforms/kubernetes/postgres-operator/deploy/postgresql.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ metadata:
66
spec:
77
action: start #stop start
88
deletepvc: true
9-
volume_type: local # local/cloud
109
antiaffinity:
1110
policy: preferred # preferred/required
1211
podAntiAffinityTerm: autofailover-readwrite # none/autofailover-readwrite/autofailover-readwrite-readonly

platforms/kubernetes/postgres-operator/postgres/constants.py

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
STREAMING_ASYNC = "async"
3333
STREAMING_SYNC = "sync"
3434
DELETE_PVC = "deletepvc"
35+
UPDATE_TOLERATION = "updatetoleration"
3536
POSTGRESQL_PVC_NAME = "data"
3637
SUCCESS = "exec_success"
3738
FAILED = "exec_failed"

platforms/kubernetes/postgres-operator/postgres/handle.py

+76-3
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@
138138
MINUTES,
139139
HOURS,
140140
DAYS,
141+
UPDATE_TOLERATION,
141142
)
142143

143144
PGLOG_DIR = "log"
@@ -417,9 +418,12 @@ def waiting_cluster_final_status(
417418
status: kopf.Status,
418419
logger: logging.Logger,
419420
timeout: int = MINUTES * 1,
420-
) -> None:
421+
except_nodes: int = None,
422+
) -> bool:
423+
is_health = True
424+
421425
if spec[ACTION] == ACTION_STOP:
422-
return
426+
return is_health
423427

424428
# waiting for restart
425429
auto_failover_conns = connections(spec, meta, patch,
@@ -449,6 +453,7 @@ def waiting_cluster_final_status(
449453
if i >= maxtry:
450454
logger.warning(
451455
f"cluster maybe maybe not right. skip waitting.")
456+
is_health = False
452457
break
453458
output = exec_command(conn, primary_cmd, logger, interrupt=False)
454459
if output != '1':
@@ -475,6 +480,8 @@ def waiting_cluster_final_status(
475480
spec.get(POSTGRESQL).get(READWRITEINSTANCE).get(MACHINES)
476481
) + len(
477482
spec.get(POSTGRESQL).get(READONLYINSTANCE).get(MACHINES))
483+
if except_nodes is not None:
484+
total_nodes = except_nodes
478485
output = exec_command(conn, nodes_cmd, logger, interrupt=False)
479486
if output != str(total_nodes):
480487
logger.warning(
@@ -484,6 +491,7 @@ def waiting_cluster_final_status(
484491

485492
break
486493
auto_failover_conns.free_conns()
494+
return is_health
487495

488496

489497
def waiting_cluster_correct_status(
@@ -3321,7 +3329,7 @@ def update_antiaffinity(
33213329
timeout: int = MINUTES * 5,
33223330
) -> None:
33233331
# local volume
3324-
if spec.get(SPEC_VOLUME_TYPE) == SPEC_VOLUME_LOCAL:
3332+
if spec.get(SPEC_VOLUME_TYPE, 'local') == SPEC_VOLUME_LOCAL:
33253333
delete_disk = True
33263334
timeout = HOURS * 1
33273335
rolling_update(meta, spec, patch, status, logger, target_roles, exit,
@@ -3408,6 +3416,8 @@ def update_replicas(
34083416

34093417
need_update_number_sync_standbys = True
34103418

3419+
waiting_cluster_final_status(meta, spec, patch, status, logger, 1 * HOURS)
3420+
34113421
return need_update_number_sync_standbys
34123422

34133423

@@ -4019,6 +4029,48 @@ def local_create_user(OS: List,
40194029
auto_failover_conns.free_conns()
40204030

40214031

4032+
def get_except_nodes(
    meta: kopf.Meta,
    spec: kopf.Spec,
    patch: kopf.Patch,
    status: kopf.Status,
    logger: logging.Logger,
    diffs: kopf.Diff,
) -> int:
    """Return the node count the cluster had *before* ``diffs`` was applied.

    The counts start from what ``get_replicas(spec)`` reports for the
    read-write and read-only instances, and are then rolled back to the OLD
    value of every replicas/machines diff entry, so the result describes the
    cluster as it was prior to this update.

    Args:
        meta: kopf metadata of the resource (unused here; kept so the
            signature matches the other handlers in this module).
        spec: resource spec, passed to ``get_replicas``.
        patch: kopf patch object (unused here).
        status: kopf status object (unused here).
        logger: receives an error when a read-write diff entry uses an
            operation other than ``DIFF_CHANGE``.
        diffs: iterable of ``(operation, field, old, new)`` entries.

    Returns:
        Read-write node count plus read-only node count before the update.
    """
    # get_replicas yields four values; only the two instance counts matter.
    _mode, _autofailover_replicas, readwrite_replicas, readonly_replicas = get_replicas(
        spec)
    except_readwrite_nodes = readwrite_replicas
    except_readonly_nodes = readonly_replicas

    for diff in diffs:
        AC = diff[0]
        FIELD = diff[1]
        OLD = diff[2]

        if FIELD == DIFF_FIELD_READWRITE_REPLICAS:
            if AC != DIFF_CHANGE:
                logger.error(
                    str(DIFF_FIELD_ACTION) + " only support " + DIFF_CHANGE)
            else:
                except_readwrite_nodes = OLD

        elif FIELD == DIFF_FIELD_READWRITE_MACHINES:
            if AC != DIFF_CHANGE:
                logger.error(
                    str(DIFF_FIELD_ACTION) + " only support " + DIFF_CHANGE)
            else:
                except_readwrite_nodes = len(OLD)

        # Fix: the read-only branches previously overwrote the read-write
        # counter, so a read-only change corrupted the read-write count and
        # left the read-only count at its *new* value.
        # NOTE(review): unlike the read-write branches there is no operation
        # check here; if an "add" entry can reach this point OLD may be None
        # -- confirm against the callers.
        elif FIELD == DIFF_FIELD_READONLY_REPLICAS:
            except_readonly_nodes = OLD

        elif FIELD == DIFF_FIELD_READONLY_MACHINES:
            except_readonly_nodes = len(OLD)

    return except_readwrite_nodes + except_readonly_nodes
4072+
4073+
40224074
# kubectl patch pg lzzhang --patch '{"spec": {"action": "stop"}}' --type=merge
40234075
def update_cluster(
40244076
meta: kopf.Meta,
@@ -4035,6 +4087,8 @@ def update_cluster(
40354087
check_param(spec, logger, create=False)
40364088
need_roll_update = False
40374089
need_update_number_sync_standbys = False
4090+
update_toleration = spec.get(UPDATE_TOLERATION, False)
4091+
except_nodes = get_except_nodes(meta, spec, patch, status, logger, diffs)
40384092

40394093
for diff in diffs:
40404094
AC = diff[0]
@@ -4055,10 +4109,25 @@ def update_cluster(
40554109
OLD = diff[2]
40564110
NEW = diff[3]
40574111

4112+
if update_toleration == False and waiting_cluster_final_status(meta, spec, patch, status, logger, except_nodes=except_nodes) == False:
4113+
logger.error(f"cluster status is not health.")
4114+
raise kopf.PermanentError(f"cluster status is not health.")
4115+
40584116
return_update_number_sync_standbys = update_replicas(meta, spec, patch, status, logger, AC, FIELD, OLD,
40594117
NEW)
40604118
if need_update_number_sync_standbys == False and return_update_number_sync_standbys == True:
40614119
need_update_number_sync_standbys = True
4120+
4121+
for diff in diffs:
4122+
AC = diff[0]
4123+
FIELD = diff[1]
4124+
OLD = diff[2]
4125+
NEW = diff[3]
4126+
4127+
if update_toleration == False and waiting_cluster_final_status(meta, spec, patch, status, logger) == False:
4128+
logger.error(f"cluster status is not health.")
4129+
raise kopf.PermanentError(f"cluster status is not health.")
4130+
40624131
update_podspec_volume(meta, spec, patch, status, logger, AC, FIELD,
40634132
OLD, NEW)
40644133
if FIELD[0:len(DIFF_FIELD_SPEC_ANTIAFFINITY
@@ -4077,6 +4146,10 @@ def update_cluster(
40774146
OLD = diff[2]
40784147
NEW = diff[3]
40794148

4149+
if update_toleration == False and waiting_cluster_final_status(meta, spec, patch, status, logger) == False:
4150+
logger.error(f"cluster status is not health.")
4151+
raise kopf.PermanentError(f"cluster status is not health.")
4152+
40804153
update_hbas(meta, spec, patch, status, logger, AC, FIELD, OLD, NEW)
40814154
update_users(meta, spec, patch, status, logger, AC, FIELD, OLD,
40824155
NEW)

0 commit comments

Comments
 (0)