diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/__init__.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/__init__.py new file mode 100644 index 0000000000..d7210a4422 --- /dev/null +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. +Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at https://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +from .check import health_check diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/access_relate.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/access_relate.py new file mode 100644 index 0000000000..0b788353f6 --- /dev/null +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/access_relate.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. +Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. 
@checker_wrapper
def _cluster_spider_access_remote(c: Cluster) -> List[CheckResponse]:
    """
    Check that each spider is only linked to remote storage of the matching role.

    - master spider and mnt master spider may only access remote master
    - slave spider and mnt slave spider may only access remote slave

    Spiders with any other role are skipped.

    :param c: cluster whose proxy (spider) instances are inspected
    :return: one CheckResponse per (spider, storage instance) pair whose
        storage inner role differs from the role the spider may access
    """
    master_side_roles = (TenDBClusterSpiderRole.SPIDER_MASTER, TenDBClusterSpiderRole.SPIDER_MNT)
    slave_side_roles = (TenDBClusterSpiderRole.SPIDER_SLAVE, TenDBClusterSpiderRole.SPIDER_SLAVE_MNT)

    bad = []
    for pi in c.proxyinstance_set.all():
        # Read the role once per spider instead of once per comparison.
        spider_role = pi.tendbclusterspiderext.spider_role

        if spider_role in master_side_roles:
            can_access_remote_role = InstanceInnerRole.MASTER
        elif spider_role in slave_side_roles:
            can_access_remote_role = InstanceInnerRole.SLAVE
        else:
            continue

        for si in pi.storageinstance.all():
            if si.instance_inner_role == can_access_remote_role:
                continue

            bad.append(
                CheckResponse(
                    # Translate the literal template first and interpolate afterwards;
                    # formatting inside _() makes the catalogue lookup use the
                    # already-interpolated text, which never matches a msgid.
                    msg=_("{} 关联到 {}: {}").format(spider_role, si.instance_inner_role, si.ip_port),
                    check_subtype=MetaCheckSubType.ClusterTopo,
                    instance=pi,
                )
            )

    return bad
a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/check.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/check.py new file mode 100644 index 0000000000..34fac95e33 --- /dev/null +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbcluster/check.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. +Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at https://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
def health_check(cluster_id: int) -> List[CheckResponse]:
    """
    Run every metadata topology check for one TenDBCluster cluster.

    Rules covered:
    - cluster status is normal
    - at least 1 master entry
    - at least 2 master spiders
    - master status is normal
    - number of master instances equals the number of shards
    - every master instance has exactly one standby slave
    - standby slave status is normal
    - the number of spiders bound to a master/slave entry equals the number of healthy spiders
    - master spider / mnt master spider may only access remote master
    - slave spider / mnt slave spider may only access remote slave
    - a master may only act as ejector
    - a slave may only act as receiver
    - no replication relation may point outside the cluster

    :param cluster_id: primary key of the cluster to check
    :return: the concatenated results of all checkers, in execution order
    :raises Cluster.DoesNotExist: when no TenDBCluster cluster has this id
    """
    # Function-scope import: the per-shard standby check lives in the sibling
    # status module and was not part of this module's import list.
    from backend.db_periodic_task.local_tasks.db_meta.db_meta_check.mysql_cluster_topo.tendbcluster.status import (
        _cluster_one_standby_slave_each_shard,
    )

    # NOTE(review): several checkers read `proxyinstance.tendbclusterspiderext`;
    # it is not in this prefetch list, so it is presumably loaded per instance -
    # confirm and consider prefetching it.
    qs = Cluster.objects.filter(cluster_type=ClusterType.TenDBCluster).prefetch_related(
        "clusterentry_set__proxyinstance_set",
        "clusterentry_set__storageinstance_set",
        "proxyinstance_set__storageinstance",
        "storageinstance_set__as_receiver__ejector__cluster",
        "storageinstance_set__as_ejector__receiver__cluster",
        "storageinstance_set__cluster",
        "proxyinstance_set__cluster",
        "tendbclusterstorageset_set",
    )
    cluster_obj = qs.get(id=cluster_id)

    res = []
    # unique
    res.extend(cluster_instance_unique_cluster(cluster_obj))
    # status
    res.extend(cluster_status(cluster_obj))
    res.extend(cluster_instance_status(cluster_obj))
    res.extend(cluster_master_entry_count(cluster_obj))
    res.extend(_cluster_master_spider_count(cluster_obj))
    res.extend(cluster_master_status(cluster_obj))
    res.extend(_cluster_master_remote_count(cluster_obj))
    # Previously documented above but never executed.
    res.extend(_cluster_one_standby_slave_each_shard(cluster_obj))
    res.extend(cluster_standby_slave_status(cluster_obj))
    # bind
    res.extend(_cluster_entry_on_spider(cluster_obj))
    res.extend(_cluster_entry_on_storage(cluster_obj))
    # access relate
    res.extend(_cluster_spider_access_remote(cluster_obj))
    # replicate
    res.extend(cluster_master_as_ejector(cluster_obj))
    res.extend(cluster_slave_as_receiver(cluster_obj))
    res.extend(cluster_replicate_out(cluster_obj))
    return res
@checker_wrapper
def _cluster_entry_on_spider(c: Cluster) -> List[CheckResponse]:
    """
    Check that each access entry is bound to as many spiders as the cluster has healthy ones.

    A master entry is compared against spiders with role SPIDER_MASTER; every
    other entry is compared against spiders with role SPIDER_SLAVE. Only
    spiders that are RUNNING and ONLINE are counted on the cluster side.

    :param c: cluster whose entries and proxy (spider) instances are inspected
    :return: one CheckResponse per entry whose bound-spider count differs
    """
    entries = list(c.clusterentry_set.all())
    if not entries:
        # Nothing to compare; also avoids touching the spiders at all.
        return []

    # The healthy-spider counts do not depend on the entry, so compute them once
    # instead of once per entry. Roles are compared with `==` rather than used
    # as dict keys, because an enum member is not guaranteed to hash like its value.
    master_cnt = 0
    slave_cnt = 0
    for pi in c.proxyinstance_set.all():
        if pi.status == InstanceStatus.RUNNING and pi.phase == InstancePhase.ONLINE:
            role = pi.tendbclusterspiderext.spider_role
            if role == TenDBClusterSpiderRole.SPIDER_MASTER:
                master_cnt += 1
            elif role == TenDBClusterSpiderRole.SPIDER_SLAVE:
                slave_cnt += 1

    bad = []
    for ce in entries:
        if ce.role == ClusterEntryRole.MASTER_ENTRY:
            spider_role = TenDBClusterSpiderRole.SPIDER_MASTER
            cnt = master_cnt
        else:
            spider_role = TenDBClusterSpiderRole.SPIDER_SLAVE
            cnt = slave_cnt

        if cnt != ce.proxyinstance_set.count():
            bad.append(
                CheckResponse(
                    # Translate the literal template, then interpolate.
                    msg=_("访问入口 {} 关联 {} 和集群 {} 数量不相等").format(ce.entry, spider_role, spider_role),
                    check_subtype=MetaCheckSubType.ClusterTopo,
                )
            )

    return bad


@checker_wrapper
def _cluster_entry_on_storage(c: Cluster) -> List[CheckResponse]:
    """
    Check that no access entry is bound to a storage instance.

    :param c: cluster whose entries are inspected
    :return: one CheckResponse per (entry, storage instance) binding found
    """
    return [
        CheckResponse(
            # Translate the literal template, then interpolate.
            msg=_("访问入口 {} 关联到存储实例").format(ce.entry),
            check_subtype=MetaCheckSubType.ClusterTopo,
            instance=si,
        )
        for ce in c.clusterentry_set.all()
        for si in ce.storageinstance_set.all()
    ]
@checker_wrapper
def _cluster_master_spider_count(c: Cluster) -> List[CheckResponse]:
    """
    Check that the cluster has at least 2 healthy master spiders.

    A spider counts when it is RUNNING, ONLINE and has role SPIDER_MASTER.

    :param c: cluster whose proxy (spider) instances are inspected
    :return: a single CheckResponse when fewer than 2 are found, else an empty list
    """
    cnt = sum(
        1
        for pi in c.proxyinstance_set.all()
        if pi.status == InstanceStatus.RUNNING
        and pi.phase == InstancePhase.ONLINE
        and pi.tendbclusterspiderext.spider_role == TenDBClusterSpiderRole.SPIDER_MASTER
    )

    if cnt < 2:
        return [CheckResponse(msg=_("正常 spider master 不足 2 个"), check_subtype=MetaCheckSubType.ClusterTopo)]

    return []


@checker_wrapper
def _cluster_master_remote_count(c: Cluster) -> List[CheckResponse]:
    """
    Check that the number of remote master instances equals the number of shards.

    :param c: cluster whose storage instances and storage sets are inspected
    :return: a single CheckResponse when the two counts differ, else an empty list
    """
    remote_master_count = sum(
        1 for si in c.storageinstance_set.all() if si.instance_inner_role == InstanceInnerRole.MASTER
    )
    shard_count = c.tendbclusterstorageset_set.count()

    if shard_count == remote_master_count:
        return []

    return [
        CheckResponse(
            # Translate the literal template, then interpolate; formatting
            # inside _() would look up the interpolated text and never match.
            msg=_("分片数 {} != remote master 数 {}").format(shard_count, remote_master_count),
            check_subtype=MetaCheckSubType.ClusterTopo,
        )
    ]


@checker_wrapper
def _cluster_one_standby_slave_each_shard(c: Cluster) -> List[CheckResponse]:
    """
    Check that every master instance replicates to exactly one standby slave.

    For each storage instance with inner role MASTER, the receivers of its
    outgoing replication tuples that are flagged `is_stand_by` are collected.

    :param c: cluster whose storage instances are inspected
    :return: one CheckResponse per master that has no standby slave or more than one
    """
    bad = []

    for si in c.storageinstance_set.all():
        if si.instance_inner_role != InstanceInnerRole.MASTER:
            continue

        standby = [tp.receiver for tp in si.as_ejector.all() if tp.receiver.is_stand_by]

        if not standby:
            bad.append(
                CheckResponse(
                    msg=_("无 standby slave"),
                    check_subtype=MetaCheckSubType.ClusterTopo,
                    instance=si,
                )
            )
        elif len(standby) > 1:
            bad.append(
                CheckResponse(
                    # Translate the literal template, then interpolate.
                    msg=_("standby slave 多余 1 个: {}").format(",".join([ele.ip_port for ele in standby])),
                    check_subtype=MetaCheckSubType.ClusterTopo,
                    instance=si,
                )
            )

    return bad
res.extend(cluster_instance_unique_cluster(cluster_obj)) # status.py - res.extend(_cluster_status(cluster_obj)) - res.extend(_cluster_instance_status(cluster_obj)) - res.extend(_cluster_master_entry_count(cluster_obj)) + res.extend(cluster_status(cluster_obj)) + res.extend(cluster_instance_status(cluster_obj)) + res.extend(cluster_master_entry_count(cluster_obj)) res.extend(_cluster_proxy_count(cluster_obj)) - res.extend(_cluster_one_master(cluster_obj)) - res.extend(_cluster_master_status(cluster_obj)) - res.extend(_cluster_one_standby_slave(cluster_obj)) - res.extend(_cluster_standby_slave_status(cluster_obj)) + res.extend(cluster_one_master(cluster_obj)) + res.extend(cluster_master_status(cluster_obj)) + res.extend(cluster_one_standby_slave(cluster_obj)) + res.extend(cluster_standby_slave_status(cluster_obj)) # entry_bind.py res.extend(_cluster_master_entry_on_proxy(cluster_obj)) res.extend(_cluster_master_entry_on_storage(cluster_obj)) res.extend(_cluster_entry_real_bind(cluster_obj)) + # access_relate.py res.extend(_cluster_proxy_access_master(cluster_obj)) # replicate.py - res.extend(_cluster_master_as_ejector(cluster_obj)) - res.extend(_cluster_slave_as_receiver(cluster_obj)) - res.extend(_cluster_replicate_out(cluster_obj)) + res.extend(cluster_master_as_ejector(cluster_obj)) + res.extend(cluster_slave_as_receiver(cluster_obj)) + res.extend(cluster_replicate_out(cluster_obj)) return res diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/replicate.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/replicate.py index a3e49b7115..a0510ecf52 100644 --- a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/replicate.py +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/replicate.py @@ -20,7 +20,7 @@ @checker_wrapper -def _cluster_master_as_ejector(c: Cluster) -> 
List[CheckResponse]: +def cluster_master_as_ejector(c: Cluster) -> List[CheckResponse]: """ master 只能是 ejector """ @@ -40,7 +40,7 @@ def _cluster_master_as_ejector(c: Cluster) -> List[CheckResponse]: @checker_wrapper -def _cluster_slave_as_receiver(c: Cluster) -> List[CheckResponse]: +def cluster_slave_as_receiver(c: Cluster) -> List[CheckResponse]: """ slave 只能是 receiver """ @@ -60,7 +60,7 @@ def _cluster_slave_as_receiver(c: Cluster) -> List[CheckResponse]: @checker_wrapper -def _cluster_replicate_out(c: Cluster) -> List[CheckResponse]: +def cluster_replicate_out(c: Cluster) -> List[CheckResponse]: """ 不能同步到集群外部 """ diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/status.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/status.py index b5111515ae..4dfbd1429b 100644 --- a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/status.py +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/status.py @@ -21,18 +21,21 @@ @checker_wrapper -def _cluster_status(c: Cluster) -> List[CheckResponse]: +def cluster_status(c: Cluster) -> List[CheckResponse]: + bad = [] if c.status != ClusterStatus.NORMAL: - return [ + bad.append( CheckResponse( msg=_("集群状态异常: {}".format(c.status)), check_subtype=MetaCheckSubType.ClusterTopo, ) - ] + ) + + return bad @checker_wrapper -def _cluster_instance_status(c: Cluster) -> List[CheckResponse]: +def cluster_instance_status(c: Cluster) -> List[CheckResponse]: bad = [] for si in c.storageinstance_set.all(): if si.status != InstanceStatus.RUNNING or si.phase != InstancePhase.ONLINE: @@ -58,7 +61,7 @@ def _cluster_instance_status(c: Cluster) -> List[CheckResponse]: @checker_wrapper -def _cluster_master_entry_count(c: Cluster) -> List[CheckResponse]: +def cluster_master_entry_count(c: Cluster) -> List[CheckResponse]: """ 至少 1 个主访问入口 """ @@ -67,8 +70,11 @@ def 
_cluster_master_entry_count(c: Cluster) -> List[CheckResponse]: if ce.role == ClusterEntryRole.MASTER_ENTRY: cnt += 1 + bad = [] if cnt <= 0: - return [CheckResponse(msg=_("缺少主访问入口"), check_subtype=MetaCheckSubType.ClusterTopo)] + bad.append(CheckResponse(msg=_("缺少主访问入口"), check_subtype=MetaCheckSubType.ClusterTopo)) + + return bad @checker_wrapper @@ -81,32 +87,38 @@ def _cluster_proxy_count(c: Cluster) -> List[CheckResponse]: if pi.status == InstanceStatus.RUNNING and pi.phase == InstancePhase.ONLINE: cnt += 1 + bad = [] if cnt < 2: - return [CheckResponse(msg=_("正常 proxy 不足 2 个"), check_subtype=MetaCheckSubType.ClusterTopo)] + bad.append(CheckResponse(msg=_("正常 proxy 不足 2 个"), check_subtype=MetaCheckSubType.ClusterTopo)) + + return bad @checker_wrapper -def _cluster_one_master(c: Cluster) -> List[CheckResponse]: +def cluster_one_master(c: Cluster) -> List[CheckResponse]: """只能有一个 master""" m = [] for si in c.storageinstance_set.all(): if si.instance_inner_role == InstanceInnerRole.MASTER: m.append(si) + bad = [] if len(m) <= 0: - return [CheckResponse(msg=_("无 master 实例"), check_subtype=MetaCheckSubType.ClusterTopo)] + bad.append(CheckResponse(msg=_("无 master 实例"), check_subtype=MetaCheckSubType.ClusterTopo)) if len(m) > 1: - return [ + bad.append( CheckResponse( msg=_("master 多余 1 个: {}".format(",".join([ele.ip_port for ele in m]))), check_subtype=MetaCheckSubType.ClusterTopo, ) - ] + ) + + return bad @checker_wrapper -def _cluster_master_status( +def cluster_master_status( c: Cluster, ) -> List[CheckResponse]: """ @@ -124,12 +136,15 @@ def _cluster_master_status( ): cnt += 1 + bad = [] if cnt <= 0: - return [CheckResponse(msg=_("无正常 master"), check_subtype=MetaCheckSubType.ClusterTopo)] + bad.append(CheckResponse(msg=_("无正常 master"), check_subtype=MetaCheckSubType.ClusterTopo)) + + return bad @checker_wrapper -def _cluster_one_standby_slave( +def cluster_one_standby_slave( c: Cluster, ) -> List[CheckResponse]: """ @@ -140,20 +155,23 @@ def 
_cluster_one_standby_slave( if si.instance_inner_role == InstanceInnerRole.SLAVE and si.is_stand_by is True: m.append(si) + bad = [] if len(m) <= 0: - return [CheckResponse(msg=_("无 standby slave"), check_subtype=MetaCheckSubType.ClusterTopo)] + bad.append(CheckResponse(msg=_("无 standby slave"), check_subtype=MetaCheckSubType.ClusterTopo)) if len(m) > 1: - return [ + bad.append( CheckResponse( msg=_("standby slave 多余 1 个: {}".format(",".join([ele.ip_port for ele in m]))), check_subtype=MetaCheckSubType.ClusterTopo, ) - ] + ) + + return bad @checker_wrapper -def _cluster_standby_slave_status(c: Cluster) -> List[CheckResponse]: +def cluster_standby_slave_status(c: Cluster) -> List[CheckResponse]: """ standby slave 必须正常 """ diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/unique_cluster.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/unique_cluster.py index 6744cd46b5..3d7f7aa46d 100644 --- a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/unique_cluster.py +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/mysql_cluster_topo/tendbha/unique_cluster.py @@ -19,7 +19,7 @@ @checker_wrapper -def _cluster_instance_unique_cluster(c: Cluster) -> List[CheckResponse]: +def cluster_instance_unique_cluster(c: Cluster) -> List[CheckResponse]: """ 实例只能属于一个集群 """ diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/sqlserver_cluster_topo/check.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/sqlserver_cluster_topo/check.py index e72aa44e43..02e545784c 100644 --- a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/sqlserver_cluster_topo/check.py +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/sqlserver_cluster_topo/check.py @@ -13,19 +13,19 @@ from backend.db_meta.models import Cluster from 
backend.db_periodic_task.local_tasks.db_meta.db_meta_check.mysql_cluster_topo.check_response import CheckResponse from backend.db_periodic_task.local_tasks.db_meta.db_meta_check.mysql_cluster_topo.tendbha.replicate import ( - _cluster_master_as_ejector, - _cluster_slave_as_receiver, + cluster_master_as_ejector, + cluster_slave_as_receiver, ) from backend.db_periodic_task.local_tasks.db_meta.db_meta_check.mysql_cluster_topo.tendbha.status import ( - _cluster_master_entry_count, - _cluster_master_status, - _cluster_one_master, - _cluster_one_standby_slave, - _cluster_standby_slave_status, - _cluster_status, + cluster_master_entry_count, + cluster_master_status, + cluster_one_master, + cluster_one_standby_slave, + cluster_standby_slave_status, + cluster_status, ) from backend.db_periodic_task.local_tasks.db_meta.db_meta_check.mysql_cluster_topo.tendbha.unique_cluster import ( - _cluster_instance_unique_cluster, + cluster_instance_unique_cluster, ) @@ -53,25 +53,25 @@ def sqlserver_dbmeta_check(cluster_id: int) -> List[CheckResponse]: res = [] for cluster_obj in clusters: # 检查集群状态 - res.extend(_cluster_status(cluster_obj)) + res.extend(cluster_status(cluster_obj)) # 实例有且只有属于一个集群 - res.extend(_cluster_instance_unique_cluster(cluster_obj)) + res.extend(cluster_instance_unique_cluster(cluster_obj)) # 主入口数 >= 1 - res.extend(_cluster_master_entry_count(cluster_obj)) + res.extend(cluster_master_entry_count(cluster_obj)) # 如果是ha架构,则需要检测下面子项 if cluster_obj.cluster_type == ClusterType.SqlserverHA: # 唯一 master - res.extend(_cluster_one_master(cluster_obj)) + res.extend(cluster_one_master(cluster_obj)) # master 状态 - res.extend(_cluster_master_status(cluster_obj)) + res.extend(cluster_master_status(cluster_obj)) # 唯一 standby slave - res.extend(_cluster_one_standby_slave(cluster_obj)) + res.extend(cluster_one_standby_slave(cluster_obj)) # standby slave 状态正常 - res.extend(_cluster_standby_slave_status(cluster_obj)) + res.extend(cluster_standby_slave_status(cluster_obj)) # master 只能作为 
ejector - res.extend(_cluster_master_as_ejector(cluster_obj)) + res.extend(cluster_master_as_ejector(cluster_obj)) # slave 只能作为 receiver - res.extend(_cluster_slave_as_receiver(cluster_obj)) + res.extend(cluster_slave_as_receiver(cluster_obj)) return res diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/task.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/task.py index 5f422894cb..8d8a897680 100644 --- a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/task.py +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/task.py @@ -18,7 +18,7 @@ from backend.db_report.models import MetaCheckReport from .check_redis_instance import check_redis_instance -from .mysql_cluster_topo.tendbha import health_check +from .mysql_cluster_topo import tendbcluster, tendbha from .sqlserver_cluster_topo.check import sqlserver_dbmeta_check logger = logging.getLogger("celery") @@ -36,7 +36,15 @@ def db_meta_check_task(): def tendbha_topo_daily_check(): for c in Cluster.objects.filter(cluster_type=ClusterType.TenDBHA): r: MetaCheckReport - for r in health_check(c.id): + for r in tendbha.health_check(c.id): + r.save() + + +@register_periodic_task(run_every=crontab(hour=2, minute=30)) +def tendbcluster_topo_daily_check(): + for c in Cluster.objects.filter(cluster_type=ClusterType.TenDBCluster): + r: MetaCheckReport + for r in tendbcluster.health_check(c.id): r.save()