Skip to content

Commit 14b373f

Browse files
yyhenryyyiSecloud
authored and committed
fix(mongodb): 修复故障自愈排除某些集群 #11242
1 parent 34bb5a5 commit 14b373f

File tree

1 file changed

+45
-2
lines changed
  • dbm-ui/backend/db_services/redis/autofix

1 file changed

+45
-2
lines changed

dbm-ui/backend/db_services/redis/autofix/bill.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
from backend.ticket.models import Ticket
3131
from backend.utils.time import datetime2str
3232

33-
from .enums import AutofixStatus
33+
from .enums import AutofixItem, AutofixStatus
3434
from .message import get_ticket_heplers, send_msg_2_qywx
35-
from .models import RedisAutofixCore
35+
from .models import RedisAutofixCore, RedisAutofixCtl
3636

3737
logger = logging.getLogger("root")
3838

@@ -53,9 +53,52 @@ def generate_autofix_ticket(fault_clusters: QuerySet):
5353
cluster.save(update_fields=["status_version", "deal_status", "update_at"])
5454
continue
5555

56+
# 忽略自愈,支持按集群名配置
57+
if will_ignore_autofix_by_domain(cluster):
58+
cluster.status_version = _("ignore_by_ctl:{}".format(get_random_string(12)))
59+
cluster.update_at = datetime2str(datetime.datetime.now(timezone.utc))
60+
cluster.deal_status = AutofixStatus.AF_IGNORE.value
61+
cluster.save(update_fields=["status_version", "deal_status", "update_at"])
62+
continue
63+
5664
generate_single_autofix_ticket(cluster)
5765

5866

67+
# 增加支持忽略自愈控制
68+
def will_ignore_autofix_by_domain(cluster: RedisAutofixCore) -> bool:
    """Decide whether autofix must be skipped for ``cluster`` based on its domain.

    The per-business ignore list is stored as a JSON array in the
    ``RedisAutofixCtl`` row whose ``ctl_name`` is
    ``AutofixItem.IGNORE_DOMAINS`` and whose ``bk_biz_id`` matches the cluster.
    If that row does not exist yet, it is created with an empty list and the
    cluster is not ignored.

    When ``cluster.immute_domain`` is in the list, the decision is logged, a
    notification is sent through ``send_msg_2_qywx``, and ``True`` is returned.

    Args:
        cluster: The fault record being evaluated; ``bk_biz_id``,
            ``bk_cloud_id``, ``immute_domain``, ``cluster_type`` and
            ``fault_machines`` are read from it.

    Returns:
        ``True`` if autofix should be skipped for this cluster, else ``False``.
    """
    try:
        ctl_item = RedisAutofixCtl.objects.get(
            ctl_name=AutofixItem.IGNORE_DOMAINS.value, bk_biz_id=cluster.bk_biz_id
        )
    except RedisAutofixCtl.DoesNotExist:
        # Seed an empty JSON array so the list can be edited later.
        # ``create`` already persists the row, so no extra ``save()`` is needed.
        RedisAutofixCtl.objects.create(
            bk_cloud_id=cluster.bk_cloud_id,
            bk_biz_id=cluster.bk_biz_id,
            ctl_value=json.dumps([]),
            ctl_name=AutofixItem.IGNORE_DOMAINS.value,
        )
        return False

    ignore_domains = json.loads(ctl_item.ctl_value)
    # A double-encoded value (e.g. the JSON string "[]") decodes to ``str``;
    # unwrap it once more so the membership test below is not a substring test.
    if isinstance(ignore_domains, str):
        ignore_domains = json.loads(ignore_domains)
    if not isinstance(ignore_domains, list):
        ignore_domains = []

    # Default: the domain is not in the ignore list, so let autofix proceed.
    if cluster.immute_domain not in ignore_domains:
        return False

    logger.info(
        "cluster_autofix_ignore {}, admin confied ignore domains {}/{} ".format(
            cluster.immute_domain, cluster.immute_domain, ignore_domains
        )
    )
    # Translate the template first, then interpolate; formatting before the
    # gettext lookup produces a msgid that can never match a catalog entry.
    title = _("{} - 🥸忽略自愈🥸").format(cluster.immute_domain)
    msgs = {
        _("BKID"): cluster.bk_biz_id,
        _("集群类型"): cluster.cluster_type,
        _("故障机S"): json.dumps(cluster.fault_machines),
        _("配置列表"): _("配置了忽略自愈的集群列表: {} ").format(json.dumps(ignore_domains)),
    }
    send_msg_2_qywx(title, msgs)
    return True
100+
101+
59102
# 独立出来
60103
def generate_single_autofix_ticket(cluster: RedisAutofixCore):
61104
try:

0 commit comments

Comments
 (0)