From 3f0e8848c4f4d32bbd5367943ee680cf2af5b275 Mon Sep 17 00:00:00 2001 From: showsmall Date: Mon, 10 Feb 2025 11:32:26 +0800 Subject: [PATCH] Fix multi datasource send alert failed (#127) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 每个DatasourceId处理完后,立即调用t.Recover()和t.GC(),优化代码,增加debug日志 * 修复多数据源告警发出失败问题 * 修复多数据源告警其他数据源 --------- 规则添加两个数据源,一个数据源有正常数据,另一个数据源是空的。能保证有数据的数据源正常发送告警。 Co-authored-by: libo --- alert/eval/eval.go | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/alert/eval/eval.go b/alert/eval/eval.go index 2bf68fa..e407ff3 100644 --- a/alert/eval/eval.go +++ b/alert/eval/eval.go @@ -79,7 +79,7 @@ func (t *AlertRule) Eval(ctx context.Context, rule models.AlertRule) { return } - var curFiringKeys, curPendingKeys []string + var curFingerprints, curPendingFingerprints []string for _, dsId := range rule.DatasourceIdList { instance, err := t.ctx.DB.Datasource().GetInstance(dsId) if err != nil { @@ -89,24 +89,28 @@ func (t *AlertRule) Eval(ctx context.Context, rule models.AlertRule) { if !provider.CheckDatasourceHealth(instance) { continue } - + var firingFingerprints, pendingFingerprints []string switch rule.DatasourceType { case "Prometheus", "VictoriaMetrics": - curFiringKeys, curPendingKeys = metrics(t.ctx, dsId, instance.Type, rule) + firingFingerprints, pendingFingerprints = metrics(t.ctx, dsId, instance.Type, rule) case "AliCloudSLS", "Loki", "ElasticSearch": - curFiringKeys = logs(t.ctx, dsId, instance.Type, rule) + firingFingerprints = logs(t.ctx, dsId, instance.Type, rule) case "Jaeger": - curFiringKeys = traces(t.ctx, dsId, instance.Type, rule) + firingFingerprints = traces(t.ctx, dsId, instance.Type, rule) case "CloudWatch": - curFiringKeys = cloudWatch(t.ctx, dsId, rule) + firingFingerprints = cloudWatch(t.ctx, dsId, rule) case "KubernetesEvent": - curFiringKeys = kubernetesEvent(t.ctx, dsId, rule) + firingFingerprints = kubernetesEvent(t.ctx, dsId, rule) + default: + continue } + // 追加当前数据源的指纹到总列表 + curFingerprints = append(curFingerprints, firingFingerprints...) + curPendingFingerprints = append(curPendingFingerprints, pendingFingerprints...) } logc.Infof(t.ctx.Ctx, fmt.Sprintf("规则评估 -> %v", tools.JsonMarshal(rule))) - - t.Recover(rule, curFiringKeys) - t.GC(rule, curFiringKeys, curPendingKeys) + t.Recover(rule, curFingerprints) + t.GC(rule, curFingerprints, curPendingFingerprints) case <-ctx.Done(): logc.Infof(t.ctx.Ctx, fmt.Sprintf("停止 RuleId: %v, RuleName: %s 的 Watch 协程", rule.RuleId, rule.RuleName)) return