Skip to content

Commit

Permalink
Fix alert sending failure for rules with multiple datasources (#127)
Browse files Browse the repository at this point in the history
* 每个DatasourceId处理完后,立即调用t.Recover()和t.GC(),优化代码,增加debug日志

* 修复多数据源告警发出失败问题

* 修复多数据源告警时,其他数据源的结果被覆盖的问题

---------

场景:规则添加两个数据源,其中一个数据源有正常数据,另一个数据源为空。修复后可保证有数据的数据源正常发送告警。

Co-authored-by: libo <[email protected]>
  • Loading branch information
showsmall and libo authored Feb 10, 2025
1 parent f88ff99 commit 3f0e884
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions alert/eval/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (t *AlertRule) Eval(ctx context.Context, rule models.AlertRule) {
return
}

var curFiringKeys, curPendingKeys []string
var curFingerprints, curPendingFingerprints []string
for _, dsId := range rule.DatasourceIdList {
instance, err := t.ctx.DB.Datasource().GetInstance(dsId)
if err != nil {
Expand All @@ -89,24 +89,28 @@ func (t *AlertRule) Eval(ctx context.Context, rule models.AlertRule) {
if !provider.CheckDatasourceHealth(instance) {
continue
}

var firingFingerprints, pendingFingerprints []string
switch rule.DatasourceType {
case "Prometheus", "VictoriaMetrics":
curFiringKeys, curPendingKeys = metrics(t.ctx, dsId, instance.Type, rule)
firingFingerprints, pendingFingerprints = metrics(t.ctx, dsId, instance.Type, rule)
case "AliCloudSLS", "Loki", "ElasticSearch":
curFiringKeys = logs(t.ctx, dsId, instance.Type, rule)
firingFingerprints = logs(t.ctx, dsId, instance.Type, rule)
case "Jaeger":
curFiringKeys = traces(t.ctx, dsId, instance.Type, rule)
firingFingerprints = traces(t.ctx, dsId, instance.Type, rule)
case "CloudWatch":
curFiringKeys = cloudWatch(t.ctx, dsId, rule)
firingFingerprints = cloudWatch(t.ctx, dsId, rule)
case "KubernetesEvent":
curFiringKeys = kubernetesEvent(t.ctx, dsId, rule)
firingFingerprints = kubernetesEvent(t.ctx, dsId, rule)
default:
continue
}
// 追加当前数据源的指纹到总列表
curFingerprints = append(curFingerprints, firingFingerprints...)
curPendingFingerprints = append(curPendingFingerprints, pendingFingerprints...)
}
logc.Infof(t.ctx.Ctx, fmt.Sprintf("规则评估 -> %v", tools.JsonMarshal(rule)))

t.Recover(rule, curFiringKeys)
t.GC(rule, curFiringKeys, curPendingKeys)
t.Recover(rule, curFingerprints)
t.GC(rule, curFingerprints, curPendingFingerprints)
case <-ctx.Done():
logc.Infof(t.ctx.Ctx, fmt.Sprintf("停止 RuleId: %v, RuleName: %s 的 Watch 协程", rule.RuleId, rule.RuleName))
return
Expand Down

0 comments on commit 3f0e884

Please sign in to comment.