Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rule information to query paramters sent to the QFE #6539

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* [FEATURE] Update prometheus alertmanager version to v0.28.0 and add new integration msteamsv2, jira, and rocketchat. #6590
* [FEATURE] Ingester: Add a `-ingester.enable-ooo-native-histograms` flag to enable out-of-order native histogram ingestion per tenant. It only takes effect when `-blocks-storage.tsdb.enable-native-histograms=true` and `-ingester.out-of-order-time-window` > 0. It is applied after the restart if it is changed at runtime through the runtime config. #6626
* [ENHANCEMENT] Querier: limit label APIs to query only ingesters if `start` param is not been specified. #6618
* [ENHANCEMENT] Ruler: Add rule information (group name, namespace, name, and kind) to query parameters sent to the Query Frontend to leave rule information logs on query stats. #6539
* [ENHANCEMENT] Alertmanager: Add new limits `-alertmanager.max-silences-count` and `-alertmanager.max-silences-size-bytes` for limiting silences per tenant. #6605
* [ENHANCEMENT] Update prometheus version to v3.1.0. #6583
* [ENHANCEMENT] Add `compactor.auto-forget-delay` for compactor to auto forget compactors after X minutes without heartbeat. #6533
Expand Down
4 changes: 3 additions & 1 deletion docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -4854,7 +4854,9 @@ ring:
[disabled_tenants: <string> | default = ""]

# Report query statistics for ruler queries to complete as a per user metric and
# as an info level log message.
# as an info level log message. It works only when the -ruler.frontend-address
# is not configured. When -ruler.frontend-address enabled, the Query Frontend
# tracks query statistics logs and metrics.
# CLI flag: -ruler.query-stats-enabled
[query_stats_enabled: <boolean> | default = false]

Expand Down
21 changes: 18 additions & 3 deletions pkg/ruler/compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,20 @@ func EngineQueryFunc(engine promql.QueryEngine, frontendClient *frontendClient,
}

if frontendClient != nil {
v, err := frontendClient.InstantQuery(ctx, qs, t)
// query parameters sent to the Query Frontend to leave rule information logs on query stats
queryParams := map[string]string{}

if origin := ctx.Value(promql.QueryOrigin{}); origin != nil {
queryLabels := origin.(map[string]interface{})
rgMap := queryLabels["ruleGroup"].(map[string]string)
queryParams["rule_group"] = rgMap["name"]
queryParams["rule_namespace"] = rgMap["file"]
}
ruleDetail := rules.FromOriginContext(ctx)
queryParams["rule"] = ruleDetail.Name
queryParams["rule_kind"] = ruleDetail.Kind

v, err := frontendClient.InstantQuery(ctx, qs, t, queryParams)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -333,7 +346,9 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi
totalWrites := evalMetrics.TotalWritesVec.WithLabelValues(userID)
failedWrites := evalMetrics.FailedWritesVec.WithLabelValues(userID)

if cfg.FrontendAddress != "" {
shouldEvalFromQFE := cfg.FrontendAddress != ""
if shouldEvalFromQFE {
// evaluate rules via Query-Frontend
c, err := frontendPool.GetClientFor(cfg.FrontendAddress)
if err != nil {
return nil, err
Expand All @@ -343,7 +358,7 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi
var queryFunc rules.QueryFunc
engineQueryFunc := EngineQueryFunc(engine, client, q, overrides, userID, cfg.LookbackDelta)
metricsQueryFunc := MetricsQueryFunc(engineQueryFunc, totalQueries, failedQueries)
if cfg.EnableQueryStats {
if cfg.EnableQueryStats && !shouldEvalFromQFE {
queryFunc = RecordAndReportRuleQueryMetrics(metricsQueryFunc, userID, evalMetrics, logger)
} else {
queryFunc = metricsQueryFunc
Expand Down
11 changes: 8 additions & 3 deletions pkg/ruler/frontend_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,17 @@ func NewFrontendClient(client httpgrpc.HTTPClient, timeout time.Duration, promet
}
}

func (p *FrontendClient) makeRequest(ctx context.Context, qs string, ts time.Time) (*httpgrpc.HTTPRequest, error) {
func (p *FrontendClient) makeRequest(ctx context.Context, qs string, ts time.Time, queryParams map[string]string) (*httpgrpc.HTTPRequest, error) {
args := make(url.Values)
args.Set("query", qs)
if !ts.IsZero() {
args.Set("time", ts.Format(time.RFC3339Nano))
}
// set query parameters sent to the Query Frontend to leave rule information logs on query stats
for k, v := range queryParams {
args.Set(k, v)
}

body := []byte(args.Encode())

//lint:ignore faillint wrapper around upstream method
Expand Down Expand Up @@ -87,11 +92,11 @@ func (p *FrontendClient) makeRequest(ctx context.Context, qs string, ts time.Tim
return req, nil
}

func (p *FrontendClient) InstantQuery(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
func (p *FrontendClient) InstantQuery(ctx context.Context, qs string, t time.Time, queryParams map[string]string) (promql.Vector, error) {
log, ctx := spanlogger.New(ctx, "FrontendClient.InstantQuery")
defer log.Span.Finish()

req, err := p.makeRequest(ctx, qs, t)
req, err := p.makeRequest(ctx, qs, t, queryParams)
if err != nil {
level.Error(log).Log("err", err, "query", qs)
return nil, err
Expand Down
8 changes: 4 additions & 4 deletions pkg/ruler/frontend_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func TestTimeout(t *testing.T) {
ctx := context.Background()
ctx = user.InjectOrgID(ctx, "userID")
frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json")
_, err := frontendClient.InstantQuery(ctx, "query", time.Now())
_, err := frontendClient.InstantQuery(ctx, "query", time.Now(), nil)
require.Equal(t, context.DeadlineExceeded, err)
}

Expand All @@ -41,7 +41,7 @@ func TestNoOrgId(t *testing.T) {
return nil, nil
}
frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json")
_, err := frontendClient.InstantQuery(context.Background(), "query", time.Now())
_, err := frontendClient.InstantQuery(context.Background(), "query", time.Now(), nil)
require.Equal(t, user.ErrNoOrgID, err)
}

Expand Down Expand Up @@ -152,7 +152,7 @@ func TestInstantQueryJsonCodec(t *testing.T) {
ctx := context.Background()
ctx = user.InjectOrgID(ctx, "userID")
frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json")
vector, err := frontendClient.InstantQuery(ctx, "query", time.Now())
vector, err := frontendClient.InstantQuery(ctx, "query", time.Now(), nil)
require.Equal(t, test.expected, vector)
require.Equal(t, test.expectedErr, err)
})
Expand Down Expand Up @@ -301,7 +301,7 @@ func TestInstantQueryProtoCodec(t *testing.T) {
ctx := context.Background()
ctx = user.InjectOrgID(ctx, "userID")
frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "protobuf")
vector, err := frontendClient.InstantQuery(ctx, "query", time.Now())
vector, err := frontendClient.InstantQuery(ctx, "query", time.Now(), nil)
require.Equal(t, test.expected, vector)
require.Equal(t, test.expectedErr, err)
})
Expand Down
2 changes: 1 addition & 1 deletion pkg/ruler/manager_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ func NewRuleEvalMetrics(cfg Config, reg prometheus.Registerer) *RuleEvalMetrics
Help: "Number of failed queries by ruler.",
}, []string{"user"}),
}
if cfg.EnableQueryStats {
if cfg.EnableQueryStats && cfg.FrontendAddress == "" {
m.RulerQuerySeconds = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "cortex_ruler_query_seconds_total",
Help: "Total amount of wall clock time spent processing queries by the ruler.",
Expand Down
2 changes: 1 addition & 1 deletion pkg/ruler/ruler.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.Var(&cfg.EnabledTenants, "ruler.enabled-tenants", "Comma separated list of tenants whose rules this ruler can evaluate. If specified, only these tenants will be handled by ruler, otherwise this ruler can process rules from all tenants. Subject to sharding.")
f.Var(&cfg.DisabledTenants, "ruler.disabled-tenants", "Comma separated list of tenants whose rules this ruler cannot evaluate. If specified, a ruler that would normally pick the specified tenant(s) for processing will ignore them instead. Subject to sharding.")

f.BoolVar(&cfg.EnableQueryStats, "ruler.query-stats-enabled", false, "Report query statistics for ruler queries to complete as a per user metric and as an info level log message.")
f.BoolVar(&cfg.EnableQueryStats, "ruler.query-stats-enabled", false, "Report query statistics for ruler queries to complete as a per user metric and as an info level log message. It works only when the -ruler.frontend-address is not configured. When -ruler.frontend-address enabled, the Query Frontend tracks query statistics logs and metrics.")
f.BoolVar(&cfg.DisableRuleGroupLabel, "ruler.disable-rule-group-label", false, "Disable the rule_group label on exported metrics")

f.BoolVar(&cfg.EnableHAEvaluation, "ruler.enable-ha-evaluation", false, "Enable high availability")
Expand Down
Loading