Skip to content

feat(eap): Add downsampling to the api #88023

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 31, 2025
2 changes: 2 additions & 0 deletions src/sentry/api/endpoints/organization_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ def get(self, request: Request, organization) -> Response:

dataset = self.get_dataset(request)
metrics_enhanced = dataset in {metrics_performance, metrics_enhanced_performance}
sampling_mode = request.GET.get("sampling")

sentry_sdk.set_tag("performance.metrics_enhanced", metrics_enhanced)
allow_metric_aggregates = request.GET.get("preventMetricAggregates") != "1"
Expand Down Expand Up @@ -465,6 +466,7 @@ def _data_fn(
auto_fields=True,
use_aggregate_conditions=use_aggregate_conditions,
),
sampling_mode=sampling_mode,
)
query_source = self.get_request_source(request)
return dataset_query(
Expand Down
1 change: 1 addition & 0 deletions src/sentry/incidents/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ def get_metric_issue_aggregates(
offset=0,
limit=1,
referrer=Referrer.API_ALERTS_ALERT_RULE_CHART.value,
sampling_mode=None,
config=SearchResolverConfig(
auto_fields=True,
),
Expand Down
1 change: 1 addition & 0 deletions src/sentry/profiles/flamegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,7 @@ def get_spans_based_candidates(self, query: str | None, limit: int) -> EAPRespon
offset=0,
limit=limit,
referrer=Referrer.API_TRACE_EXPLORER_TRACE_SPANS_CANDIDATES_FLAMEGRAPH.value,
sampling_mode=None,
config=SearchResolverConfig(
auto_fields=True,
),
Expand Down
6 changes: 6 additions & 0 deletions src/sentry/search/eap/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Literal

from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import AggregationComparisonFilter
from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
Expand Down Expand Up @@ -162,3 +163,8 @@
],
5: ["500", "501", "502", "503", "504", "505", "506", "507", "508", "509", "510", "511"],
}

# Maps the public, user-facing sampling-mode names (as accepted by the API's
# `sampling` query parameter) to their protobuf DownsampledStorageConfig modes.
SAMPLING_MODES = dict(
    BEST_EFFORT=DownsampledStorageConfig.MODE_BEST_EFFORT,
    PREFLIGHT=DownsampledStorageConfig.MODE_PREFLIGHT,
)
4 changes: 3 additions & 1 deletion src/sentry/search/eap/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
VirtualColumnDefinition,
)
from sentry.search.eap.types import SearchResolverConfig
from sentry.search.eap.utils import validate_sampling
from sentry.search.events import constants as qb_constants
from sentry.search.events import fields
from sentry.search.events import filter as event_filter
Expand Down Expand Up @@ -76,7 +77,7 @@ class SearchResolver:
] = field(default_factory=dict)

@sentry_sdk.trace
def resolve_meta(self, referrer: str) -> RequestMeta:
def resolve_meta(self, referrer: str, sampling_mode: str | None = None) -> RequestMeta:
if self.params.organization_id is None:
raise Exception("An organization is required to resolve queries")
span = sentry_sdk.get_current_span()
Expand All @@ -89,6 +90,7 @@ def resolve_meta(self, referrer: str) -> RequestMeta:
start_timestamp=self.params.rpc_start_date,
end_timestamp=self.params.rpc_end_date,
trace_item_type=self.definitions.trace_item_type,
downsampled_storage_config=validate_sampling(sampling_mode),
)

@sentry_sdk.trace
Expand Down
1 change: 1 addition & 0 deletions src/sentry/search/eap/spans/formulas.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ def time_spent_percentage(
orderby=None,
offset=0,
limit=1,
sampling_mode=None,
config=SearchResolverConfig(),
)

Expand Down
12 changes: 12 additions & 0 deletions src/sentry/search/eap/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
from typing import Any, Literal

from google.protobuf.timestamp_pb2 import Timestamp
from sentry_protos.snuba.v1.downsampled_storage_pb2 import DownsampledStorageConfig
from sentry_protos.snuba.v1.endpoint_time_series_pb2 import Expression, TimeSeriesRequest
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import Column
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import Function

from sentry.exceptions import InvalidSearchQuery
from sentry.search.eap.constants import SAMPLING_MODES
from sentry.search.eap.ourlogs.attributes import LOGS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS
from sentry.search.eap.spans.attributes import SPANS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS
from sentry.search.eap.types import SupportedTraceItemType
Expand Down Expand Up @@ -81,6 +83,16 @@ def transform_column_to_expression(column: Column) -> Expression:
)


def validate_sampling(sampling_mode: str | None) -> DownsampledStorageConfig:
    """Build a DownsampledStorageConfig from a user-supplied sampling mode.

    A ``None`` mode yields MODE_UNSPECIFIED; otherwise the mode is matched
    case-insensitively against SAMPLING_MODES.

    Raises:
        InvalidSearchQuery: if the mode is not one of the supported names.
    """
    if sampling_mode is None:
        return DownsampledStorageConfig(mode=DownsampledStorageConfig.MODE_UNSPECIFIED)
    normalized = sampling_mode.upper()
    if normalized in SAMPLING_MODES:
        return DownsampledStorageConfig(mode=SAMPLING_MODES[normalized])
    raise InvalidSearchQuery(f"sampling mode: {normalized} is not supported")


INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS: dict[
SupportedTraceItemType, dict[Literal["string", "number"], dict[str, str]]
] = {
Expand Down
1 change: 1 addition & 0 deletions src/sentry/snuba/ourlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def query(
offset=offset or 0,
limit=limit,
referrer=referrer or "referrer unset",
sampling_mode=None,
resolver=get_resolver(
params=snuba_params,
config=SearchResolverConfig(
Expand Down
5 changes: 4 additions & 1 deletion src/sentry/snuba/rpc_dataset_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@ def run_table_query(
offset: int,
limit: int,
referrer: str,
sampling_mode: str | None,
resolver: SearchResolver,
debug: bool = False,
) -> EAPResponse:
"""Make the query"""
meta = resolver.resolve_meta(referrer=referrer)
sentry_sdk.set_tag("query.sampling_mode", sampling_mode)
meta = resolver.resolve_meta(referrer=referrer, sampling_mode=sampling_mode)
where, having, query_contexts = resolver.resolve_query(query_string)
columns, column_contexts = resolver.resolve_columns(selected_columns)
contexts = resolver.resolve_contexts(query_contexts + column_contexts)
Expand Down Expand Up @@ -98,6 +100,7 @@ def run_table_query(
virtual_column_contexts=[context for context in contexts if context is not None],
)
rpc_response = snuba_rpc.table_rpc([rpc_request])[0]
sentry_sdk.set_tag("query.storage_meta.tier", rpc_response.meta.downsampled_storage_meta.tier)

"""Process the results"""
final_data: SnubaData = []
Expand Down
3 changes: 3 additions & 0 deletions src/sentry/snuba/spans_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def run_table_query(
limit: int,
referrer: str,
config: SearchResolverConfig,
sampling_mode: str | None,
search_resolver: SearchResolver | None = None,
debug: bool = False,
) -> EAPResponse:
Expand All @@ -94,6 +95,7 @@ def run_table_query(
offset,
limit,
referrer,
sampling_mode,
search_resolver or get_resolver(params, config),
debug,
)
Expand Down Expand Up @@ -298,6 +300,7 @@ def run_top_events_timeseries_query(
limit,
referrer,
config,
None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this intentional set to none for the top events timeseries? And the sampling mode be implemented for any timeseries queries?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was trying to rush this out originally and only did it for /events/; I'll follow up with stats.

search_resolver,
)
if len(top_events["data"]) == 0:
Expand Down
1 change: 1 addition & 0 deletions src/sentry/snuba/uptime_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def query(
offset=offset or 0,
limit=limit,
referrer=referrer or "referrer unset",
sampling_mode=None,
resolver=get_resolver(
params=snuba_params,
config=SearchResolverConfig(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
from sentry.testutils.helpers import parse_link_header
from tests.snuba.api.endpoints.test_organization_events import OrganizationEventsEndpointTestBase

# Downsampling is deterministic, so unless the algorithm changes we can rely on a known id that
# appears in the preflight (downsampled) tier and will therefore always show up.
# If a new id is ever needed, load a few hundred events, query for their event ids, and use any id that comes back.
KNOWN_PREFLIGHT_ID = "ca056dd858a24299"


class OrganizationEventsSpanIndexedEndpointTest(OrganizationEventsEndpointTestBase):
is_eap = False
Expand Down Expand Up @@ -3522,3 +3527,66 @@ def test_filtering_null_numeric_attr(self):
},
]
assert meta["dataset"] == self.dataset

def test_preflight_request(self):
    """Preflight sampling should return only the span known to be in the downsampled tier."""
    tagged_span = self.create_span(
        {"description": "foo", "sentry_tags": {"status": "success"}},
        start_ts=self.ten_mins_ago,
    )
    tagged_span["span_id"] = KNOWN_PREFLIGHT_ID
    other_span = self.create_span(
        {"description": "zoo", "sentry_tags": {"status": "success"}},
        start_ts=self.ten_mins_ago,
    )
    other_span["span_id"] = "b" * 16
    self.store_spans([tagged_span, other_span], is_eap=self.is_eap)

    request_payload = {
        "field": ["id", "description", "count()"],
        "query": "",
        "orderby": "description",
        "project": self.project.id,
        "dataset": self.dataset,
        "statsPeriod": "1h",
        "sampling": "PREFLIGHT",
    }
    response = self.do_request(request_payload)

    assert response.status_code == 200, response.content
    rows = response.data["data"]
    # Only the known preflight-tier span should survive downsampling.
    assert len(rows) == 1
    assert rows[0]["id"] == KNOWN_PREFLIGHT_ID

def test_best_effort_request(self):
    """Best-effort sampling should return every stored span, ordered by description."""
    tagged_span = self.create_span(
        {"description": "foo", "sentry_tags": {"status": "success"}},
        start_ts=self.ten_mins_ago,
    )
    tagged_span["span_id"] = KNOWN_PREFLIGHT_ID
    other_span = self.create_span(
        {"description": "zoo", "sentry_tags": {"status": "success"}},
        start_ts=self.ten_mins_ago,
    )
    other_span["span_id"] = "b" * 16
    self.store_spans([tagged_span, other_span], is_eap=self.is_eap)

    request_payload = {
        "field": ["id", "description", "count()"],
        "query": "",
        "orderby": "description",
        "project": self.project.id,
        "dataset": self.dataset,
        "statsPeriod": "1h",
        "sampling": "BEST_EFFORT",
    }
    response = self.do_request(request_payload)

    assert response.status_code == 200, response.content
    rows = response.data["data"]
    # Best effort falls back to the full dataset, so both spans come back.
    assert len(rows) == 2
    assert rows[0]["id"] == KNOWN_PREFLIGHT_ID
    assert rows[1]["id"] == "b" * 16
Loading