Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
**Bug Fixes**:

- Add `sentry.dsc.transaction` and `sentry.dsc.trace_id` to all v2 spans. ([#6001](https://github.com/getsentry/relay/pull/6001))
- Obtain PII values for `SpanData` fields from `sentry-conventions`. ([#5997](https://github.com/getsentry/relay/pull/5997))

## 26.5.0

Expand Down
9 changes: 5 additions & 4 deletions relay-event-schema/src/processor/attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -594,10 +594,11 @@ impl<'a> ProcessingState<'a> {

/// Derives the attrs for recursion.
pub fn inner_attrs(&self) -> Option<Cow<'_, FieldAttrs>> {
match self.pii() {
Pii::True => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)),
Pii::False => None,
Pii::Maybe => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)),
match self.attrs().pii {
PiiMode::Static(Pii::True) => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)),
PiiMode::Static(Pii::False) => None,
PiiMode::Static(Pii::Maybe) => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)),
PiiMode::Dynamic(f) => Some(Cow::Owned(DEFAULT_FIELD_ATTRS.pii_dynamic(f))),
Comment on lines -597 to +601
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, when deriving the attributes for recursion, we would eagerly resolve a dynamic PII value function into a static value. Now we instead pass the dynamic function through.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dynamic PII in inner_attrs over-scrubs nested field values

Medium Severity

The PiiMode::Dynamic(f) arm in inner_attrs passes span_data_pii_from_conventions through to children of complex-typed fields (e.g., gen_ai.input.messages of type Annotated<Value>). When such a field contains a nested object, each sub-key (like "role" or "content") is looked up in conventions via state.keys().next(). Sub-keys not found in conventions get Pii::True, whereas previously (with pii = "maybe" on SpanData) they inherited Pii::Maybe. This upgrades scrubbing from "only specific path selectors" to "generic selectors like $string match," causing unintended data removal in nested structures.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 7fa8c6d. Configure here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True in principle, but not really a concern.

}
}

Expand Down
70 changes: 43 additions & 27 deletions relay-event-schema/src/protocol/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use relay_protocol::{
Annotated, Array, Empty, Error, FromValue, Getter, IntoValue, Object, Val, Value,
};

use crate::processor::ProcessValue;
use crate::processor::{Pii, ProcessValue, ProcessingState};
use crate::protocol::{
EventId, IpAddr, JsonLenientString, LenientString, Measurements, OperationType, OriginType,
SpanId, SpanStatus, ThreadId, Timestamp, TraceId,
Expand Down Expand Up @@ -454,12 +454,32 @@ impl Getter for SentryTags {
}
}

/// Determines the `Pii` value for a field of [`SpanData`] by looking it up in `relay-conventions`.
///
/// If the field is not found in the conventions, this returns `Pii::True`
/// as a precaution.
fn span_data_pii_from_conventions(state: &ProcessingState) -> Pii {
fn inner(state: &ProcessingState) -> Option<Pii> {
// `state.keys().next()` is the _last_ segment in the state's
// path, i.e. the field name.
let key = state.keys().next()?;
Comment thread
loewenheim marked this conversation as resolved.

match relay_conventions::attribute_info(key)?.pii {
relay_conventions::Pii::True => Some(Pii::True),
relay_conventions::Pii::False => Some(Pii::False),
relay_conventions::Pii::Maybe => Some(Pii::Maybe),
}
}

inner(state).unwrap_or(Pii::True)
Comment thread
sentry[bot] marked this conversation as resolved.
Comment thread
cursor[bot] marked this conversation as resolved.
}

/// Arbitrary additional data on a span.
///
/// Besides arbitrary user data, this type also contains SDK-provided fields used by the
/// product (see <https://develop.sentry.dev/sdk/performance/span-data-conventions/>).
#[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)]
#[metastructure(trim = false, pii = "maybe")]
#[metastructure(trim = false, pii = "span_data_pii_from_conventions")]
pub struct SpanData {
/// Mobile app start variant.
///
Expand Down Expand Up @@ -594,11 +614,7 @@ pub struct SpanData {
pub gen_ai_response_object: Annotated<Value>,

/// Whether or not the AI model call's response was streamed back asynchronously
#[metastructure(
field = "gen_ai.response.streaming",
legacy_alias = "ai.streaming",
pii = "false"
)]
#[metastructure(field = "gen_ai.response.streaming", legacy_alias = "ai.streaming")]
pub gen_ai_response_streaming: Annotated<Value>,

/// Total output tokens per seconds throughput
Expand Down Expand Up @@ -751,15 +767,11 @@ pub struct SpanData {
pub db_collection_name: Annotated<Value>,

/// The sentry environment.
#[metastructure(
field = "sentry.environment",
legacy_alias = "environment",
pii = "false"
)]
#[metastructure(field = "sentry.environment", legacy_alias = "environment")]
pub environment: Annotated<String>,

/// The release version of the project.
#[metastructure(field = "sentry.release", legacy_alias = "release", pii = "false")]
#[metastructure(field = "sentry.release", legacy_alias = "release")]
pub release: Annotated<LenientString>,

/// The decoded body size of the response (in bytes).
Expand Down Expand Up @@ -811,7 +823,7 @@ pub struct SpanData {
pub thread_name: Annotated<String>,

/// ID of thread from where the span originated.
#[metastructure(field = "thread.id", pii = "false")]
#[metastructure(field = "thread.id")]
pub thread_id: Annotated<ThreadId>,

/// Name of the segment that this span belongs to (see `segment_id`).
Expand All @@ -831,19 +843,19 @@ pub struct SpanData {
pub url_scheme: Annotated<Value>,

/// User Display
#[metastructure(field = "user", pii = "true")]
#[metastructure(field = "user")]
pub user: Annotated<Value>,

/// User email address.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.email", pii = "true")]
#[metastructure(field = "user.email")]
pub user_email: Annotated<String>,

/// User’s full name.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.full_name", pii = "true")]
#[metastructure(field = "user.full_name")]
pub user_full_name: Annotated<String>,

/// Two-letter country code (ISO 3166-1 alpha-2).
Expand Down Expand Up @@ -873,45 +885,50 @@ pub struct SpanData {
/// Unique user hash to correlate information for a user in anonymized form.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.hash", pii = "true")]
#[metastructure(field = "user.hash")]
pub user_hash: Annotated<String>,

/// Unique identifier of the user.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.id", pii = "true")]
#[metastructure(field = "user.id")]
pub user_id: Annotated<String>,

/// Short name or login/username of the user.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.name", pii = "true")]
#[metastructure(field = "user.name")]
pub user_name: Annotated<String>,

/// Array of user roles at the time of the event.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.roles", pii = "true")]
#[metastructure(field = "user.roles")]
pub user_roles: Annotated<Array<String>>,

/// Exclusive Time
#[metastructure(field = "sentry.exclusive_time")]
pub exclusive_time: Annotated<Value>,

/// Profile ID
#[metastructure(field = "profile_id", pii = "false")]
#[metastructure(
field = "profile_id",
// This field is not defined in conventions, so we need to set
// PII explicitly.
pii = "false"
)]
pub profile_id: Annotated<Value>,

/// Replay ID
#[metastructure(field = "sentry.replay_id", legacy_alias = "replay_id", pii = "false")]
#[metastructure(field = "sentry.replay_id", legacy_alias = "replay_id")]
pub replay_id: Annotated<Value>,

/// The sentry SDK (see [`crate::protocol::ClientSdkInfo`]).
#[metastructure(field = "sentry.sdk.name", pii = "false")]
#[metastructure(field = "sentry.sdk.name")]
pub sdk_name: Annotated<String>,

/// The sentry SDK version (see [`crate::protocol::ClientSdkInfo`]).
#[metastructure(field = "sentry.sdk.version", pii = "false")]
#[metastructure(field = "sentry.sdk.version")]
pub sdk_version: Annotated<String>,

/// Slow Frames
Expand Down Expand Up @@ -975,7 +992,7 @@ pub struct SpanData {
pub http_query: Annotated<String>,

/// The client's IP address.
#[metastructure(field = "client.address", pii = "true")]
#[metastructure(field = "client.address")]
pub client_address: Annotated<IpAddr>,

/// The current route in the application.
Expand Down Expand Up @@ -1013,7 +1030,6 @@ pub struct SpanData {
/// Other fields in `span.data`.
#[metastructure(
additional_properties,
pii = "true",
Comment thread
cursor[bot] marked this conversation as resolved.
retain = true,
skip_serialization = "null" // applies to child elements
)]
Expand Down
3 changes: 3 additions & 0 deletions relay-pii/src/processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,9 @@ fn apply_regex_to_chunks<'a>(
return;
}

// ALERT: This logic assumes that `regex` doesn't match a capture
// group starting on a null byte. If you get an error in debug mode
// about `replacement_chunks` not being empty, check the regex.
static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
let regex = NULL_SPLIT_RE.get_or_init(|| {
#[allow(clippy::trivial_regex)]
Expand Down
2 changes: 1 addition & 1 deletion relay-pii/src/regexes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ regex!(
)
)
(
[^/\\\r\n]+
[^/\\\r\n\x00]+
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this regex can match a null byte it causes problems with redactions.

)
"
);
Expand Down
61 changes: 49 additions & 12 deletions tests/integration/test_pii.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,53 @@ def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay):
assert event["spans"][0]["sentry_tags"]["user.geo.subregion"] == "***"


def test_http_query(mini_sentry, relay):
@pytest.mark.parametrize(
"field,pii",
[
# SpanData field, pii: true in conventions
("url.full", "true"),
# SpanData field, pii: maybe in conventions
("gen_ai.input.messages", "maybe"),
# SpanData field, pii: false in conventions
("sentry.release", "false"),
# SpanData field, missing in conventions, explicitly set on SpanData
("profile_id", "false"),
# Not a SpanData field, pii: true in conventions
("ai.warnings", "true"),
# Not a SpanData field, pii: maybe in conventions
("process.runtime.name", "maybe"),
# Not a SpanData field, pii: false in conventions
("sentry.cancellation_reason", "false"),
# Not a SpanData field and not defined in conventions
("madeup.field", "true"),
],
)
def test_spandata_conventions(mini_sentry, relay, field, pii):
project_id = 42
relay = relay(
mini_sentry,
)
config = mini_sentry.add_basic_project_config(project_id)
config["config"].setdefault(
"datascrubbingSettings",
{
"scrubData": True,
"scrubDefaults": True,
},
)

config["config"]["piiConfig"] = {
"rules": {
"custom_secret": {
"type": "pattern",
"pattern": r"(X-Amz-Algorithm|X-Amz-Credential)=[^& ]+",
"redaction": {"method": "mask"},
}
"pattern": ".*",
"redaction": {"method": "replace", "text": "[REDACTED]"},
},
},
"applications": {
"$string": ["@anything:mask"],
f"'{field}'": ["custom_secret"],
},
"applications": {"$span.data.*": ["custom_secret"]},
}

relay.send_event(
Expand All @@ -64,20 +96,25 @@ def test_http_query(mini_sentry, relay):
"timestamp": 1778131375.142457,
"start_timestamp": 1778131374.296492,
"exclusive_time": 845.965,
"data": {
"http.query": "X-Amz-Algorithm=Some_algorithm&X-Amz-Credential=foobar/19700101/us-east-1/s3/aws4_request&X-Amz-Signature=579375859378367127495789347628374",
},
"data": {field: "secret value"},
}
],
},
)

event = mini_sentry.get_captured_envelope().get_event()

assert (
event["spans"][0]["data"]["http.query"]
== "******************************&**********************************************************&X-Amz-Signature=579375859378367127495789347628374"
)
value = event["spans"][0]["data"][field]

# pii: true fields should get masked
if pii == "true":
assert value == "************"
# pii: maybe fields should get redacted
elif pii == "maybe":
assert value == "[REDACTED]"
# pii: false fields should be left alone
else:
assert value == "secret value"


@pytest.mark.parametrize(
Expand Down
11 changes: 9 additions & 2 deletions tests/integration/test_spansv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1481,12 +1481,19 @@ def test_spansv2_attribute_normalization(
http_result = spans_by_id[http_span_id]
assert http_result == {
**common,
"_meta": {
"attributes": {
"url.full": {
"value": {"": {"len": 63, "rem": [["@userpath", "s", 29, 35]]}}
}
}
},
Comment thread
Dav1dde marked this conversation as resolved.
"span_id": http_span_id,
"attributes": {
"sentry.category": {"type": "string", "value": "http"},
"sentry.description": {
"type": "string",
"value": "GET https://www.service.io/users/01234-qwerty/settings/98765-adfghj",
"value": "GET https://www.service.io/users/[user]/settings/98765-adfghj",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This attribute is filled from url.full, hence inherits the redaction.

},
"sentry.op": {"type": "string", "value": "http.client"},
"sentry.observed_timestamp_nanos": {
Expand All @@ -1504,7 +1511,7 @@ def test_spansv2_attribute_normalization(
"sentry.dsc.transaction": {"type": "string", "value": "/my/fancy/endpoint"},
"url.full": {
"type": "string",
"value": "https://www.service.io/users/01234-qwerty/settings/98765-adfghj",
"value": "https://www.service.io/users/[user]/settings/98765-adfghj",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This attribute has become pii: "true" in the current conventions version.

},
},
}
Expand Down
Loading