Skip to content

Commit 4ba8976

Browse files
chore(asm): more tags for appsec telemetry (#13005)
APPSEC-56592 - small type refactor, moving some classes from inside _ddwaf to appsec._utils - small telemetry refactor with proper definition of a class to accumulate results instead of just using dictionaries. - add 2 missing telemetry tags `waf_error` and `rate_limited` and telemetry config_error metric counter. - update some tests to handle the new tags and metrics ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent bbd364d commit 4ba8976

File tree

8 files changed

+235
-179
lines changed

8 files changed

+235
-179
lines changed

ddtrace/appsec/_asm_request_context.py

Lines changed: 72 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
11
import functools
22
import json
33
import re
4-
from typing import TYPE_CHECKING
54
from typing import Any
65
from typing import Callable
76
from typing import Dict
87
from typing import List
9-
from typing import Literal # noqa:F401
8+
from typing import Literal
109
from typing import Optional
1110
from typing import Set
1211
from typing import Union
1312
from urllib import parse
1413

1514
from ddtrace.appsec._constants import APPSEC
16-
from ddtrace.appsec._constants import EXPLOIT_PREVENTION
1715
from ddtrace.appsec._constants import SPAN_DATA_NAMES
18-
from ddtrace.appsec._utils import _observator
16+
from ddtrace.appsec._utils import DDWaf_info
17+
from ddtrace.appsec._utils import DDWaf_result
18+
from ddtrace.appsec._utils import Telemetry_result
1919
from ddtrace.appsec._utils import add_context_log
2020
from ddtrace.appsec._utils import get_triggers
2121
from ddtrace.internal import core
@@ -26,10 +26,6 @@
2626
from ddtrace.trace import Span
2727

2828

29-
if TYPE_CHECKING:
30-
from ddtrace.appsec._ddwaf import DDWaf_info
31-
from ddtrace.appsec._ddwaf import DDWaf_result
32-
3329
log = get_logger(__name__)
3430

3531
# Stopgap module for providing ASM context for the blocking features wrapping some contextvars.
@@ -42,7 +38,6 @@
4238
_CONTEXT_CALL: Literal["context"] = "context"
4339
_WAF_CALL: Literal["waf_run"] = "waf_run"
4440
_BLOCK_CALL: Literal["block"] = "block"
45-
_TELEMETRY_WAF_RESULTS: Literal["t_waf_results"] = "t_waf_results"
4641

4742

4843
GLOBAL_CALLBACKS: Dict[str, List[Callable]] = {_CONTEXT_CALL: []}
@@ -73,28 +68,10 @@ def __init__(self, span: Optional[Span] = None):
7368
else:
7469
self.framework = self.span.name
7570
self.framework = self.framework.lower().replace(" ", "_")
76-
self.waf_info: Optional[Callable[[], "DDWaf_info"]] = None
71+
self.waf_info: Optional[Callable[[], DDWaf_info]] = None
7772
self.waf_addresses: Dict[str, Any] = {}
7873
self.callbacks: Dict[str, Any] = {_CONTEXT_CALL: []}
79-
self.telemetry: Dict[str, Any] = {
80-
_TELEMETRY_WAF_RESULTS: {
81-
"blocked": False,
82-
"triggered": False,
83-
"timeout": 0,
84-
"version": None,
85-
"duration": 0.0,
86-
"total_duration": 0.0,
87-
"rasp": {
88-
"sum_eval": 0,
89-
"duration": 0.0,
90-
"total_duration": 0.0,
91-
"eval": {t: 0 for _, t in EXPLOIT_PREVENTION.TYPE},
92-
"match": {t: 0 for _, t in EXPLOIT_PREVENTION.TYPE},
93-
"timeout": {t: 0 for _, t in EXPLOIT_PREVENTION.TYPE},
94-
},
95-
"truncation": {"string_length": [], "container_size": [], "container_depth": []},
96-
}
97-
}
74+
self.telemetry: Telemetry_result = Telemetry_result()
9875
self.addresses_sent: Set[str] = set()
9976
self.waf_triggers: List[Dict[str, Any]] = []
10077
self.blocked: Optional[Dict[str, Any]] = None
@@ -183,6 +160,8 @@ def update_span_metrics(span: Span, name: str, value: Union[float, int]) -> None
183160

184161

185162
def flush_waf_triggers(env: ASM_Environment) -> None:
163+
from ddtrace.appsec._metrics import DDWAF_VERSION
164+
186165
# Make sure we find a root span to attach the triggers to
187166
if env.span is None:
188167
from ddtrace.trace import tracer
@@ -204,35 +183,29 @@ def flush_waf_triggers(env: ASM_Environment) -> None:
204183
else:
205184
root_span.set_tag(APPSEC.JSON, json.dumps({"triggers": report_list}, separators=(",", ":")))
206185
env.waf_triggers = []
207-
telemetry_results = get_value(_TELEMETRY, _TELEMETRY_WAF_RESULTS)
208-
if telemetry_results:
209-
from ddtrace.appsec._metrics import DDWAF_VERSION
210-
211-
root_span.set_tag_str(APPSEC.WAF_VERSION, DDWAF_VERSION)
212-
if telemetry_results["total_duration"]:
213-
update_span_metrics(root_span, APPSEC.WAF_DURATION, telemetry_results["duration"])
214-
telemetry_results["duration"] = 0.0
215-
update_span_metrics(root_span, APPSEC.WAF_DURATION_EXT, telemetry_results["total_duration"])
216-
telemetry_results["total_duration"] = 0.0
217-
if telemetry_results["timeout"]:
218-
update_span_metrics(root_span, APPSEC.WAF_TIMEOUTS, telemetry_results["timeout"])
219-
rasp_timeouts = sum(telemetry_results["rasp"]["timeout"].values())
220-
if rasp_timeouts:
221-
update_span_metrics(root_span, APPSEC.RASP_TIMEOUTS, rasp_timeouts)
222-
if telemetry_results["rasp"]["sum_eval"]:
223-
update_span_metrics(root_span, APPSEC.RASP_DURATION, telemetry_results["rasp"]["duration"])
224-
update_span_metrics(root_span, APPSEC.RASP_DURATION_EXT, telemetry_results["rasp"]["total_duration"])
225-
update_span_metrics(root_span, APPSEC.RASP_RULE_EVAL, telemetry_results["rasp"]["sum_eval"])
226-
if telemetry_results["truncation"]["string_length"]:
227-
root_span.set_metric(APPSEC.TRUNCATION_STRING_LENGTH, max(telemetry_results["truncation"]["string_length"]))
228-
if telemetry_results["truncation"]["container_size"]:
229-
root_span.set_metric(
230-
APPSEC.TRUNCATION_CONTAINER_SIZE, max(telemetry_results["truncation"]["container_size"])
231-
)
232-
if telemetry_results["truncation"]["container_depth"]:
233-
root_span.set_metric(
234-
APPSEC.TRUNCATION_CONTAINER_DEPTH, max(telemetry_results["truncation"]["container_depth"])
235-
)
186+
telemetry_results: Telemetry_result = env.telemetry
187+
188+
root_span.set_tag_str(APPSEC.WAF_VERSION, DDWAF_VERSION)
189+
if telemetry_results.total_duration:
190+
update_span_metrics(root_span, APPSEC.WAF_DURATION, telemetry_results.duration)
191+
telemetry_results.duration = 0.0
192+
update_span_metrics(root_span, APPSEC.WAF_DURATION_EXT, telemetry_results.total_duration)
193+
telemetry_results.total_duration = 0.0
194+
if telemetry_results.timeout:
195+
update_span_metrics(root_span, APPSEC.WAF_TIMEOUTS, telemetry_results.timeout)
196+
rasp_timeouts = sum(telemetry_results.rasp.timeout.values())
197+
if rasp_timeouts:
198+
update_span_metrics(root_span, APPSEC.RASP_TIMEOUTS, rasp_timeouts)
199+
if telemetry_results.rasp.sum_eval:
200+
update_span_metrics(root_span, APPSEC.RASP_DURATION, telemetry_results.rasp.duration)
201+
update_span_metrics(root_span, APPSEC.RASP_DURATION_EXT, telemetry_results.rasp.total_duration)
202+
update_span_metrics(root_span, APPSEC.RASP_RULE_EVAL, telemetry_results.rasp.sum_eval)
203+
if telemetry_results.truncation.string_length:
204+
root_span.set_metric(APPSEC.TRUNCATION_STRING_LENGTH, max(telemetry_results.truncation.string_length))
205+
if telemetry_results.truncation.container_size:
206+
root_span.set_metric(APPSEC.TRUNCATION_CONTAINER_SIZE, max(telemetry_results.truncation.container_size))
207+
if telemetry_results.truncation.container_depth:
208+
root_span.set_metric(APPSEC.TRUNCATION_CONTAINER_DEPTH, max(telemetry_results.truncation.container_depth))
236209

237210

238211
def finalize_asm_env(env: ASM_Environment) -> None:
@@ -338,7 +311,7 @@ def set_waf_callback(value) -> None:
338311
set_value(_CALLBACKS, _WAF_CALL, value)
339312

340313

341-
def set_waf_info(info: Callable[[], "DDWaf_info"]) -> None:
314+
def set_waf_info(info: Callable[[], DDWaf_info]) -> None:
342315
env = _get_asm_context()
343316
if env is None:
344317
info_str = add_context_log(log, "appsec.asm_context.warning::set_waf_info::no_active_context")
@@ -347,7 +320,7 @@ def set_waf_info(info: Callable[[], "DDWaf_info"]) -> None:
347320
env.waf_info = info
348321

349322

350-
def call_waf_callback(custom_data: Optional[Dict[str, Any]] = None, **kwargs) -> Optional["DDWaf_result"]:
323+
def call_waf_callback(custom_data: Optional[Dict[str, Any]] = None, **kwargs) -> Optional[DDWaf_result]:
351324
if not asm_config._asm_enabled:
352325
return None
353326
callback = get_value(_CALLBACKS, _WAF_CALL)
@@ -435,44 +408,53 @@ def asm_request_context_set(
435408

436409
def set_waf_telemetry_results(
437410
rules_version: Optional[str],
438-
is_triggered: bool,
439411
is_blocked: bool,
440-
is_timeout: bool,
412+
waf_results: DDWaf_result,
441413
rule_type: Optional[str],
442-
duration: float,
443-
total_duration: float,
444-
truncation: _observator,
414+
is_sampled: bool,
445415
) -> None:
446-
result = get_value(_TELEMETRY, _TELEMETRY_WAF_RESULTS)
416+
env = _get_asm_context()
417+
if env is None:
418+
return
419+
result: Telemetry_result = env.telemetry
420+
is_triggered = bool(waf_results.data)
447421
from ddtrace.appsec._metrics import _report_waf_truncations
448422

449-
_report_waf_truncations(truncation)
450-
if result is not None:
451-
for key in ["container_size", "container_depth", "string_length"]:
452-
res = getattr(truncation, key)
453-
if isinstance(res, int):
454-
result["truncation"][key].append(res)
455-
if rule_type is None:
456-
# Request Blocking telemetry
457-
result["triggered"] |= is_triggered
458-
result["blocked"] |= is_blocked
459-
result["timeout"] += is_timeout
460-
if rules_version is not None:
461-
result["version"] = rules_version
462-
result["duration"] += duration
463-
result["total_duration"] += total_duration
423+
result.rate_limited |= is_sampled
424+
if waf_results.return_code:
425+
if result.error:
426+
result.error = max(result.error, waf_results.return_code)
464427
else:
465-
# Exploit Prevention telemetry
466-
result["rasp"]["sum_eval"] += 1
467-
result["rasp"]["eval"][rule_type] += 1
468-
result["rasp"]["match"][rule_type] += int(is_triggered)
469-
result["rasp"]["timeout"][rule_type] += int(is_timeout)
470-
result["rasp"]["duration"] += duration
471-
result["rasp"]["total_duration"] += total_duration
428+
result.error = waf_results.return_code
429+
_report_waf_truncations(waf_results.truncation)
430+
for key in ["container_size", "container_depth", "string_length"]:
431+
res = getattr(waf_results.truncation, key)
432+
if isinstance(res, int):
433+
getattr(result.truncation, key).append(res)
434+
if rule_type is None:
435+
# Request Blocking telemetry
436+
result.triggered |= is_triggered
437+
result.blocked |= is_blocked
438+
result.timeout += waf_results.timeout
439+
if rules_version is not None:
440+
result.version = rules_version
441+
result.duration += waf_results.runtime
442+
result.total_duration += waf_results.total_runtime
443+
else:
444+
# Exploit Prevention telemetry
445+
result.rasp.sum_eval += 1
446+
result.rasp.eval[rule_type] += 1
447+
result.rasp.match[rule_type] += int(is_triggered)
448+
result.rasp.timeout[rule_type] += int(waf_results.timeout)
449+
result.rasp.duration += waf_results.runtime
450+
result.rasp.total_duration += waf_results.total_runtime
472451

473452

474-
def get_waf_telemetry_results() -> Optional[Dict[str, Any]]:
475-
return get_value(_TELEMETRY, _TELEMETRY_WAF_RESULTS)
453+
def get_waf_telemetry_results() -> Optional[Telemetry_result]:
454+
env = _get_asm_context()
455+
if env:
456+
return env.telemetry
457+
return None
476458

477459

478460
def store_waf_results_data(data) -> None:

ddtrace/appsec/_ddwaf/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from typing import Type
22

33
from ddtrace.appsec._ddwaf.waf_stubs import WAF
4-
from ddtrace.appsec._ddwaf.waf_stubs import DDWaf_info
5-
from ddtrace.appsec._ddwaf.waf_stubs import DDWaf_result
64
from ddtrace.appsec._ddwaf.waf_stubs import DDWafRulesType
5+
from ddtrace.appsec._utils import DDWaf_info
6+
from ddtrace.appsec._utils import DDWaf_result
77
from ddtrace.internal.logger import get_logger
88
from ddtrace.settings.asm import config as asm_config
99

ddtrace/appsec/_ddwaf/waf_stubs.py

Lines changed: 2 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
from typing import Union
1414

1515
from ddtrace.appsec._constants import DEFAULT
16-
from ddtrace.appsec._utils import _observator
16+
from ddtrace.appsec._utils import DDWaf_info
17+
from ddtrace.appsec._utils import DDWaf_result
1718
from ddtrace.internal.logger import get_logger
1819
from ddtrace.internal.remoteconfig import PayloadType
1920

@@ -27,56 +28,6 @@
2728
DDWafRulesType = Union[None, int, str, List[Any], Dict[str, Any]]
2829

2930

30-
class DDWaf_result:
31-
__slots__ = ["return_code", "data", "actions", "runtime", "total_runtime", "timeout", "truncation", "derivatives"]
32-
33-
def __init__(
34-
self,
35-
return_code: int,
36-
data: List[Dict[str, Any]],
37-
actions: Dict[str, Any],
38-
runtime: float,
39-
total_runtime: float,
40-
timeout: bool,
41-
truncation: _observator,
42-
derivatives: Dict[str, Any],
43-
):
44-
self.return_code = return_code
45-
self.data = data
46-
self.actions = actions
47-
self.runtime = runtime
48-
self.total_runtime = total_runtime
49-
self.timeout = timeout
50-
self.truncation = truncation
51-
self.derivatives = derivatives
52-
53-
def __repr__(self):
54-
return (
55-
f"DDWaf_result(return_code: {self.return_code} data: {self.data},"
56-
f" actions: {self.actions}, runtime: {self.runtime},"
57-
f" total_runtime: {self.total_runtime}, timeout: {self.timeout},"
58-
f" truncation: {self.truncation}, derivatives: {self.derivatives})"
59-
)
60-
61-
62-
class DDWaf_info:
63-
__slots__ = ["loaded", "failed", "errors", "version"]
64-
65-
def __init__(self, loaded: int, failed: int, errors: str, version: str):
66-
self.loaded = loaded
67-
self.failed = failed
68-
self.errors = errors
69-
self.version = version
70-
71-
def __repr__(self):
72-
return "{loaded: %d, failed: %d, errors: %s, version: %s}" % (
73-
self.loaded,
74-
self.failed,
75-
self.errors,
76-
self.version,
77-
)
78-
79-
8031
class ddwaf_handle_capsule(Generic[T]):
8132
def __init__(self, handle: Type[T], free_fn: Callable[[Type[T]], None]) -> None:
8233
self.handle = handle

ddtrace/appsec/_metrics.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,18 @@
2020

2121
@deduplication
2222
def _set_waf_error_log(msg: str, version: str, error_level: bool = True) -> None:
23-
tags = {
23+
log_tags = {
2424
"waf_version": DDWAF_VERSION,
2525
"event_rules_version": version or UNKNOWN_VERSION,
2626
"lib_language": "python",
2727
}
2828
level = TELEMETRY_LOG_LEVEL.ERROR if error_level else TELEMETRY_LOG_LEVEL.WARNING
29-
telemetry.telemetry_writer.add_log(level, msg, tags=tags)
29+
telemetry.telemetry_writer.add_log(level, msg, tags=log_tags)
30+
tags = (
31+
("waf_version", DDWAF_VERSION),
32+
("event_rules_version", version or UNKNOWN_VERSION),
33+
)
34+
telemetry.telemetry_writer.add_count_metric(TELEMETRY_NAMESPACE.APPSEC, "waf.config_errors", 1, tags=tags)
3035

3136

3237
def _set_waf_updates_metric(info, success: bool):
@@ -110,26 +115,26 @@ def _set_waf_request_metrics(*_args):
110115
# TODO: enable it when Telemetry intake accepts this tag
111116
# is_truncation = any((result.truncation for result in list_results))
112117

113-
truncation = result["truncation"]
114-
input_truncated = bool(
115-
truncation["string_length"] or truncation["container_size"] or truncation["container_depth"]
116-
)
118+
truncation = result.truncation
119+
input_truncated = bool(truncation.string_length or truncation.container_size or truncation.container_depth)
117120
tags_request = (
118-
("event_rules_version", result["version"] or UNKNOWN_VERSION),
121+
("event_rules_version", result.version or UNKNOWN_VERSION),
119122
("waf_version", DDWAF_VERSION),
120-
("rule_triggered", bool_str[result["triggered"]]),
121-
("request_blocked", bool_str[result["blocked"]]),
122-
("waf_timeout", bool_str[bool(result["timeout"])]),
123+
("rule_triggered", bool_str[result.triggered]),
124+
("request_blocked", bool_str[result.blocked]),
125+
("waf_timeout", bool_str[bool(result.timeout)]),
123126
("input_truncated", bool_str[input_truncated]),
127+
("waf_error", str(result.error)),
128+
("rate_limited", bool_str[result.rate_limited]),
124129
)
125130

126131
telemetry.telemetry_writer.add_count_metric(
127132
TELEMETRY_NAMESPACE.APPSEC, "waf.requests", 1, tags=tags_request
128133
)
129-
rasp = result["rasp"]
130-
if rasp["sum_eval"]:
134+
rasp = result.rasp
135+
if rasp.sum_eval:
131136
for t, n in [("eval", "rasp.rule.eval"), ("match", "rasp.rule.match"), ("timeout", "rasp.timeout")]:
132-
for rule_type, value in rasp[t].items():
137+
for rule_type, value in getattr(rasp, t).items():
133138
if value:
134139
telemetry.telemetry_writer.add_count_metric(
135140
TELEMETRY_NAMESPACE.APPSEC,
@@ -138,7 +143,7 @@ def _set_waf_request_metrics(*_args):
138143
tags=_TYPES_AND_TAGS.get(rule_type, ())
139144
+ (
140145
("waf_version", DDWAF_VERSION),
141-
("event_rules_version", result["version"] or UNKNOWN_VERSION),
146+
("event_rules_version", result.version or UNKNOWN_VERSION),
142147
),
143148
)
144149

0 commit comments

Comments
 (0)