Skip to content

Commit 82ca0cf

Browse files
juanjuxbrettlangdonP403n1x87rachelyangdogquinna-h
authored
chore: fix/update tracer flares to the format expected (#13447)
Fixes: - Default value for some remote config version fields (required by system tests). - Add uuid. - Use a strict ordering of the payload. - Check the contents of the config, not the name as per the RFC. - Don't send the API KEY since it will be forwarded by the agent. - Add more tests to check these changes. Signed-off-by: Juanjo Alvarez <[email protected]>## Checklist - [X] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [ ] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Signed-off-by: Juanjo Alvarez <[email protected]> Co-authored-by: brettlangdon <[email protected]> Co-authored-by: Gabriele N. Tornetta <[email protected]> Co-authored-by: Rachel Yang <[email protected]> Co-authored-by: Quinna Halim <[email protected]> Co-authored-by: Christophe Papazian <[email protected]> Co-authored-by: Federico Mon <[email protected]> Co-authored-by: ncybul <[email protected]>
1 parent 4fcd0a9 commit 82ca0cf

File tree

5 files changed

+758
-90
lines changed

5 files changed

+758
-90
lines changed

ddtrace/internal/flare/flare.py

Lines changed: 130 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import binascii
21
import dataclasses
32
import io
43
import json
@@ -7,9 +6,8 @@
76
import os
87
import pathlib
98
import shutil
10-
from typing import Dict
9+
import time
1110
from typing import Optional
12-
from typing import Tuple
1311
import zipfile
1412

1513
from ddtrace._logger import _add_file_handler
@@ -32,6 +30,7 @@ class FlareSendRequest:
3230
case_id: str
3331
hostname: str
3432
email: str
33+
uuid: str # UUID from AGENT_TASK config for race condition prevention
3534
source: str = "tracer_python"
3635

3736

@@ -55,6 +54,8 @@ def __init__(
5554
self.url: str = trace_agent_url
5655
self._api_key: Optional[str] = api_key
5756
self.ddconfig = ddconfig
57+
# Use a fixed boundary for consistency
58+
self._BOUNDARY = "83CAD6AA-8A24-462C-8B3D-FF9CC683B51B"
5859

5960
def prepare(self, log_level: str):
6061
"""
@@ -64,32 +65,15 @@ def prepare(self, log_level: str):
6465
try:
6566
self.flare_dir.mkdir(exist_ok=True)
6667
except Exception as e:
67-
log.error("Failed to create %s directory: %s", self.flare_dir, e)
68+
log.error("Flare prepare: failed to create %s directory: %s", self.flare_dir, e)
6869
return
6970

7071
flare_log_level_int = logging.getLevelName(log_level)
7172
if type(flare_log_level_int) != int:
72-
raise TypeError("Invalid log level provided: %s", log_level)
73+
raise TypeError("Flare prepare: Invalid log level provided: %s", log_level)
7374

74-
ddlogger = get_logger("ddtrace")
75-
pid = os.getpid()
76-
flare_file_path = self.flare_dir / f"tracer_python_{pid}.log"
77-
self.original_log_level = ddlogger.level
78-
79-
# Set the logger level to the more verbose between original and flare
80-
# We do this valid_original_level check because if the log level is NOTSET, the value is 0
81-
# which is the minimum value. In this case, we just want to use the flare level, but still
82-
# retain the original state as NOTSET/0
83-
valid_original_level = (
84-
logging.CRITICAL if self.original_log_level == logging.NOTSET else self.original_log_level
85-
)
86-
logger_level = min(valid_original_level, flare_log_level_int)
87-
ddlogger.setLevel(logger_level)
88-
self.file_handler = _add_file_handler(
89-
ddlogger, flare_file_path.__str__(), flare_log_level_int, TRACER_FLARE_FILE_HANDLER_NAME
90-
)
91-
92-
# Create and add config file
75+
# Setup logging and create config file
76+
pid = self._setup_flare_logging(flare_log_level_int)
9377
self._generate_config_file(pid)
9478

9579
def send(self, flare_send_req: FlareSendRequest):
@@ -98,36 +82,16 @@ def send(self, flare_send_req: FlareSendRequest):
9882
before sending the flare.
9983
"""
10084
self.revert_configs()
101-
# We only want the flare to be sent once, even if there are
102-
# multiple tracer instances
103-
lock_path = self.flare_dir / TRACER_FLARE_LOCK
104-
if not os.path.exists(lock_path):
105-
try:
106-
open(lock_path, "w").close()
107-
except Exception as e:
108-
log.error("Failed to create %s file", lock_path)
109-
raise e
110-
try:
111-
client = get_connection(self.url, timeout=self.timeout)
112-
headers, body = self._generate_payload(flare_send_req.__dict__)
113-
client.request("POST", TRACER_FLARE_ENDPOINT, body, headers)
114-
response = client.getresponse()
115-
if response.status == 200:
116-
log.info("Successfully sent the flare to Zendesk ticket %s", flare_send_req.case_id)
117-
else:
118-
msg = "Tracer flare upload responded with status code %s:(%s) %s" % (
119-
response.status,
120-
response.reason,
121-
response.read().decode(),
122-
)
123-
raise TracerFlareSendError(msg)
124-
except Exception as e:
125-
log.error("Failed to send tracer flare to Zendesk ticket %s: %s", flare_send_req.case_id, e)
126-
raise e
127-
finally:
128-
client.close()
129-
# Clean up files regardless of success/failure
130-
self.clean_up_files()
85+
86+
# Ensure the flare directory exists (it might have been deleted by clean_up_files)
87+
self.flare_dir.mkdir(exist_ok=True)
88+
89+
try:
90+
if not self._validate_case_id(flare_send_req.case_id):
91+
return
92+
self._send_flare_request(flare_send_req)
93+
finally:
94+
self.clean_up_files()
13195

13296
def _generate_config_file(self, pid: int):
13397
config_file = self.flare_dir / f"tracer_config_{pid}.json"
@@ -161,41 +125,134 @@ def revert_configs(self):
161125
log.debug("Could not find %s to remove", TRACER_FLARE_FILE_HANDLER_NAME)
162126
ddlogger.setLevel(self.original_log_level)
163127

164-
def _generate_payload(self, params: Dict[str, str]) -> Tuple[dict, bytes]:
128+
def _validate_case_id(self, case_id: str) -> bool:
129+
"""
130+
Validate case_id (must be a digit and cannot be 0 according to spec).
131+
Returns True if valid, False otherwise. Cleans up files if invalid.
132+
"""
133+
if case_id in ("0", 0):
134+
log.warning("Case ID cannot be 0, skipping flare send")
135+
return False
136+
137+
if not case_id.isdigit():
138+
log.warning("Case ID string must contain a digit, skipping flare send")
139+
return False
140+
141+
return True
142+
143+
def _setup_flare_logging(self, flare_log_level_int: int) -> int:
144+
"""
145+
Setup flare logging configuration.
146+
Returns the process ID.
147+
"""
148+
ddlogger = get_logger("ddtrace")
149+
pid = os.getpid()
150+
flare_file_path = self.flare_dir / f"tracer_python_{pid}.log"
151+
self.original_log_level = ddlogger.level
152+
153+
# Set the logger level to the more verbose between original and flare
154+
# We do this valid_original_level check because if the log level is NOTSET, the value is 0
155+
# which is the minimum value. In this case, we just want to use the flare level, but still
156+
# retain the original state as NOTSET/0
157+
valid_original_level = (
158+
logging.CRITICAL if self.original_log_level == logging.NOTSET else self.original_log_level
159+
)
160+
logger_level = min(valid_original_level, flare_log_level_int)
161+
ddlogger.setLevel(logger_level)
162+
self.file_handler = _add_file_handler(
163+
ddlogger, flare_file_path.__str__(), flare_log_level_int, TRACER_FLARE_FILE_HANDLER_NAME
164+
)
165+
return pid
166+
167+
def _create_zip_content(self) -> bytes:
168+
"""
169+
Create ZIP file content containing all flare files.
170+
Returns the ZIP file content as bytes.
171+
"""
165172
zip_stream = io.BytesIO()
166173
with zipfile.ZipFile(zip_stream, mode="w", compression=zipfile.ZIP_DEFLATED) as zipf:
167174
for flare_file_name in self.flare_dir.iterdir():
168175
zipf.write(flare_file_name, arcname=flare_file_name.name)
169176
zip_stream.seek(0)
177+
return zip_stream.getvalue()
170178

171-
newline = b"\r\n"
179+
def _write_body_field(self, body: io.BytesIO, name: str, value: str):
180+
"""Write a form field to the multipart body."""
181+
body.write(f"--{self._BOUNDARY}\r\n".encode())
182+
body.write(f'Content-Disposition: form-data; name="{name}"\r\n\r\n'.encode())
183+
body.write(f"{value}\r\n".encode())
172184

173-
boundary = binascii.hexlify(os.urandom(16))
185+
def _generate_payload(self, flare_send_req):
186+
"""
187+
Generate the multipart form-data payload for the flare request.
188+
"""
189+
190+
# Create the multipart form data in the same order:
191+
# source, case_id, hostname, email, uuid, flare_file
174192
body = io.BytesIO()
175-
for key, value in params.items():
176-
encoded_key = key.encode()
177-
encoded_value = value.encode()
178-
body.write(b"--" + boundary + newline)
179-
body.write(b'Content-Disposition: form-data; name="%s"%s%s' % (encoded_key, newline, newline))
180-
body.write(b"%s%s" % (encoded_value, newline))
181-
182-
body.write(b"--" + boundary + newline)
183-
body.write((b'Content-Disposition: form-data; name="flare_file"; filename="flare.zip"%s' % newline))
184-
body.write(b"Content-Type: application/octet-stream%s%s" % (newline, newline))
185-
body.write(zip_stream.getvalue() + newline)
186-
body.write(b"--" + boundary + b"--")
193+
self._write_body_field(body, "source", "tracer_python")
194+
self._write_body_field(body, "case_id", flare_send_req.case_id)
195+
self._write_body_field(body, "hostname", flare_send_req.hostname)
196+
self._write_body_field(body, "email", flare_send_req.email)
197+
self._write_body_field(body, "uuid", flare_send_req.uuid)
198+
199+
# flare_file field with descriptive filename
200+
timestamp = int(time.time() * 1000)
201+
filename = f"tracer-python-{flare_send_req.case_id}-{timestamp}-debug.zip"
202+
body.write(f"--{self._BOUNDARY}\r\n".encode())
203+
body.write(f'Content-Disposition: form-data; name="flare_file"; filename="{filename}"\r\n'.encode())
204+
body.write(b"Content-Type: application/octet-stream\r\n\r\n")
205+
206+
# Create the zip file content separately
207+
body.write(self._create_zip_content() + b"\r\n")
208+
209+
# Ending boundary
210+
body.write(f"--{self._BOUNDARY}--\r\n".encode())
211+
212+
# Set headers
187213
headers = {
188-
"Content-Type": b"multipart/form-data; boundary=%s" % boundary,
189-
"Content-Length": body.getbuffer().nbytes,
214+
"Content-Type": f"multipart/form-data; boundary={self._BOUNDARY}",
215+
"Content-Length": str(body.tell()),
190216
}
191-
if self._api_key:
192-
headers["DD-API-KEY"] = self._api_key
217+
218+
# Note: don't send DD-API-KEY or Host Header - the agent should add it when forwarding to backend
193219
return headers, body.getvalue()
194220

195221
def _get_valid_logger_level(self, flare_log_level: int) -> int:
196222
valid_original_level = 100 if self.original_log_level == 0 else self.original_log_level
197223
return min(valid_original_level, flare_log_level)
198224

225+
def _send_flare_request(self, flare_send_req: FlareSendRequest):
226+
"""
227+
Send the flare request to the agent.
228+
"""
229+
# We only want the flare to be sent once, even if there are
230+
# multiple tracer instances
231+
lock_path = self.flare_dir / TRACER_FLARE_LOCK
232+
if not os.path.exists(lock_path):
233+
open(lock_path, "w").close()
234+
client = None
235+
try:
236+
client = get_connection(self.url, timeout=self.timeout)
237+
headers, body = self._generate_payload(flare_send_req)
238+
client.request("POST", TRACER_FLARE_ENDPOINT, body, headers)
239+
response = client.getresponse()
240+
if response.status == 200:
241+
log.info("Successfully sent the flare to Zendesk ticket %s", flare_send_req.case_id)
242+
else:
243+
msg = "Tracer flare upload responded with status code %s:(%s) %s" % (
244+
response.status,
245+
response.reason,
246+
response.read().decode(),
247+
)
248+
raise TracerFlareSendError(msg)
249+
except Exception as e:
250+
log.error("Failed to send tracer flare to Zendesk ticket %s: %s", flare_send_req.case_id, e)
251+
raise e
252+
finally:
253+
if client is not None:
254+
client.close()
255+
199256
def clean_up_files(self):
200257
try:
201258
shutil.rmtree(self.flare_dir)

ddtrace/internal/flare/handler.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,22 @@ def _prepare_tracer_flare(flare: Flare, configs: List[Any]) -> bool:
5959
"""
6060
for c in configs:
6161
# AGENT_CONFIG is currently being used for multiple purposes
62-
# We only want to prepare for a tracer flare if the config name
63-
# starts with 'flare-log-level'
62+
# We only want to prepare for a tracer flare if the config content
63+
# has a log_level.
6464
if not isinstance(c, dict):
6565
log.debug("Config item is not type dict, received type %s instead. Skipping...", str(type(c)))
6666
continue
67-
if not c.get("name", "").startswith("flare-log-level"):
67+
68+
config_content = c.get("config", {})
69+
log_level = config_content.get("log_level", "")
70+
if log_level == "":
6871
log.debug(
69-
"Config item name does not start with flare-log-level, received %s instead. Skipping...", c.get("name")
72+
"Config item does not contain log_level, received %s instead. Skipping...",
73+
config_content.get("log_level"),
7074
)
7175
continue
7276

73-
flare_log_level = c.get("config", {}).get("log_level").upper()
77+
flare_log_level = log_level.lower()
7478
flare.prepare(flare_log_level)
7579
return True
7680
return False
@@ -95,8 +99,13 @@ def _generate_tracer_flare(flare: Flare, configs: List[Any]) -> bool:
9599
)
96100
continue
97101
args = c.get("args", {})
102+
uuid = c.get("uuid")
103+
if not uuid:
104+
log.warning("AGENT_TASK config missing UUID, skipping tracer flare")
105+
continue
106+
98107
flare_request = FlareSendRequest(
99-
case_id=args.get("case_id"), hostname=args.get("hostname"), email=args.get("user_handle")
108+
case_id=args.get("case_id"), hostname=args.get("hostname"), email=args.get("user_handle"), uuid=uuid
100109
)
101110

102111
flare.revert_configs()

ddtrace/internal/remoteconfig/client.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ class Root:
101101
expires: datetime
102102
keys: Mapping[str, Key]
103103
roles: Mapping[str, Role]
104-
version: int
104+
version: int = 0
105105

106106
def __post_init__(self):
107107
if self._type != "root":
@@ -143,7 +143,7 @@ class Targets:
143143
expires: datetime
144144
spec_version: str
145145
targets: Mapping[str, TargetDesc]
146-
version: int
146+
version: int = 0
147147

148148
def __post_init__(self):
149149
if self._type != "targets":
@@ -161,6 +161,7 @@ def __post_init__(self):
161161
class SignedTargets:
162162
signatures: List[Signature]
163163
signed: Targets
164+
version: int = 0
164165

165166
def __post_init__(self):
166167
for i in range(len(self.signatures)):

0 commit comments

Comments
 (0)