Skip to content

Commit c4448ea

Browse files
authored
fix(llmobs): propagate distributed headers via signal dispatching, not config (#12089)
This PR changes our shared distributed tracing header injection method to dispatch signals/events instead of relying on the global config setting, which is only modifiable via env vars. This fixes distributed tracing for users that might rely solely on the `LLMObs.enable()` setup config. Programmatic `LLMObs.enable()`/`LLMObs.disable()` calls do not set the global `config._llmobs_enabled` boolean setting, which is only controlled by the `DD_LLMOBS_ENABLED` env var. This was problematic for users that relied on manual `LLMObs.enable()` setup (i.e. no env vars) because our distributed tracing injection code only checks the global config to decide whether to inject LLMObs parent IDs into request headers. If users manually enabled LLMObs without any env vars, this would not be reflected in the global config value, and thus LLMObs parent IDs would never be injected into the request headers. We can't check directly if LLMObs is enabled in the http injection module because: 1. This would require us to import significant product-specific LLMObs code into the shared http injector helper module, which would impact non-LLMObs users' app performance. 2. It would introduce circular imports, since LLMObs imports the http injector logic to use in its own helpers. Instead of doing our check based on the global `config._llmobs_enabled` setting, we now send a tracing event to our shared product listeners, and register a corresponding `LLMObs._inject_llmobs_context()` hook to be called for all `inject()` calls if LLMObs is enabled (we check the LLMObs instance, not the global config setting value). ~One risk, and the reason I don't like changing global config settings, is that this implies the setting is no longer global or tied to an env var (I want to push for env var configuration where possible over manual overriding/enabling). 
If a global enabled config can be toggled indiscriminately, then this could open a can of worms for enabling/disabling logic in our LLMObs service, which isn't really designed to be toggled on/off multiple times in the app's lifespan. However, if some users cannot rely on env vars, then I don't see any other solution that does not couple tracer internal code with LLMObs code, which is not an option.~ (UPDATE: we avoided this issue by using signal dispatching) ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent 4f0bcb5 commit c4448ea

File tree

6 files changed

+49
-50
lines changed

6 files changed

+49
-50
lines changed

ddtrace/llmobs/_llmobs.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ def _stop_service(self) -> None:
285285
# Remove listener hooks for span events
286286
core.reset_listeners("trace.span_start", self._on_span_start)
287287
core.reset_listeners("trace.span_finish", self._on_span_finish)
288+
core.reset_listeners("http.span_inject", self._inject_llmobs_context)
288289

289290
forksafe.unregister(self._child_after_fork)
290291

@@ -369,6 +370,7 @@ def enable(
369370
# Register hooks for span events
370371
core.on("trace.span_start", cls._instance._on_span_start)
371372
core.on("trace.span_finish", cls._instance._on_span_finish)
373+
core.on("http.span_inject", cls._instance._inject_llmobs_context)
372374

373375
atexit.register(cls.disable)
374376
telemetry_writer.product_activated(TELEMETRY_APM_PRODUCT.LLMOBS, True)
@@ -1162,6 +1164,11 @@ def submit_evaluation(
11621164

11631165
cls._instance._llmobs_eval_metric_writer.enqueue(evaluation_metric)
11641166

1167+
def _inject_llmobs_context(self, span_context: Context, request_headers: Dict[str, str]) -> None:
1168+
if self.enabled is False:
1169+
return
1170+
_inject_llmobs_parent_id(span_context)
1171+
11651172
@classmethod
11661173
def inject_distributed_headers(cls, request_headers: Dict[str, str], span: Optional[Span] = None) -> Dict[str, str]:
11671174
"""Injects the span's distributed context into the given request headers."""
@@ -1179,7 +1186,6 @@ def inject_distributed_headers(cls, request_headers: Dict[str, str], span: Optio
11791186
if span is None:
11801187
log.warning("No span provided and no currently active span found.")
11811188
return request_headers
1182-
_inject_llmobs_parent_id(span.context)
11831189
HTTPPropagator.inject(span.context, request_headers)
11841190
return request_headers
11851191

ddtrace/propagation/http.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from ddtrace._trace.span import _get_64_lowest_order_bits_as_int
2929
from ddtrace._trace.span import _MetaDictType
3030
from ddtrace.appsec._constants import APPSEC
31+
from ddtrace.internal.core import dispatch
3132
from ddtrace.settings.asm import config as asm_config
3233

3334
from ..constants import AUTO_KEEP
@@ -1052,6 +1053,7 @@ def parent_call():
10521053
:param dict headers: HTTP headers to extend with tracing attributes.
10531054
:param Span non_active_span: Only to be used if injecting a non-active span.
10541055
"""
1056+
dispatch("http.span_inject", (span_context, headers))
10551057
if not config._propagation_style_inject:
10561058
return
10571059
if non_active_span is not None and non_active_span.context is not span_context:
@@ -1089,11 +1091,6 @@ def parent_call():
10891091
for key in span_context._baggage:
10901092
headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key]
10911093

1092-
if config._llmobs_enabled:
1093-
from ddtrace.llmobs._utils import _inject_llmobs_parent_id
1094-
1095-
_inject_llmobs_parent_id(span_context)
1096-
10971094
if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject:
10981095
_DatadogMultiHeader._inject(span_context, headers)
10991096
if PROPAGATION_STYLE_B3_MULTI in config._propagation_style_inject:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
LLM Observability: Resolves an issue where explicitly only using ``LLMObs.enable()`` to configure LLM Observability
5+
without environment variables would not automatically propagate distributed tracing headers.

tests/llmobs/test_llmobs_service.py

-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ def test_service_enable_proxy_default():
6060
assert llmobs_instance.tracer == dummy_tracer
6161
assert isinstance(llmobs_instance._llmobs_span_writer._clients[0], LLMObsProxiedEventClient)
6262
assert run_llmobs_trace_filter(dummy_tracer) is not None
63-
6463
llmobs_service.disable()
6564

6665

tests/llmobs/test_propagation.py

+35-19
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,24 @@ def test_propagate_correct_llmobs_parent_id_simple(run_python_code_in_subprocess
5757
"""
5858
code = """
5959
import json
60+
import mock
6061
61-
from ddtrace import tracer
62-
from ddtrace.ext import SpanTypes
62+
from ddtrace.internal.utils.http import Response
63+
from ddtrace.llmobs import LLMObs
6364
from ddtrace.propagation.http import HTTPPropagator
6465
65-
with tracer.trace("LLMObs span", span_type=SpanTypes.LLM) as root_span:
66-
with tracer.trace("Non-LLMObs span") as child_span:
67-
headers = {"_DD_LLMOBS_SPAN_ID": str(root_span.span_id)}
68-
HTTPPropagator.inject(child_span.context, headers)
66+
with mock.patch(
67+
"ddtrace.internal.writer.HTTPWriter._send_payload", return_value=Response(status=200, body="{}"),
68+
):
69+
LLMObs.enable(ml_app="test-app", api_key="<not-a-real-key>", agentless_enabled=True)
70+
with LLMObs.workflow("LLMObs span") as root_span:
71+
with LLMObs._instance.tracer.trace("Non-LLMObs span") as child_span:
72+
headers = {"_DD_LLMOBS_SPAN_ID": str(root_span.span_id)}
73+
HTTPPropagator.inject(child_span.context, headers)
6974
7075
print(json.dumps(headers))
7176
"""
7277
env = os.environ.copy()
73-
env["DD_LLMOBS_ENABLED"] = "1"
7478
env["DD_TRACE_ENABLED"] = "0"
7579
stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env)
7680
assert status == 0, (stdout, stderr)
@@ -93,21 +97,33 @@ def test_propagate_llmobs_parent_id_complex(run_python_code_in_subprocess):
9397
"""
9498
code = """
9599
import json
100+
import mock
96101
97-
from ddtrace import tracer
98-
from ddtrace.ext import SpanTypes
102+
from ddtrace.internal.utils.http import Response
103+
from ddtrace.llmobs import LLMObs
99104
from ddtrace.propagation.http import HTTPPropagator
100105
101-
with tracer.trace("LLMObs span", span_type=SpanTypes.LLM) as root_span:
102-
with tracer.trace("Non-LLMObs span") as child_span:
103-
headers = {"_DD_LLMOBS_SPAN_ID": str(root_span.span_id)}
104-
HTTPPropagator.inject(child_span.context, headers)
106+
with mock.patch(
107+
"ddtrace.internal.writer.HTTPWriter._send_payload", return_value=Response(status=200, body="{}"),
108+
):
109+
from ddtrace import auto # simulate ddtrace-run startup to ensure env var configs also propagate
110+
with LLMObs.workflow("LLMObs span") as root_span:
111+
with LLMObs._instance.tracer.trace("Non-LLMObs span") as child_span:
112+
headers = {"_DD_LLMOBS_SPAN_ID": str(root_span.span_id)}
113+
HTTPPropagator.inject(child_span.context, headers)
105114
106115
print(json.dumps(headers))
107116
"""
108117
env = os.environ.copy()
109-
env["DD_LLMOBS_ENABLED"] = "1"
110-
env["DD_TRACE_ENABLED"] = "0"
118+
env.update(
119+
{
120+
"DD_LLMOBS_ENABLED": "1",
121+
"DD_TRACE_ENABLED": "0",
122+
"DD_AGENTLESS_ENABLED": "1",
123+
"DD_API_KEY": "<not-a-real-key>",
124+
"DD_LLMOBS_ML_APP": "test-app",
125+
}
126+
)
111127
stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env)
112128
assert status == 0, (stdout, stderr)
113129
assert stderr == b"", (stdout, stderr)
@@ -124,25 +140,25 @@ def test_propagate_llmobs_parent_id_complex(run_python_code_in_subprocess):
124140

125141

126142
def test_no_llmobs_parent_id_propagated_if_no_llmobs_spans(run_python_code_in_subprocess):
127-
"""Test that the correct LLMObs parent ID (None) is extracted from the headers in a simple distributed scenario.
143+
"""Test that the correct LLMObs parent ID ('undefined') is extracted from headers in a simple distributed scenario.
128144
Service A (subprocess) has spans, but none are LLMObs spans.
129145
Service B (outside subprocess) has a LLMObs span.
130146
Service B's span should have no LLMObs parent ID as there are no LLMObs spans from service A.
131147
"""
132148
code = """
133149
import json
134150
135-
from ddtrace import tracer
151+
from ddtrace.llmobs import LLMObs
136152
from ddtrace.propagation.http import HTTPPropagator
137153
138-
with tracer.trace("Non-LLMObs span") as root_span:
154+
LLMObs.enable(ml_app="ml-app", agentless_enabled=True, api_key="<not-a-real-key>")
155+
with LLMObs._instance.tracer.trace("Non-LLMObs span") as root_span:
139156
headers = {}
140157
HTTPPropagator.inject(root_span.context, headers)
141158
142159
print(json.dumps(headers))
143160
"""
144161
env = os.environ.copy()
145-
env["DD_LLMOBS_ENABLED"] = "1"
146162
env["DD_TRACE_ENABLED"] = "0"
147163
stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env)
148164
assert status == 0, (stdout, stderr)

tests/tracer/test_propagation.py

-24
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import os
55
import pickle
66

7-
import mock
87
import pytest
98

109
import ddtrace
@@ -3387,29 +3386,6 @@ def test_DD_TRACE_PROPAGATION_STYLE_INJECT_overrides_DD_TRACE_PROPAGATION_STYLE(
33873386
assert result == expected_headers
33883387

33893388

3390-
def test_llmobs_enabled_injects_llmobs_parent_id():
3391-
with override_global_config(dict(_llmobs_enabled=True)):
3392-
with mock.patch("ddtrace.llmobs._utils._inject_llmobs_parent_id") as mock_llmobs_inject:
3393-
context = Context(trace_id=1, span_id=2)
3394-
HTTPPropagator.inject(context, {})
3395-
mock_llmobs_inject.assert_called_once_with(context)
3396-
3397-
3398-
def test_llmobs_disabled_does_not_inject_parent_id():
3399-
with override_global_config(dict(_llmobs_enabled=False)):
3400-
with mock.patch("ddtrace.llmobs._utils._inject_llmobs_parent_id") as mock_llmobs_inject:
3401-
context = Context(trace_id=1, span_id=2)
3402-
HTTPPropagator.inject(context, {})
3403-
mock_llmobs_inject.assert_not_called()
3404-
3405-
3406-
def test_llmobs_parent_id_not_injected_by_default():
3407-
with mock.patch("ddtrace.llmobs._utils._inject_llmobs_parent_id") as mock_llmobs_inject:
3408-
context = Context(trace_id=1, span_id=2)
3409-
HTTPPropagator.inject(context, {})
3410-
mock_llmobs_inject.assert_not_called()
3411-
3412-
34133389
@pytest.mark.parametrize(
34143390
"span_context,expected_headers",
34153391
[

0 commit comments

Comments
 (0)