fix(llmobs): reuse shared conn (#13339)

Yun-Kim · IAL32 · web-flow · commit 479699d87a47 · 2025-05-07T13:30:52.000-04:00
Resolves #13336. Credit to @IAL32 and a cherry-pick from #13338. When we made the jump from using the shared HTTPWriter to our own BaseLLMObsWriter class to submit spans and evals in #12966, we used our own `_get_connection()` to return HTTP/HTTPS connections. However, we forgot to include the UDSHTTP connection (for the Unix socket case), which means UDS support has been broken until now. ### Why was this a problem in the first place? We used our own `_get_connection()` in #12966 because of an issue where creating the shared HTTPConnection helper class was leading to MRO superclass constructor issues in our tests. At the time we thought this was due to the shared HTTPConnection helper class having multiple superclasses and an issue with Python 3.10 in general, but this turns out to be due to vcrpy mocking HTTPConnection entirely, and it is only an issue in tests that rely on vcrpy. This PR makes some changes to avoid using vcrpy when not necessary, and makes better assertions to ensure that spans are being sent (in most tests it is not necessary for them to be accepted). 
## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) [](https://datadoghq.atlassian.net/browse/MLOB-2725) --------- Co-authored-by: IAL32 <me@adct.it>
diff --git a/ddtrace/llmobs/_writer.py b/ddtrace/llmobs/_writer.py
@@ -1,5 +1,4 @@
 import atexit
-import http.client as httplib
 from typing import Any
 from typing import Dict
 from typing import List
@@ -19,7 +18,8 @@
 from ddtrace.internal import forksafe
 from ddtrace.internal.logger import get_logger
 from ddtrace.internal.periodic import PeriodicService
-from ddtrace.internal.utils.http import verify_url
+from ddtrace.internal.utils.http import Response
+from ddtrace.internal.utils.http import get_connection
 from ddtrace.internal.utils.retry import fibonacci_backoff_with_jitter
 from ddtrace.llmobs import _telemetry as telemetry
 from ddtrace.llmobs._constants import AGENTLESS_EVAL_BASE_URL
@@ -132,7 +132,7 @@ def __init__(
         self._send_payload_with_retry = fibonacci_backoff_with_jitter(
             attempts=self.RETRY_ATTEMPTS,
             initial_wait=0.618 * self.interval / (1.618**self.RETRY_ATTEMPTS) / 2,
-            until=lambda result: isinstance(result, httplib.HTTPResponse),
+            until=lambda result: isinstance(result, Response),
         )(self._send_payload)
 
     def start(self, *args, **kwargs):
@@ -201,7 +201,7 @@ def periodic(self) -> None:
             )
 
     def _send_payload(self, payload: bytes, num_events: int):
-        conn = self._get_connection()
+        conn = get_connection(self._intake)
         try:
             conn.request("POST", self._endpoint, payload, self._headers)
             resp = conn.getresponse()
@@ -217,7 +217,7 @@ def _send_payload(self, payload: bytes, num_events: int):
                 telemetry.record_dropped_payload(num_events, event_type=self.EVENT_TYPE, error="http_error")
             else:
                 logger.debug("sent %d LLMObs %s events to %s", num_events, self.EVENT_TYPE, self._url)
-            return resp
+            return Response.from_http_response(resp)
         except Exception:
             logger.error(
                 "failed to send %d LLMObs %s events to %s", num_events, self.EVENT_TYPE, self._intake, exc_info=True
@@ -226,15 +226,6 @@ def _send_payload(self, payload: bytes, num_events: int):
         finally:
             conn.close()
 
-    def _get_connection(self):
-        """Return the connection to the LLM Observability endpoint."""
-        parsed = verify_url(self._intake)
-        if parsed.scheme == "https":
-            return httplib.HTTPSConnection(parsed.hostname or "", parsed.port, timeout=self._timeout)
-        elif parsed.scheme == "http":
-            return httplib.HTTPConnection(parsed.hostname or "", parsed.port, timeout=self._timeout)
-        raise ConnectionError("Unable to connect, invalid URL: %s", self._intake)
-
     @property
     def _url(self) -> str:
         return f"{self._intake}{self._endpoint}"
diff --git a/releasenotes/notes/fix-llmobs-unix-agent-2b959d436e6474c3.yaml b/releasenotes/notes/fix-llmobs-unix-agent-2b959d436e6474c3.yaml
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    LLM Observability: Resolves an issue where spans and evaluation metrics were not being sent via Unix sockets.
diff --git a/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_eval_metric_agentless_writer.test_send_metric_bad_api_key.yaml b/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_eval_metric_agentless_writer.test_send_metric_bad_api_key.yaml
diff --git a/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_evaluator_runner.send_score_metric.yaml b/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_evaluator_runner.send_score_metric.yaml
diff --git a/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_span_agentless_writer.test_send_completion_bad_api_key.yaml b/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_span_agentless_writer.test_send_completion_bad_api_key.yaml
diff --git a/tests/llmobs/test_llmobs_eval_metric_agent_writer.py b/tests/llmobs/test_llmobs_eval_metric_agent_writer.py
@@ -12,6 +12,8 @@
 
 INTAKE_ENDPOINT = agent_config.trace_agent_url
 AGENT_PROXY_URL = f"{INTAKE_ENDPOINT}{EVP_PROXY_AGENT_BASE_PATH}{EVAL_ENDPOINT}"
+UNIX_AGENT_INTAKE = "unix:///var/run/datadog/apm.sock"
+UNIX_AGENT_PROXY_URL = "{}{}{}".format(UNIX_AGENT_INTAKE, EVP_PROXY_AGENT_BASE_PATH, EVAL_ENDPOINT)
 
 
 def test_writer_start(mock_writer_logs):
@@ -21,6 +23,15 @@ def test_writer_start(mock_writer_logs):
     llmobs_eval_metric_writer.stop()
 
 
+def test_unix_socket_writer_start(mock_writer_logs):
+    llmobs_eval_metric_writer = LLMObsEvalMetricWriter(1, 1, is_agentless=False, _override_url=UNIX_AGENT_INTAKE)
+    llmobs_eval_metric_writer.start()
+    mock_writer_logs.debug.assert_has_calls(
+        [mock.call("started %r to %r", "LLMObsEvalMetricWriter", UNIX_AGENT_PROXY_URL)]
+    )
+    llmobs_eval_metric_writer.stop()
+
+
 def test_buffer_limit(mock_writer_logs):
     llmobs_eval_metric_writer = LLMObsEvalMetricWriter(1, 1, is_agentless=False)
     for _ in range(1001):
diff --git a/tests/llmobs/test_llmobs_eval_metric_agentless_writer.py b/tests/llmobs/test_llmobs_eval_metric_agentless_writer.py
@@ -63,7 +63,6 @@ def test_buffer_limit(mock_writer_logs):
     )
 
 
-@pytest.mark.vcr_logs
 def test_send_metric_bad_api_key(mock_writer_logs):
     llmobs_eval_metric_writer = LLMObsEvalMetricWriter(1, 1, is_agentless=True, _site=DD_SITE, _api_key="<bad-api-key>")
 
@@ -97,7 +96,7 @@ def test_send_categorical_metric(mock_writer_logs):
     llmobs_eval_metric_writer.enqueue(_categorical_metric_event())
     llmobs_eval_metric_writer.periodic()
     mock_writer_logs.debug.assert_has_calls(
-        [mock.call("sent %d LLMObs %s events to %s", 1, "evaluation_metric", INTAKE_ENDPOINT)]
+        [mock.call("encoded %d LLMObs %s events to be sent", 1, "evaluation_metric")]
     )
 
 
@@ -107,7 +106,7 @@ def test_send_score_metric(mock_writer_logs):
     llmobs_eval_metric_writer.enqueue(_score_metric_event())
     llmobs_eval_metric_writer.periodic()
     mock_writer_logs.debug.assert_has_calls(
-        [mock.call("sent %d LLMObs %s events to %s", 1, "evaluation_metric", INTAKE_ENDPOINT)]
+        [mock.call("encoded %d LLMObs %s events to be sent", 1, "evaluation_metric")]
     )
 
 
@@ -120,13 +119,13 @@ def test_send_timed_events(mock_writer_logs):
     llmobs_eval_metric_writer.enqueue(_score_metric_event())
     time.sleep(0.1)
     mock_writer_logs.debug.assert_has_calls(
-        [mock.call("sent %d LLMObs %s events to %s", 1, "evaluation_metric", INTAKE_ENDPOINT)]
+        [mock.call("encoded %d LLMObs %s events to be sent", 1, "evaluation_metric")]
     )
     mock_writer_logs.reset_mock()
     llmobs_eval_metric_writer.enqueue(_categorical_metric_event())
     time.sleep(0.1)
     mock_writer_logs.debug.assert_has_calls(
-        [mock.call("sent %d LLMObs %s events to %s", 1, "evaluation_metric", INTAKE_ENDPOINT)]
+        [mock.call("encoded %d LLMObs %s events to be sent", 1, "evaluation_metric")]
     )
     llmobs_eval_metric_writer.stop()
 
@@ -138,7 +137,9 @@ def test_send_multiple_events(mock_writer_logs):
     llmobs_eval_metric_writer.enqueue(_score_metric_event())
     llmobs_eval_metric_writer.enqueue(_categorical_metric_event())
     llmobs_eval_metric_writer.periodic()
-    mock_writer_logs.debug.assert_called_with("sent %d LLMObs %s events to %s", 2, "evaluation_metric", INTAKE_ENDPOINT)
+    mock_writer_logs.debug.assert_has_calls(
+        [mock.call("encoded %d LLMObs %s events to be sent", 2, "evaluation_metric")]
+    )
 
 
 def test_send_on_exit(mock_writer_logs, run_python_code_in_subprocess):
@@ -149,15 +150,9 @@ def test_send_on_exit(mock_writer_logs, run_python_code_in_subprocess):
     env.update({"PYTHONPATH": ":".join(pypath), "DD_LLMOBS_ML_APP": "unnamed-ml-app"})
     out, err, status, pid = run_python_code_in_subprocess(
         """
-import atexit
-
 from ddtrace.llmobs._writer import LLMObsEvalMetricWriter
 from tests.llmobs.test_llmobs_eval_metric_agentless_writer import _score_metric_event
-from tests.llmobs._utils import logs_vcr
 
-ctx = logs_vcr.use_cassette("tests.llmobs.test_llmobs_eval_metric_agentless_writer.send_score_metric.yaml")
-ctx.__enter__()
-atexit.register(lambda: ctx.__exit__())
 llmobs_eval_metric_writer = LLMObsEvalMetricWriter(
     0.01, 1, is_agentless=True, _site="datad0g.com", _api_key="<not-a-real-key>"
 )
@@ -168,4 +163,8 @@ def test_send_on_exit(mock_writer_logs, run_python_code_in_subprocess):
     )
     assert status == 0, err
     assert out == b""
-    assert err == b""
+    assert b"got response code 403" in err
+    assert (
+        b'status: b\'{"status":"error","code":403,"errors":["Forbidden"],"statuspage":"http://status.datadoghq.com","twitter":"http://twitter.com/datadogops","email":"support@datadoghq.com"}\'\n'
+        in err
+    )
diff --git a/tests/llmobs/test_llmobs_evaluator_runner.py b/tests/llmobs/test_llmobs_evaluator_runner.py
@@ -102,16 +102,10 @@ def test_evaluator_runner_on_exit(mock_writer_logs, run_python_code_in_subproces
     )
     out, err, status, pid = run_python_code_in_subprocess(
         """
-import atexit
-
 from ddtrace.llmobs import LLMObs
 from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
-from tests.llmobs._utils import logs_vcr
 from tests.llmobs._utils import DummyEvaluator
 
-ctx = logs_vcr.use_cassette("tests.llmobs.test_llmobs_evaluator_runner.send_score_metric.yaml")
-ctx.__enter__()
-atexit.register(lambda: ctx.__exit__())
 LLMObs.enable(api_key="dummy-api-key", site="datad0g.com", ml_app="unnamed-ml-app", agentless_enabled=True)
 LLMObs._instance._evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=LLMObs))
 LLMObs._instance._evaluator_runner.start()
@@ -121,7 +115,11 @@ def test_evaluator_runner_on_exit(mock_writer_logs, run_python_code_in_subproces
     )
     assert status == 0, err
     assert out == b""
-    assert err == b""
+    assert b"got response code 403" in err
+    assert (
+        b'status: b\'{"status":"error","code":403,"errors":["Forbidden"],"statuspage":"http://status.datadoghq.com","twitter":"http://twitter.com/datadogops","email":"support@datadoghq.com"}\'\n'
+        in err
+    )
 
 
 def test_evaluator_runner_unsupported_evaluator():
diff --git a/tests/llmobs/test_llmobs_span_agent_writer.py b/tests/llmobs/test_llmobs_span_agent_writer.py
@@ -16,12 +16,22 @@
 
 INTAKE_ENDPOINT = agent_config.trace_agent_url
 AGENT_PROXY_URL = "{}{}{}".format(INTAKE_ENDPOINT, EVP_PROXY_AGENT_BASE_PATH, SPAN_ENDPOINT)
+UNIX_AGENT_INTAKE = "unix:///var/run/datadog/apm.sock"
+UNIX_AGENT_PROXY_URL = "{}{}{}".format(UNIX_AGENT_INTAKE, EVP_PROXY_AGENT_BASE_PATH, SPAN_ENDPOINT)
 
 
 def test_writer_start(mock_writer_logs):
     llmobs_span_writer = LLMObsSpanWriter(1, 1, is_agentless=False)
     llmobs_span_writer.start()
     mock_writer_logs.debug.assert_has_calls([mock.call("started %r to %r", "LLMObsSpanWriter", AGENT_PROXY_URL)])
+    llmobs_span_writer.stop()
+
+
+def test_unix_socket_writer_start(mock_writer_logs):
+    llmobs_span_writer = LLMObsSpanWriter(1, 1, is_agentless=False, _override_url=UNIX_AGENT_INTAKE)
+    llmobs_span_writer.start()
+    mock_writer_logs.debug.assert_has_calls([mock.call("started %r to %r", "LLMObsSpanWriter", UNIX_AGENT_PROXY_URL)])
+    llmobs_span_writer.stop()
 
 
 def test_buffer_limit(mock_writer_logs):
diff --git a/tests/llmobs/test_llmobs_span_agentless_writer.py b/tests/llmobs/test_llmobs_span_agentless_writer.py
@@ -87,7 +87,6 @@ def test_send_chat_completion_event(mock_writer_logs):
     mock_writer_logs.debug.assert_has_calls([mock.call("encoded %d LLMObs %s events to be sent", 1, "span")])
 
 
-@pytest.mark.vcr_logs
 def test_send_completion_bad_api_key(mock_writer_logs):
     llmobs_span_writer = LLMObsSpanWriter(1, 1, is_agentless=True, _site=DD_SITE, _api_key="<bad-api-key>")
     llmobs_span_writer.enqueue(_completion_event())
@@ -149,15 +148,9 @@ def test_send_on_exit(run_python_code_in_subprocess):
 
     out, err, status, pid = run_python_code_in_subprocess(
         """
-import atexit
-
 from ddtrace.llmobs._writer import LLMObsSpanWriter
 from tests.llmobs.test_llmobs_span_agentless_writer import _completion_event
-from tests.llmobs._utils import logs_vcr
 
-ctx = logs_vcr.use_cassette("tests.llmobs.test_llmobs_span_agentless_writer.test_send_completion_event.yaml")
-ctx.__enter__()
-atexit.register(lambda: ctx.__exit__())
 llmobs_span_writer = LLMObsSpanWriter(0.01, 1, is_agentless=True, _site="datad0g.com", _api_key="<not-a-real-key>")
 llmobs_span_writer.start()
 llmobs_span_writer.enqueue(_completion_event())
@@ -166,4 +159,5 @@ def test_send_on_exit(run_python_code_in_subprocess):
     )
     assert status == 0, err
     assert out == b""
-    assert err == b""
+    assert b"got response code 403" in err
+    assert b'status: b\'{"errors":[{"status":"403","title":"Forbidden","detail":"API key is invalid"}]}\'\n' in err

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +---
 +fixes:
 +  - |
 +    LLM Observability: Resolves an issue where spans and evaluation metrics were not being sent via Unix sockets.