From 57235092797646babdb528481d3c1639cf410bf1 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Wed, 11 Dec 2024 20:14:14 -0500
Subject: [PATCH 01/33] span links

---
 ddtrace/llmobs/_llmobs.py          | 3 +++
 ddtrace/llmobs/_trace_processor.py | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 07f4d6f93c2..c39ffc8ed9c 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -564,6 +564,7 @@ def annotate(
         span: Optional[Span] = None,
         parameters: Optional[Dict[str, Any]] = None,
         prompt: Optional[dict] = None,
+        span_links = None,
         input_data: Optional[Any] = None,
         output_data: Optional[Any] = None,
         metadata: Optional[Dict[str, Any]] = None,
@@ -630,6 +631,8 @@ def annotate(
             cls._tag_params(span, parameters)
         if _name is not None:
             span.name = _name
+        if span_links is not None:
+            span.set_tag("_ml_obs.span_links", safe_json(span_links))
         if prompt is not None:
             cls._tag_prompt(span, prompt)
         if not span_kind:
diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index b4af0c5ffd1..c0fe10353cf 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -77,6 +77,7 @@ def submit_llmobs_span(self, span: Span) -> None:
 
     def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
         """Span event object structure."""
+
         span_kind = span._meta.pop(SPAN_KIND)
         meta: Dict[str, Any] = {"span.kind": span_kind, "input": {}, "output": {}}
         if span_kind in ("llm", "embedding") and span.get_tag(MODEL_NAME) is not None:
@@ -123,7 +124,6 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
             is_ragas_integration_span = True
 
         span.set_tag_str(ML_APP, ml_app)
-
         parent_id = str(_get_llmobs_parent_id(span) or "undefined")
         span._meta.pop(PARENT_ID_KEY, None)
 
@@ -138,14 +138,18 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
             "meta": meta,
             "metrics": metrics,
         }
+
+        if span.get_tag("_ml_obs.span_links") is not None:
+            llmobs_span_event["span_links"] = json.loads(span._meta.pop("_ml_obs.span_links"))
+
         session_id = _get_session_id(span)
         if session_id is not None:
             span.set_tag_str(SESSION_ID, session_id)
             llmobs_span_event["session_id"] = session_id
-
         llmobs_span_event["tags"] = self._llmobs_tags(
             span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span
         )
+        print(llmobs_span_event)
         return llmobs_span_event, is_ragas_integration_span
 
     @staticmethod

From a1c0512a7946480142fbdc1c4be1d8b1353f94eb Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Thu, 12 Dec 2024 22:16:37 -0500
Subject: [PATCH 02/33] implict parent link

---
 ddtrace/llmobs/_llmobs.py                    |   2 +-
 ddtrace/llmobs/_trace_processor.py           |   3 +
 tests/llmobs/test_llmobs_ragas_evaluators.py | 477 +++++++++++++++++++
 3 files changed, 481 insertions(+), 1 deletion(-)
 create mode 100644 tests/llmobs/test_llmobs_ragas_evaluators.py

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index c39ffc8ed9c..6d2f60709db 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -564,7 +564,7 @@ def annotate(
         span: Optional[Span] = None,
         parameters: Optional[Dict[str, Any]] = None,
         prompt: Optional[dict] = None,
-        span_links = None,
+        span_links=None,
         input_data: Optional[Any] = None,
         output_data: Optional[Any] = None,
         metadata: Optional[Dict[str, Any]] = None,
diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index c0fe10353cf..ffe64263ca5 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -140,6 +140,9 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
         }
 
         if span.get_tag("_ml_obs.span_links") is not None:
+            links = span._meta.pop("_ml_obs.span_links")
+            # implicit parent id link
+            links.append({"trace_id": "{:x}".format(span.trace_id), "span_id": parent_id})
             llmobs_span_event["span_links"] = json.loads(span._meta.pop("_ml_obs.span_links"))
 
         session_id = _get_session_id(span)
diff --git a/tests/llmobs/test_llmobs_ragas_evaluators.py b/tests/llmobs/test_llmobs_ragas_evaluators.py
new file mode 100644
index 00000000000..4c008dea50e
--- /dev/null
+++ b/tests/llmobs/test_llmobs_ragas_evaluators.py
@@ -0,0 +1,477 @@
+import os
+
+import mock
+import pytest
+
+from ddtrace.llmobs._evaluators.ragas.context_precision import RagasContextPrecisionEvaluator
+from ddtrace.llmobs._evaluators.ragas.faithfulness import RagasFaithfulnessEvaluator
+from ddtrace.span import Span
+from tests.llmobs._utils import _expected_llmobs_llm_span_event
+from tests.llmobs._utils import _expected_ragas_context_precision_spans
+from tests.llmobs._utils import _expected_ragas_faithfulness_spans
+from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_messages
+from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_prompt
+
+
+def _llm_span_without_io():
+    return _expected_llmobs_llm_span_event(Span("dummy"))
+
+
+def test_ragas_faithfulness_evaluator_init(ragas, LLMObs):
+    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
+    assert rf_evaluator.llmobs_service == LLMObs
+    assert rf_evaluator.ragas_faithfulness_instance == ragas.metrics.faithfulness
+    assert rf_evaluator.ragas_faithfulness_instance.llm == ragas.llms.llm_factory()
+
+
+def test_ragas_faithfulness_throws_if_dependencies_not_present(LLMObs, mock_ragas_dependencies_not_present, ragas):
+    with pytest.raises(NotImplementedError, match="Failed to load dependencies for `ragas_faithfulness` evaluator"):
+        RagasFaithfulnessEvaluator(LLMObs)
+
+
+def test_ragas_faithfulness_returns_none_if_inputs_extraction_fails(ragas, mock_llmobs_submit_evaluation, LLMObs):
+    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
+    failure_msg, _ = rf_evaluator.evaluate(_llm_span_without_io())
+    assert failure_msg == "fail_extract_faithfulness_inputs"
+    assert rf_evaluator.llmobs_service.submit_evaluation.call_count == 0
+
+
+def test_ragas_faithfulness_has_modified_faithfulness_instance(
+    ragas, mock_llmobs_submit_evaluation, reset_ragas_faithfulness_llm, LLMObs
+):
+    """Faithfulness instance used in ragas evaluator should match the global ragas faithfulness instance"""
+    from ragas.llms import BaseRagasLLM
+    from ragas.metrics import faithfulness
+
+    class FirstDummyLLM(BaseRagasLLM):
+        def __init__(self):
+            super().__init__()
+
+        def generate_text(self) -> str:
+            return "dummy llm"
+
+        def agenerate_text(self) -> str:
+            return "dummy llm"
+
+    faithfulness.llm = FirstDummyLLM()
+
+    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
+
+    assert rf_evaluator.ragas_faithfulness_instance.llm.generate_text() == "dummy llm"
+
+    class SecondDummyLLM(BaseRagasLLM):
+        def __init__(self):
+            super().__init__()
+
+        def generate_text(self, statements) -> str:
+            raise ValueError("dummy_llm")
+
+        def agenerate_text(self, statements) -> str:
+            raise ValueError("dummy_llm")
+
+    faithfulness.llm = SecondDummyLLM()
+
+    with pytest.raises(ValueError, match="dummy_llm"):
+        rf_evaluator.evaluate(_llm_span_with_expected_ragas_inputs_in_prompt())
+
+
+@pytest.mark.vcr_logs
+def test_ragas_faithfulness_submits_evaluation(ragas, LLMObs, mock_llmobs_submit_evaluation):
+    """Test that evaluation is submitted for a valid llm span where question is in the prompt variables"""
+    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
+    llm_span = _llm_span_with_expected_ragas_inputs_in_prompt()
+    rf_evaluator.run_and_submit_evaluation(llm_span)
+    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
+        [
+            mock.call(
+                span_context={
+                    "span_id": llm_span.get("span_id"),
+                    "trace_id": llm_span.get("trace_id"),
+                },
+                label=RagasFaithfulnessEvaluator.LABEL,
+                metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
+                value=1.0,
+                metadata={
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                    "_dd.faithfulness_disagreements": mock.ANY,
+                    "_dd.evaluation_kind": "faithfulness",
+                },
+            )
+        ]
+    )
+
+
+@pytest.mark.vcr_logs
+def test_ragas_faithfulness_submits_evaluation_on_span_with_question_in_messages(
+    ragas, LLMObs, mock_llmobs_submit_evaluation
+):
+    """Test that evaluation is submitted for a valid llm span where the last message content is the question"""
+    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
+    llm_span = _llm_span_with_expected_ragas_inputs_in_messages()
+    rf_evaluator.run_and_submit_evaluation(llm_span)
+    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
+        [
+            mock.call(
+                span_context={
+                    "span_id": llm_span.get("span_id"),
+                    "trace_id": llm_span.get("trace_id"),
+                },
+                label=RagasFaithfulnessEvaluator.LABEL,
+                metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
+                value=1.0,
+                metadata={
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                    "_dd.faithfulness_disagreements": mock.ANY,
+                    "_dd.evaluation_kind": "faithfulness",
+                },
+            )
+        ]
+    )
+
+
+@pytest.mark.vcr_logs
+def test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys(ragas, LLMObs, mock_llmobs_submit_evaluation):
+    """Test that evaluation is submitted for a valid llm span where the last message content is the question"""
+    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
+    llm_span = _expected_llmobs_llm_span_event(
+        Span("dummy"),
+        prompt={
+            "variables": {
+                "user_input": "Is france part of europe?",
+                "context_1": "hello, ",
+                "context_2": "france is ",
+                "context_3": "part of europe",
+            },
+            "_dd_context_variable_keys": ["context_1", "context_2", "context_3"],
+            "_dd_query_variable_keys": ["user_input"],
+        },
+        output_messages=[{"content": "France is indeed part of europe"}],
+    )
+    rf_evaluator.run_and_submit_evaluation(llm_span)
+    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
+        [
+            mock.call(
+                span_context={
+                    "span_id": llm_span.get("span_id"),
+                    "trace_id": llm_span.get("trace_id"),
+                },
+                label=RagasFaithfulnessEvaluator.LABEL,
+                metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
+                value=1.0,
+                metadata={
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                    "_dd.faithfulness_disagreements": mock.ANY,
+                    "_dd.evaluation_kind": "faithfulness",
+                },
+            )
+        ]
+    )
+
+
+@pytest.mark.vcr_logs
+def test_ragas_faithfulness_emits_traces(ragas, LLMObs):
+    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
+    rf_evaluator.evaluate(_llm_span_with_expected_ragas_inputs_in_prompt())
+    assert rf_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_count == 7
+    calls = rf_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_args_list
+
+    spans = [call[0][0] for call in calls]
+
+    # check name, io, span kinds match
+    assert spans == _expected_ragas_faithfulness_spans()
+
+    # verify the trace structure
+    root_span = spans[0]
+    root_span_id = root_span["span_id"]
+    assert root_span["parent_id"] == "undefined"
+    assert root_span["meta"] is not None
+    assert root_span["meta"]["metadata"] is not None
+    assert isinstance(root_span["meta"]["metadata"]["faithfulness_list"], list)
+    assert isinstance(root_span["meta"]["metadata"]["statements"], list)
+    root_span_trace_id = root_span["trace_id"]
+    for child_span in spans[1:]:
+        assert child_span["trace_id"] == root_span_trace_id
+
+    assert spans[1]["parent_id"] == root_span_id  # input extraction (task)
+    assert spans[2]["parent_id"] == root_span_id  # create statements (workflow)
+    assert spans[4]["parent_id"] == root_span_id  # create verdicts (workflow)
+    assert spans[6]["parent_id"] == root_span_id  # create score (task)
+
+    assert spans[3]["parent_id"] == spans[2]["span_id"]  # create statements prompt (task)
+    assert spans[5]["parent_id"] == spans[4]["span_id"]  # create verdicts prompt (task)
+
+
+def test_llmobs_with_faithfulness_emits_traces_and_evals_on_exit(mock_writer_logs, run_python_code_in_subprocess):
+    env = os.environ.copy()
+    pypath = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))]
+    if "PYTHONPATH" in env:
+        pypath.append(env["PYTHONPATH"])
+    env.update(
+        {
+            "DD_API_KEY": os.getenv("DD_API_KEY", "dummy-api-key"),
+            "DD_SITE": "datad0g.com",
+            "PYTHONPATH": ":".join(pypath),
+            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "dummy-openai-api-key"),
+            "DD_LLMOBS_ML_APP": "unnamed-ml-app",
+            "_DD_LLMOBS_EVALUATOR_INTERVAL": "5",
+            "_DD_LLMOBS_EVALUATORS": "ragas_faithfulness",
+            "DD_LLMOBS_AGENTLESS_ENABLED": "true",
+        }
+    )
+    out, err, status, pid = run_python_code_in_subprocess(
+        """
+import os
+import time
+import atexit
+import mock
+from ddtrace.llmobs import LLMObs
+from ddtrace.internal.utils.http import Response
+from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_messages
+from tests.llmobs._utils import logs_vcr
+
+ctx = logs_vcr.use_cassette(
+    "tests.llmobs.test_llmobs_ragas_evaluators.emits_traces_and_evaluations_on_exit.yaml"
+)
+ctx.__enter__()
+atexit.register(lambda: ctx.__exit__())
+with mock.patch(
+    "ddtrace.internal.writer.HTTPWriter._send_payload",
+    return_value=Response(
+        status=200,
+        body="{}",
+    ),
+):
+    LLMObs.enable()
+    LLMObs._instance._evaluator_runner.enqueue(_llm_span_with_expected_ragas_inputs_in_messages(), None)
+""",
+        env=env,
+    )
+    assert status == 0, err
+    assert out == b""
+    assert err == b""
+
+
+def test_ragas_context_precision_init(ragas, LLMObs):
+    rcp_evaluator = RagasContextPrecisionEvaluator(LLMObs)
+    assert rcp_evaluator.llmobs_service == LLMObs
+    assert rcp_evaluator.ragas_context_precision_instance == ragas.metrics.context_precision
+    assert rcp_evaluator.ragas_context_precision_instance.llm == ragas.llms.llm_factory()
+
+
+def test_ragas_context_precision_throws_if_dependencies_not_present(LLMObs, mock_ragas_dependencies_not_present, ragas):
+    with pytest.raises(
+        NotImplementedError, match="Failed to load dependencies for `ragas_context_precision` evaluator"
+    ):
+        RagasContextPrecisionEvaluator(LLMObs)
+
+
+def test_ragas_context_precision_returns_none_if_inputs_extraction_fails(ragas, mock_llmobs_submit_evaluation, LLMObs):
+    rcp_evaluator = RagasContextPrecisionEvaluator(LLMObs)
+    failure_msg, _ = rcp_evaluator.evaluate(_llm_span_without_io())
+    assert failure_msg == "fail_extract_context_precision_inputs"
+    assert rcp_evaluator.llmobs_service.submit_evaluation.call_count == 0
+
+
+def test_ragas_context_precision_has_modified_context_precision_instance(
+    ragas, mock_llmobs_submit_evaluation, reset_ragas_context_precision_llm, LLMObs
+):
+    """Context precision instance used in ragas evaluator should match the global ragas context precision instance"""
+    from ragas.llms import BaseRagasLLM
+    from ragas.metrics import context_precision
+
+    class FirstDummyLLM(BaseRagasLLM):
+        def __init__(self):
+            super().__init__()
+
+        def generate_text(self) -> str:
+            return "dummy llm"
+
+        def agenerate_text(self) -> str:
+            return "dummy llm"
+
+    context_precision.llm = FirstDummyLLM()
+
+    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
+
+    assert rf_evaluator.ragas_context_precision_instance.llm.generate_text() == "dummy llm"
+
+    class SecondDummyLLM(BaseRagasLLM):
+        def __init__(self):
+            super().__init__()
+
+        def generate_text(self) -> str:
+            return "second dummy llm"
+
+        def agenerate_text(self) -> str:
+            return "second dummy llm"
+
+    context_precision.llm = SecondDummyLLM()
+
+    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
+
+    assert rf_evaluator.ragas_context_precision_instance.llm.generate_text() == "second dummy llm"
+
+
+@pytest.mark.vcr_logs
+def test_ragas_context_precision_submits_evaluation(ragas, LLMObs, mock_llmobs_submit_evaluation):
+    """Test that evaluation is submitted for a valid llm span where question is in the prompt variables"""
+    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
+    llm_span = _llm_span_with_expected_ragas_inputs_in_prompt()
+    rf_evaluator.run_and_submit_evaluation(llm_span)
+    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
+        [
+            mock.call(
+                span_context={
+                    "span_id": llm_span.get("span_id"),
+                    "trace_id": llm_span.get("trace_id"),
+                },
+                label=RagasContextPrecisionEvaluator.LABEL,
+                metric_type=RagasContextPrecisionEvaluator.METRIC_TYPE,
+                value=1.0,
+                metadata={
+                    "_dd.evaluation_kind": "context_precision",
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                },
+            )
+        ]
+    )
+
+
+@pytest.mark.vcr_logs
+def test_ragas_context_precision_submits_evaluation_on_span_with_question_in_messages(
+    ragas, LLMObs, mock_llmobs_submit_evaluation
+):
+    """Test that evaluation is submitted for a valid llm span where the last message content is the question"""
+    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
+    llm_span = _llm_span_with_expected_ragas_inputs_in_messages()
+    rf_evaluator.run_and_submit_evaluation(llm_span)
+    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
+        [
+            mock.call(
+                span_context={
+                    "span_id": llm_span.get("span_id"),
+                    "trace_id": llm_span.get("trace_id"),
+                },
+                label=RagasContextPrecisionEvaluator.LABEL,
+                metric_type=RagasContextPrecisionEvaluator.METRIC_TYPE,
+                value=1.0,
+                metadata={
+                    "_dd.evaluation_kind": "context_precision",
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                },
+            )
+        ]
+    )
+
+
+@pytest.mark.vcr_logs
+def test_ragas_context_precision_submits_evaluation_on_span_with_custom_keys(
+    ragas, LLMObs, mock_llmobs_submit_evaluation
+):
+    """Test that evaluation is submitted for a valid llm span where the last message content is the question"""
+    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
+    llm_span = _expected_llmobs_llm_span_event(
+        Span("dummy"),
+        prompt={
+            "variables": {
+                "user_input": "Is france part of europe?",
+                "context_2": "irrelevant",
+                "context_3": "France is part of europe",
+            },
+            "_dd_context_variable_keys": ["context_2", "context_3"],
+            "_dd_query_variable_keys": ["user_input"],
+        },
+        output_messages=[{"content": "France is indeed part of europe"}],
+    )
+    rf_evaluator.run_and_submit_evaluation(llm_span)
+    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
+        [
+            mock.call(
+                span_context={
+                    "span_id": llm_span.get("span_id"),
+                    "trace_id": llm_span.get("trace_id"),
+                },
+                label=RagasContextPrecisionEvaluator.LABEL,
+                metric_type=RagasContextPrecisionEvaluator.METRIC_TYPE,
+                value=0.5,
+                metadata={
+                    "_dd.evaluation_kind": "context_precision",
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                },
+            )
+        ]
+    )
+
+
+@pytest.mark.vcr_logs
+def test_ragas_context_precision_emits_traces(ragas, LLMObs):
+    rcp_evaluator = RagasContextPrecisionEvaluator(LLMObs)
+    rcp_evaluator.evaluate(_llm_span_with_expected_ragas_inputs_in_prompt())
+    assert rcp_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_count == 2
+    calls = rcp_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_args_list
+
+    spans = [call[0][0] for call in calls]
+
+    # check name, io, span kinds match
+    assert spans == _expected_ragas_context_precision_spans()
+
+    # verify the trace structure
+    root_span = spans[0]
+    root_span_id = root_span["span_id"]
+    assert root_span["parent_id"] == "undefined"
+    assert root_span["meta"] is not None
+
+    root_span_trace_id = root_span["trace_id"]
+    for child_span in spans[1:]:
+        assert child_span["trace_id"] == root_span_trace_id
+        assert child_span["parent_id"] == root_span_id
+
+
+def test_llmobs_with_context_precision_emits_traces_and_evals_on_exit(mock_writer_logs, run_python_code_in_subprocess):
+    env = os.environ.copy()
+    pypath = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))]
+    if "PYTHONPATH" in env:
+        pypath.append(env["PYTHONPATH"])
+    env.update(
+        {
+            "DD_API_KEY": os.getenv("DD_API_KEY", "dummy-api-key"),
+            "DD_SITE": "datad0g.com",
+            "PYTHONPATH": ":".join(pypath),
+            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "dummy-openai-api-key"),
+            "DD_LLMOBS_ML_APP": "unnamed-ml-app",
+            "_DD_LLMOBS_EVALUATOR_INTERVAL": "5",
+            "_DD_LLMOBS_EVALUATORS": "ragas_context_precision",
+            "DD_LLMOBS_AGENTLESS_ENABLED": "true",
+        }
+    )
+    out, err, status, pid = run_python_code_in_subprocess(
+        """
+import os
+import time
+import atexit
+import mock
+from ddtrace.llmobs import LLMObs
+from ddtrace.internal.utils.http import Response
+from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_messages
+from tests.llmobs._utils import logs_vcr
+
+ctx = logs_vcr.use_cassette(
+    "tests.llmobs.test_llmobs_ragas_context_precision_evaluator.emits_traces_and_evaluations_on_exit.yaml"
+)
+ctx.__enter__()
+atexit.register(lambda: ctx.__exit__())
+with mock.patch(
+    "ddtrace.internal.writer.HTTPWriter._send_payload",
+    return_value=Response(
+        status=200,
+        body="{}",
+    ),
+):
+    LLMObs.enable()
+    LLMObs._instance._evaluator_runner.enqueue(_llm_span_with_expected_ragas_inputs_in_messages(), None)
+""",
+        env=env,
+    )
+    assert status == 0, err
+    assert out == b""
+    assert err == b""

From 2b106d077d120b78fbbeff014b9852277005bf2d Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Thu, 12 Dec 2024 23:23:01 -0500
Subject: [PATCH 03/33] automatic correlation

---
 ddtrace/llmobs/_trace_processor.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index ffe64263ca5..61cf1828ac5 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -139,11 +139,14 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
             "metrics": metrics,
         }
 
+        links = [{"trace_id": "{:x}".format(span.trace_id), "span_id": parent_id}]
+
         if span.get_tag("_ml_obs.span_links") is not None:
-            links = span._meta.pop("_ml_obs.span_links")
-            # implicit parent id link
-            links.append({"trace_id": "{:x}".format(span.trace_id), "span_id": parent_id})
-            llmobs_span_event["span_links"] = json.loads(span._meta.pop("_ml_obs.span_links"))
+            links += json.loads(span._meta.pop("_ml_obs.span_links"))
+
+        if parent_id != "undefined":
+            print(links)
+            llmobs_span_event["span_links"] = links
 
         session_id = _get_session_id(span)
         if session_id is not None:
@@ -152,7 +155,7 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
         llmobs_span_event["tags"] = self._llmobs_tags(
             span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span
         )
-        print(llmobs_span_event)
+        # print(llmobs_span_event)
         return llmobs_span_event, is_ragas_integration_span
 
     @staticmethod

From 05f3026f167cbb3e7afd63448b7792c2b0bcf38e Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Sun, 15 Dec 2024 15:33:53 -0500
Subject: [PATCH 04/33] save some progress on span linking

---
 ddtrace/llmobs/_llmobs.py          | 12 ++++++++++++
 ddtrace/llmobs/_trace_processor.py |  2 +-
 ddtrace/llmobs/decorators.py       | 14 +++++++++++++-
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 6d2f60709db..23cb8b0a04b 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -71,6 +71,18 @@
     "google_generativeai": "google_generativeai",
 }
 
+import wrapt
+
+
+class CustomProxy(wrapt.ObjectProxy):
+    def add_span_links(self, span_links):
+        if not hasattr(self, "span_links"):
+            setattr(self, span_links, [])
+        self.span_links += span_links
+
+    def get_span_links(self):
+        return getattr(self, "span_links", [])
+
 
 class LLMObs(Service):
     _instance = None  # type: LLMObs
diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index 61cf1828ac5..f2d1a9113d0 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -147,7 +147,7 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
         if parent_id != "undefined":
             print(links)
             llmobs_span_event["span_links"] = links
-
+        print(llmobs_span_event)
         session_id = _get_session_id(span)
         if session_id is not None:
             span.set_tag_str(SESSION_ID, session_id)
diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py
index 86d8a1c2593..0e5ff26f0b7 100644
--- a/ddtrace/llmobs/decorators.py
+++ b/ddtrace/llmobs/decorators.py
@@ -125,10 +125,14 @@ def wrapper(*args, **kwargs):
                         span_name = func.__name__
                     traced_operation = getattr(LLMObs, operation_kind, "workflow")
                     with traced_operation(name=span_name, session_id=session_id, ml_app=ml_app) as span:
+                        span_links = []
+                        for arg in args:
+                            if hasattr(arg, "span_links"):
+                                span_links += arg.span_links
                         func_signature = signature(func)
                         bound_args = func_signature.bind_partial(*args, **kwargs)
                         if _automatic_io_annotation and bound_args.arguments:
-                            LLMObs.annotate(span=span, input_data=bound_args.arguments)
+                            LLMObs.annotate(span=span, input_data=bound_args.arguments, span_links=span_links)
                         resp = func(*args, **kwargs)
                         if (
                             _automatic_io_annotation
@@ -137,6 +141,14 @@ def wrapper(*args, **kwargs):
                             and span.get_tag(OUTPUT_VALUE) is None
                         ):
                             LLMObs.annotate(span=span, output_data=resp)
+                        span_link = LLMObs.export_span()
+                        # if not hasattr(resp, "__dict___"):
+
+                        #     class StringWithAttributes(type(resp)):
+                        #         pass
+
+                        #     resp = StringWithAttributes(resp)
+                        setattr(resp, "span_links", [span_link])
                         return resp
 
             return wrapper

From 6fede1f0517942ad5c8e0b24c518b0c3dab727cf Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Tue, 17 Dec 2024 13:16:20 -0500
Subject: [PATCH 05/33] automatic span linking poc

---
 ddtrace/llmobs/_llmobs.py          | 97 ++++++++++++++++++++++++++----
 ddtrace/llmobs/_trace_processor.py | 11 ++--
 ddtrace/llmobs/decorators.py       | 15 +----
 3 files changed, 92 insertions(+), 31 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 23cb8b0a04b..5ac7f63604c 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -1,5 +1,7 @@
+import inspect
 import json
 import os
+import sys
 import time
 from typing import Any
 from typing import Dict
@@ -71,18 +73,6 @@
     "google_generativeai": "google_generativeai",
 }
 
-import wrapt
-
-
-class CustomProxy(wrapt.ObjectProxy):
-    def add_span_links(self, span_links):
-        if not hasattr(self, "span_links"):
-            setattr(self, span_links, [])
-        self.span_links += span_links
-
-    def get_span_links(self):
-        return getattr(self, "span_links", [])
-
 
 class LLMObs(Service):
     _instance = None  # type: LLMObs
@@ -118,6 +108,9 @@ def __init__(self, tracer=None):
         self._annotation_context_lock = forksafe.RLock()
         self.tracer.on_start_span(self._do_annotations)
 
+        self._object_span_links = {}
+        sys.settrace(self.trace_call)
+
     def _do_annotations(self, span):
         # get the current span context
         # only do the annotations if it matches the context
@@ -270,9 +263,9 @@ def disable(cls) -> None:
         if not cls.enabled:
             log.debug("%s not enabled", cls.__name__)
             return
+        sys.settrace(None)
         log.debug("Disabling %s", cls.__name__)
         atexit.unregister(cls.disable)
-
         cls._instance.stop()
         cls.enabled = False
         cls._instance.tracer.deregister_on_start_span(cls._instance._do_annotations)
@@ -280,6 +273,84 @@ def disable(cls) -> None:
 
         log.debug("%s disabled", cls.__name__)
 
+    def _record_relationship(self, source_obj, incoming_obj, operation):
+        """Record a relationship between objects."""
+        if source_obj is None or incoming_obj is None:
+            return
+        # print(f"Recording relationship between {source_obj} and {incoming_obj} with operation {operation}")
+        span_links = self.get_span_links(incoming_obj)
+        self.add_span_links(source_obj, span_links)
+
+    def trace_call(self, frame, event, arg):
+        """Track method calls and object interactions."""
+        if event != "call":
+            return self.trace_call
+
+        # Get function details
+        source_obj = frame.f_locals.get("self")
+        method_name = frame.f_code.co_name
+
+        # Track object creation or modification
+        try:
+            # Capture arguments that might involve object relationships
+            args = inspect.getargvalues(frame)
+
+            # Look for potential object interactions
+            for arg_name in args.args:
+                if arg_name == "self":
+                    continue
+                incoming_obj = frame.f_locals.get(arg_name)
+
+                # Check for object creation or modification
+                if method_name in ["__init__", "__new__", "__add__", "append", "extend", "update"]:
+                    self._record_relationship(source_obj, incoming_obj, f"{method_name} operation")
+        except Exception as e:
+            print("Error capturing object interactions ", e)
+
+        return self.trace_call
+
+    def record_object(self, span, obj, to):
+        carried_span_links = self.get_span_links(obj)
+        current_span_links = []
+        for span_link in carried_span_links:
+            if span_link["attributes"]["from"] == "input" and to == "output":
+                continue
+            span_link["attributes"]["to"] = to
+            current_span_links.append(span_link)
+
+        self._tag_span_links(span, current_span_links)
+        self.add_span_links(
+            obj,
+            [
+                {
+                    "trace_id": self.export_span(span)["trace_id"],
+                    "span_id": self.export_span(span)["span_id"],
+                    "attributes": {
+                        "from": to,
+                    },
+                }
+            ],
+        )
+
+    def _tag_span_links(self, span, span_links):
+        current_span_links = span.get_tag("_ml_obs.span_links")
+        if current_span_links:
+            current_span_links = json.loads(current_span_links)
+            span_links = current_span_links + span_links
+        span.set_tag_str("_ml_obs.span_links", safe_json(span_links))
+
+    def get_obj_id(self, obj):
+        return f"{type(obj).__name__}_{id(obj)}"
+
+    def add_span_links(self, obj, span_links):
+        obj_id = self.get_obj_id(obj)
+        if obj_id not in self._object_span_links:
+            self._object_span_links[obj_id] = []
+        self._object_span_links[obj_id] += span_links
+
+    def get_span_links(self, obj):
+        return self._object_span_links.get(self.get_obj_id(obj), [])
+
     @classmethod
     def annotation_context(
         cls, tags: Optional[Dict[str, Any]] = None, prompt: Optional[dict] = None, name: Optional[str] = None
diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index f2d1a9113d0..c5ee020ae42 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -139,15 +139,15 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
             "metrics": metrics,
         }
 
-        links = [{"trace_id": "{:x}".format(span.trace_id), "span_id": parent_id}]
+        links = []
 
         if span.get_tag("_ml_obs.span_links") is not None:
             links += json.loads(span._meta.pop("_ml_obs.span_links"))
 
-        if parent_id != "undefined":
-            print(links)
-            llmobs_span_event["span_links"] = links
-        print(llmobs_span_event)
+        llmobs_span_event["span_links"] = links
+        print("links for span: {}".format(_get_span_name(span)))
+        print(llmobs_span_event["span_links"])
+
         session_id = _get_session_id(span)
         if session_id is not None:
             span.set_tag_str(SESSION_ID, session_id)
@@ -155,7 +155,6 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
         llmobs_span_event["tags"] = self._llmobs_tags(
             span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span
         )
-        # print(llmobs_span_event)
         return llmobs_span_event, is_ragas_integration_span
 
     @staticmethod
diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py
index 0e5ff26f0b7..775f813f7bf 100644
--- a/ddtrace/llmobs/decorators.py
+++ b/ddtrace/llmobs/decorators.py
@@ -125,14 +125,12 @@ def wrapper(*args, **kwargs):
                         span_name = func.__name__
                     traced_operation = getattr(LLMObs, operation_kind, "workflow")
                     with traced_operation(name=span_name, session_id=session_id, ml_app=ml_app) as span:
-                        span_links = []
                         for arg in args:
-                            if hasattr(arg, "span_links"):
-                                span_links += arg.span_links
+                            LLMObs._instance.record_object(span, arg, "input")
                         func_signature = signature(func)
                         bound_args = func_signature.bind_partial(*args, **kwargs)
                         if _automatic_io_annotation and bound_args.arguments:
-                            LLMObs.annotate(span=span, input_data=bound_args.arguments, span_links=span_links)
+                            LLMObs.annotate(span=span, input_data=bound_args.arguments)
                         resp = func(*args, **kwargs)
                         if (
                             _automatic_io_annotation
@@ -141,14 +139,7 @@ def wrapper(*args, **kwargs):
                             and span.get_tag(OUTPUT_VALUE) is None
                         ):
                             LLMObs.annotate(span=span, output_data=resp)
-                        span_link = LLMObs.export_span()
-                        # if not hasattr(resp, "__dict___"):
-
-                        #     class StringWithAttributes(type(resp)):
-                        #         pass
-
-                        #     resp = StringWithAttributes(resp)
-                        setattr(resp, "span_links", [span_link])
+                        LLMObs._instance.record_object(span, resp, "output")
                         return resp
 
             return wrapper

From 7acc0e73203724cb1bdc75f80a00d0f512c34256 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Tue, 17 Dec 2024 13:18:18 -0500
Subject: [PATCH 06/33] oops

---
 tests/llmobs/test_llmobs_ragas_evaluators.py | 477 -------------------
 1 file changed, 477 deletions(-)
 delete mode 100644 tests/llmobs/test_llmobs_ragas_evaluators.py

diff --git a/tests/llmobs/test_llmobs_ragas_evaluators.py b/tests/llmobs/test_llmobs_ragas_evaluators.py
deleted file mode 100644
index 4c008dea50e..00000000000
--- a/tests/llmobs/test_llmobs_ragas_evaluators.py
+++ /dev/null
@@ -1,477 +0,0 @@
-import os
-
-import mock
-import pytest
-
-from ddtrace.llmobs._evaluators.ragas.context_precision import RagasContextPrecisionEvaluator
-from ddtrace.llmobs._evaluators.ragas.faithfulness import RagasFaithfulnessEvaluator
-from ddtrace.span import Span
-from tests.llmobs._utils import _expected_llmobs_llm_span_event
-from tests.llmobs._utils import _expected_ragas_context_precision_spans
-from tests.llmobs._utils import _expected_ragas_faithfulness_spans
-from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_messages
-from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_prompt
-
-
-def _llm_span_without_io():
-    return _expected_llmobs_llm_span_event(Span("dummy"))
-
-
-def test_ragas_faithfulness_evaluator_init(ragas, LLMObs):
-    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
-    assert rf_evaluator.llmobs_service == LLMObs
-    assert rf_evaluator.ragas_faithfulness_instance == ragas.metrics.faithfulness
-    assert rf_evaluator.ragas_faithfulness_instance.llm == ragas.llms.llm_factory()
-
-
-def test_ragas_faithfulness_throws_if_dependencies_not_present(LLMObs, mock_ragas_dependencies_not_present, ragas):
-    with pytest.raises(NotImplementedError, match="Failed to load dependencies for `ragas_faithfulness` evaluator"):
-        RagasFaithfulnessEvaluator(LLMObs)
-
-
-def test_ragas_faithfulness_returns_none_if_inputs_extraction_fails(ragas, mock_llmobs_submit_evaluation, LLMObs):
-    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
-    failure_msg, _ = rf_evaluator.evaluate(_llm_span_without_io())
-    assert failure_msg == "fail_extract_faithfulness_inputs"
-    assert rf_evaluator.llmobs_service.submit_evaluation.call_count == 0
-
-
-def test_ragas_faithfulness_has_modified_faithfulness_instance(
-    ragas, mock_llmobs_submit_evaluation, reset_ragas_faithfulness_llm, LLMObs
-):
-    """Faithfulness instance used in ragas evaluator should match the global ragas faithfulness instance"""
-    from ragas.llms import BaseRagasLLM
-    from ragas.metrics import faithfulness
-
-    class FirstDummyLLM(BaseRagasLLM):
-        def __init__(self):
-            super().__init__()
-
-        def generate_text(self) -> str:
-            return "dummy llm"
-
-        def agenerate_text(self) -> str:
-            return "dummy llm"
-
-    faithfulness.llm = FirstDummyLLM()
-
-    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
-
-    assert rf_evaluator.ragas_faithfulness_instance.llm.generate_text() == "dummy llm"
-
-    class SecondDummyLLM(BaseRagasLLM):
-        def __init__(self):
-            super().__init__()
-
-        def generate_text(self, statements) -> str:
-            raise ValueError("dummy_llm")
-
-        def agenerate_text(self, statements) -> str:
-            raise ValueError("dummy_llm")
-
-    faithfulness.llm = SecondDummyLLM()
-
-    with pytest.raises(ValueError, match="dummy_llm"):
-        rf_evaluator.evaluate(_llm_span_with_expected_ragas_inputs_in_prompt())
-
-
-@pytest.mark.vcr_logs
-def test_ragas_faithfulness_submits_evaluation(ragas, LLMObs, mock_llmobs_submit_evaluation):
-    """Test that evaluation is submitted for a valid llm span where question is in the prompt variables"""
-    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
-    llm_span = _llm_span_with_expected_ragas_inputs_in_prompt()
-    rf_evaluator.run_and_submit_evaluation(llm_span)
-    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
-        [
-            mock.call(
-                span_context={
-                    "span_id": llm_span.get("span_id"),
-                    "trace_id": llm_span.get("trace_id"),
-                },
-                label=RagasFaithfulnessEvaluator.LABEL,
-                metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
-                value=1.0,
-                metadata={
-                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
-                    "_dd.faithfulness_disagreements": mock.ANY,
-                    "_dd.evaluation_kind": "faithfulness",
-                },
-            )
-        ]
-    )
-
-
-@pytest.mark.vcr_logs
-def test_ragas_faithfulness_submits_evaluation_on_span_with_question_in_messages(
-    ragas, LLMObs, mock_llmobs_submit_evaluation
-):
-    """Test that evaluation is submitted for a valid llm span where the last message content is the question"""
-    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
-    llm_span = _llm_span_with_expected_ragas_inputs_in_messages()
-    rf_evaluator.run_and_submit_evaluation(llm_span)
-    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
-        [
-            mock.call(
-                span_context={
-                    "span_id": llm_span.get("span_id"),
-                    "trace_id": llm_span.get("trace_id"),
-                },
-                label=RagasFaithfulnessEvaluator.LABEL,
-                metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
-                value=1.0,
-                metadata={
-                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
-                    "_dd.faithfulness_disagreements": mock.ANY,
-                    "_dd.evaluation_kind": "faithfulness",
-                },
-            )
-        ]
-    )
-
-
-@pytest.mark.vcr_logs
-def test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys(ragas, LLMObs, mock_llmobs_submit_evaluation):
-    """Test that evaluation is submitted for a valid llm span where the last message content is the question"""
-    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
-    llm_span = _expected_llmobs_llm_span_event(
-        Span("dummy"),
-        prompt={
-            "variables": {
-                "user_input": "Is france part of europe?",
-                "context_1": "hello, ",
-                "context_2": "france is ",
-                "context_3": "part of europe",
-            },
-            "_dd_context_variable_keys": ["context_1", "context_2", "context_3"],
-            "_dd_query_variable_keys": ["user_input"],
-        },
-        output_messages=[{"content": "France is indeed part of europe"}],
-    )
-    rf_evaluator.run_and_submit_evaluation(llm_span)
-    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
-        [
-            mock.call(
-                span_context={
-                    "span_id": llm_span.get("span_id"),
-                    "trace_id": llm_span.get("trace_id"),
-                },
-                label=RagasFaithfulnessEvaluator.LABEL,
-                metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
-                value=1.0,
-                metadata={
-                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
-                    "_dd.faithfulness_disagreements": mock.ANY,
-                    "_dd.evaluation_kind": "faithfulness",
-                },
-            )
-        ]
-    )
-
-
-@pytest.mark.vcr_logs
-def test_ragas_faithfulness_emits_traces(ragas, LLMObs):
-    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
-    rf_evaluator.evaluate(_llm_span_with_expected_ragas_inputs_in_prompt())
-    assert rf_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_count == 7
-    calls = rf_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_args_list
-
-    spans = [call[0][0] for call in calls]
-
-    # check name, io, span kinds match
-    assert spans == _expected_ragas_faithfulness_spans()
-
-    # verify the trace structure
-    root_span = spans[0]
-    root_span_id = root_span["span_id"]
-    assert root_span["parent_id"] == "undefined"
-    assert root_span["meta"] is not None
-    assert root_span["meta"]["metadata"] is not None
-    assert isinstance(root_span["meta"]["metadata"]["faithfulness_list"], list)
-    assert isinstance(root_span["meta"]["metadata"]["statements"], list)
-    root_span_trace_id = root_span["trace_id"]
-    for child_span in spans[1:]:
-        assert child_span["trace_id"] == root_span_trace_id
-
-    assert spans[1]["parent_id"] == root_span_id  # input extraction (task)
-    assert spans[2]["parent_id"] == root_span_id  # create statements (workflow)
-    assert spans[4]["parent_id"] == root_span_id  # create verdicts (workflow)
-    assert spans[6]["parent_id"] == root_span_id  # create score (task)
-
-    assert spans[3]["parent_id"] == spans[2]["span_id"]  # create statements prompt (task)
-    assert spans[5]["parent_id"] == spans[4]["span_id"]  # create verdicts prompt (task)
-
-
-def test_llmobs_with_faithfulness_emits_traces_and_evals_on_exit(mock_writer_logs, run_python_code_in_subprocess):
-    env = os.environ.copy()
-    pypath = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))]
-    if "PYTHONPATH" in env:
-        pypath.append(env["PYTHONPATH"])
-    env.update(
-        {
-            "DD_API_KEY": os.getenv("DD_API_KEY", "dummy-api-key"),
-            "DD_SITE": "datad0g.com",
-            "PYTHONPATH": ":".join(pypath),
-            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "dummy-openai-api-key"),
-            "DD_LLMOBS_ML_APP": "unnamed-ml-app",
-            "_DD_LLMOBS_EVALUATOR_INTERVAL": "5",
-            "_DD_LLMOBS_EVALUATORS": "ragas_faithfulness",
-            "DD_LLMOBS_AGENTLESS_ENABLED": "true",
-        }
-    )
-    out, err, status, pid = run_python_code_in_subprocess(
-        """
-import os
-import time
-import atexit
-import mock
-from ddtrace.llmobs import LLMObs
-from ddtrace.internal.utils.http import Response
-from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_messages
-from tests.llmobs._utils import logs_vcr
-
-ctx = logs_vcr.use_cassette(
-    "tests.llmobs.test_llmobs_ragas_evaluators.emits_traces_and_evaluations_on_exit.yaml"
-)
-ctx.__enter__()
-atexit.register(lambda: ctx.__exit__())
-with mock.patch(
-    "ddtrace.internal.writer.HTTPWriter._send_payload",
-    return_value=Response(
-        status=200,
-        body="{}",
-    ),
-):
-    LLMObs.enable()
-    LLMObs._instance._evaluator_runner.enqueue(_llm_span_with_expected_ragas_inputs_in_messages(), None)
-""",
-        env=env,
-    )
-    assert status == 0, err
-    assert out == b""
-    assert err == b""
-
-
-def test_ragas_context_precision_init(ragas, LLMObs):
-    rcp_evaluator = RagasContextPrecisionEvaluator(LLMObs)
-    assert rcp_evaluator.llmobs_service == LLMObs
-    assert rcp_evaluator.ragas_context_precision_instance == ragas.metrics.context_precision
-    assert rcp_evaluator.ragas_context_precision_instance.llm == ragas.llms.llm_factory()
-
-
-def test_ragas_context_precision_throws_if_dependencies_not_present(LLMObs, mock_ragas_dependencies_not_present, ragas):
-    with pytest.raises(
-        NotImplementedError, match="Failed to load dependencies for `ragas_context_precision` evaluator"
-    ):
-        RagasContextPrecisionEvaluator(LLMObs)
-
-
-def test_ragas_context_precision_returns_none_if_inputs_extraction_fails(ragas, mock_llmobs_submit_evaluation, LLMObs):
-    rcp_evaluator = RagasContextPrecisionEvaluator(LLMObs)
-    failure_msg, _ = rcp_evaluator.evaluate(_llm_span_without_io())
-    assert failure_msg == "fail_extract_context_precision_inputs"
-    assert rcp_evaluator.llmobs_service.submit_evaluation.call_count == 0
-
-
-def test_ragas_context_precision_has_modified_context_precision_instance(
-    ragas, mock_llmobs_submit_evaluation, reset_ragas_context_precision_llm, LLMObs
-):
-    """Context precision instance used in ragas evaluator should match the global ragas context precision instance"""
-    from ragas.llms import BaseRagasLLM
-    from ragas.metrics import context_precision
-
-    class FirstDummyLLM(BaseRagasLLM):
-        def __init__(self):
-            super().__init__()
-
-        def generate_text(self) -> str:
-            return "dummy llm"
-
-        def agenerate_text(self) -> str:
-            return "dummy llm"
-
-    context_precision.llm = FirstDummyLLM()
-
-    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
-
-    assert rf_evaluator.ragas_context_precision_instance.llm.generate_text() == "dummy llm"
-
-    class SecondDummyLLM(BaseRagasLLM):
-        def __init__(self):
-            super().__init__()
-
-        def generate_text(self) -> str:
-            return "second dummy llm"
-
-        def agenerate_text(self) -> str:
-            return "second dummy llm"
-
-    context_precision.llm = SecondDummyLLM()
-
-    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
-
-    assert rf_evaluator.ragas_context_precision_instance.llm.generate_text() == "second dummy llm"
-
-
-@pytest.mark.vcr_logs
-def test_ragas_context_precision_submits_evaluation(ragas, LLMObs, mock_llmobs_submit_evaluation):
-    """Test that evaluation is submitted for a valid llm span where question is in the prompt variables"""
-    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
-    llm_span = _llm_span_with_expected_ragas_inputs_in_prompt()
-    rf_evaluator.run_and_submit_evaluation(llm_span)
-    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
-        [
-            mock.call(
-                span_context={
-                    "span_id": llm_span.get("span_id"),
-                    "trace_id": llm_span.get("trace_id"),
-                },
-                label=RagasContextPrecisionEvaluator.LABEL,
-                metric_type=RagasContextPrecisionEvaluator.METRIC_TYPE,
-                value=1.0,
-                metadata={
-                    "_dd.evaluation_kind": "context_precision",
-                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
-                },
-            )
-        ]
-    )
-
-
-@pytest.mark.vcr_logs
-def test_ragas_context_precision_submits_evaluation_on_span_with_question_in_messages(
-    ragas, LLMObs, mock_llmobs_submit_evaluation
-):
-    """Test that evaluation is submitted for a valid llm span where the last message content is the question"""
-    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
-    llm_span = _llm_span_with_expected_ragas_inputs_in_messages()
-    rf_evaluator.run_and_submit_evaluation(llm_span)
-    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
-        [
-            mock.call(
-                span_context={
-                    "span_id": llm_span.get("span_id"),
-                    "trace_id": llm_span.get("trace_id"),
-                },
-                label=RagasContextPrecisionEvaluator.LABEL,
-                metric_type=RagasContextPrecisionEvaluator.METRIC_TYPE,
-                value=1.0,
-                metadata={
-                    "_dd.evaluation_kind": "context_precision",
-                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
-                },
-            )
-        ]
-    )
-
-
-@pytest.mark.vcr_logs
-def test_ragas_context_precision_submits_evaluation_on_span_with_custom_keys(
-    ragas, LLMObs, mock_llmobs_submit_evaluation
-):
-    """Test that evaluation is submitted for a valid llm span where the last message content is the question"""
-    rf_evaluator = RagasContextPrecisionEvaluator(LLMObs)
-    llm_span = _expected_llmobs_llm_span_event(
-        Span("dummy"),
-        prompt={
-            "variables": {
-                "user_input": "Is france part of europe?",
-                "context_2": "irrelevant",
-                "context_3": "France is part of europe",
-            },
-            "_dd_context_variable_keys": ["context_2", "context_3"],
-            "_dd_query_variable_keys": ["user_input"],
-        },
-        output_messages=[{"content": "France is indeed part of europe"}],
-    )
-    rf_evaluator.run_and_submit_evaluation(llm_span)
-    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
-        [
-            mock.call(
-                span_context={
-                    "span_id": llm_span.get("span_id"),
-                    "trace_id": llm_span.get("trace_id"),
-                },
-                label=RagasContextPrecisionEvaluator.LABEL,
-                metric_type=RagasContextPrecisionEvaluator.METRIC_TYPE,
-                value=0.5,
-                metadata={
-                    "_dd.evaluation_kind": "context_precision",
-                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
-                },
-            )
-        ]
-    )
-
-
-@pytest.mark.vcr_logs
-def test_ragas_context_precision_emits_traces(ragas, LLMObs):
-    rcp_evaluator = RagasContextPrecisionEvaluator(LLMObs)
-    rcp_evaluator.evaluate(_llm_span_with_expected_ragas_inputs_in_prompt())
-    assert rcp_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_count == 2
-    calls = rcp_evaluator.llmobs_service._instance._llmobs_span_writer.enqueue.call_args_list
-
-    spans = [call[0][0] for call in calls]
-
-    # check name, io, span kinds match
-    assert spans == _expected_ragas_context_precision_spans()
-
-    # verify the trace structure
-    root_span = spans[0]
-    root_span_id = root_span["span_id"]
-    assert root_span["parent_id"] == "undefined"
-    assert root_span["meta"] is not None
-
-    root_span_trace_id = root_span["trace_id"]
-    for child_span in spans[1:]:
-        assert child_span["trace_id"] == root_span_trace_id
-        assert child_span["parent_id"] == root_span_id
-
-
-def test_llmobs_with_context_precision_emits_traces_and_evals_on_exit(mock_writer_logs, run_python_code_in_subprocess):
-    env = os.environ.copy()
-    pypath = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))]
-    if "PYTHONPATH" in env:
-        pypath.append(env["PYTHONPATH"])
-    env.update(
-        {
-            "DD_API_KEY": os.getenv("DD_API_KEY", "dummy-api-key"),
-            "DD_SITE": "datad0g.com",
-            "PYTHONPATH": ":".join(pypath),
-            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "dummy-openai-api-key"),
-            "DD_LLMOBS_ML_APP": "unnamed-ml-app",
-            "_DD_LLMOBS_EVALUATOR_INTERVAL": "5",
-            "_DD_LLMOBS_EVALUATORS": "ragas_context_precision",
-            "DD_LLMOBS_AGENTLESS_ENABLED": "true",
-        }
-    )
-    out, err, status, pid = run_python_code_in_subprocess(
-        """
-import os
-import time
-import atexit
-import mock
-from ddtrace.llmobs import LLMObs
-from ddtrace.internal.utils.http import Response
-from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_messages
-from tests.llmobs._utils import logs_vcr
-
-ctx = logs_vcr.use_cassette(
-    "tests.llmobs.test_llmobs_ragas_context_precision_evaluator.emits_traces_and_evaluations_on_exit.yaml"
-)
-ctx.__enter__()
-atexit.register(lambda: ctx.__exit__())
-with mock.patch(
-    "ddtrace.internal.writer.HTTPWriter._send_payload",
-    return_value=Response(
-        status=200,
-        body="{}",
-    ),
-):
-    LLMObs.enable()
-    LLMObs._instance._evaluator_runner.enqueue(_llm_span_with_expected_ragas_inputs_in_messages(), None)
-""",
-        env=env,
-    )
-    assert status == 0, err
-    assert out == b""
-    assert err == b""

From f627038ffdeee2d9a81bf0c3bceee6d719f7e7e8 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Tue, 17 Dec 2024 13:23:00 -0500
Subject: [PATCH 07/33] cleanmup

---
 ddtrace/llmobs/_llmobs.py          | 4 +---
 ddtrace/llmobs/_trace_processor.py | 3 ++-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 5ac7f63604c..b958232b6a7 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -266,6 +266,7 @@ def disable(cls) -> None:
         sys.settrace(None)
         log.debug("Disabling %s", cls.__name__)
         atexit.unregister(cls.disable)
+
         cls._instance.stop()
         cls.enabled = False
         cls._instance.tracer.deregister_on_start_span(cls._instance._do_annotations)
@@ -647,7 +648,6 @@ def annotate(
         span: Optional[Span] = None,
         parameters: Optional[Dict[str, Any]] = None,
         prompt: Optional[dict] = None,
-        span_links=None,
         input_data: Optional[Any] = None,
         output_data: Optional[Any] = None,
         metadata: Optional[Dict[str, Any]] = None,
@@ -714,8 +714,6 @@ def annotate(
             cls._tag_params(span, parameters)
         if _name is not None:
             span.name = _name
-        if span_links is not None:
-            span.set_tag("_ml_obs.span_links", safe_json(span_links))
         if prompt is not None:
             cls._tag_prompt(span, prompt)
         if not span_kind:
diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index c5ee020ae42..bf1669c5316 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -77,7 +77,6 @@ def submit_llmobs_span(self, span: Span) -> None:
 
     def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
         """Span event object structure."""
-
         span_kind = span._meta.pop(SPAN_KIND)
         meta: Dict[str, Any] = {"span.kind": span_kind, "input": {}, "output": {}}
         if span_kind in ("llm", "embedding") and span.get_tag(MODEL_NAME) is not None:
@@ -124,6 +123,7 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
             is_ragas_integration_span = True
 
         span.set_tag_str(ML_APP, ml_app)
+
         parent_id = str(_get_llmobs_parent_id(span) or "undefined")
         span._meta.pop(PARENT_ID_KEY, None)
 
@@ -152,6 +152,7 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
         if session_id is not None:
             span.set_tag_str(SESSION_ID, session_id)
             llmobs_span_event["session_id"] = session_id
+
         llmobs_span_event["tags"] = self._llmobs_tags(
             span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span
         )

From 55191a5348e17216b4d74d28eb096c0202f8c28d Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Tue, 17 Dec 2024 22:14:58 -0500
Subject: [PATCH 08/33] janky solution for list, str, dicts

---
 ddtrace/llmobs/_llmobs.py    | 91 +++++++++++++++++++++++++++++++++++-
 ddtrace/llmobs/decorators.py |  4 +-
 2 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index b958232b6a7..89721a76508 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -74,6 +74,62 @@
 }
 
 
+class TrackedStr(str):
+    def __add__(self, other):
+        result = super().__add__(other)
+        LLMObs._instance._record_relationship(self, other, "__add__")
+        return result
+
+    def __radd__(self, other):
+        result = super().__radd__(other)
+        LLMObs._instance._record_relationship(self, other, "__radd__")
+        return result
+
+    def format(self, *args, **kwargs):
+        result = super().format(*args, **kwargs)
+        for arg in args:
+            LLMObs._instance._record_relationship(self, arg, "format")
+        for key, value in kwargs.items():
+            LLMObs._instance._record_relationship(self, value, "format")
+        return result
+
+    def split(self, *args, **kwargs):
+        return super().split(*args, **kwargs)
+
+    def join(self, *args, **kwargs):
+        return super().join(*args, **kwargs)
+
+
+class TrackedList(list):
+    def append(self, item):
+        result = super().append(item)
+        LLMObs._instance._record_relationship(self, item, "append")
+        return result
+
+    def extend(self, iterable):
+        result = super().extend(iterable)
+        LLMObs._instance._record_relationship(self, iterable, "extend")
+        return result
+
+    def __add__(self, other):
+        result = super().__add__(other)
+        LLMObs._instance._record_relationship(self, other, "__add__")
+        return result
+
+
+class TrackedDict(dict):
+    def update(self, *args, **kwargs):
+        result = super().update(*args, **kwargs)
+        for arg in args:
+            LLMObs._instance._record_relationship(self, arg, "update")
+        return result
+
+    def __setitem__(self, key, value):
+        super().__setitem__(key, value)
+        LLMObs._instance._record_relationship(self, value, "__setitem__")
+        LLMObs._instance._record_relationship(self, key, "__setitem__")
+
+
 class LLMObs(Service):
     _instance = None  # type: LLMObs
     enabled = False
@@ -303,15 +359,45 @@ def trace_call(self, frame, event, arg):
                 incoming_obj = frame.f_locals.get(arg_name)
 
                 # Check for object creation or modification
-                if method_name in ["__init__", "__new__", "__add__", "append", "extend", "update"]:
+                if method_name in [
+                    "__init__",
+                    "__new__",
+                    "__add__",
+                    "append",
+                    "extend",
+                    "update",
+                ]:
                     self._record_relationship(source_obj, incoming_obj, f"{method_name} operation")
         except Exception as e:
             print("Error capturing object interactions ", e)
 
         return self.trace_call
 
+    def search_for_links(self, obj):
+        print("searching for links on")
+        print(obj)
+        links = []
+        if isinstance(obj, dict):
+            for _, value in obj.items():
+                links += self.search_for_links(value)
+        elif isinstance(obj, list):
+            for value in obj:
+                links += self.search_for_links(value)
+        links += self.get_span_links(obj)
+        return links
+
     def record_object(self, span, obj, to):
-        carried_span_links = self.get_span_links(obj)
+        carried_span_links = []
+        if isinstance(obj, dict):
+            obj = TrackedDict(obj)
+            carried_span_links += self.search_for_links(obj)
+        elif isinstance(obj, list):
+            obj = TrackedList(obj)
+            carried_span_links += self.search_for_links(obj)
+        elif isinstance(obj, str):
+            obj = TrackedStr(obj)
+
+        carried_span_links += self.get_span_links(obj)
         current_span_links = []
         for span_link in carried_span_links:
             if span_link["attributes"]["from"] == "input" and to == "output":
@@ -332,6 +418,7 @@ def record_object(self, span, obj, to):
                 }
             ],
         )
+        return obj
 
     def _tag_span_links(self, span, span_links):
         current_span_links = span.get_tag("_ml_obs.span_links")
diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py
index 775f813f7bf..7a245544825 100644
--- a/ddtrace/llmobs/decorators.py
+++ b/ddtrace/llmobs/decorators.py
@@ -126,7 +126,7 @@ def wrapper(*args, **kwargs):
                     traced_operation = getattr(LLMObs, operation_kind, "workflow")
                     with traced_operation(name=span_name, session_id=session_id, ml_app=ml_app) as span:
                         for arg in args:
-                            LLMObs._instance.record_object(span, arg, "input")
+                            arg = LLMObs._instance.record_object(span, arg, "input")
                         func_signature = signature(func)
                         bound_args = func_signature.bind_partial(*args, **kwargs)
                         if _automatic_io_annotation and bound_args.arguments:
@@ -139,7 +139,7 @@ def wrapper(*args, **kwargs):
                             and span.get_tag(OUTPUT_VALUE) is None
                         ):
                             LLMObs.annotate(span=span, output_data=resp)
-                        LLMObs._instance.record_object(span, resp, "output")
+                        resp = LLMObs._instance.record_object(span, resp, "output")
                         return resp
 
             return wrapper

From 72769ccb2cba34be0463fb0e6fddd7310317f378 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Wed, 18 Dec 2024 11:43:31 -0500
Subject: [PATCH 09/33] add default logic for parents

---
 ddtrace/llmobs/_llmobs.py          | 6 ++++--
 ddtrace/llmobs/_trace_processor.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 89721a76508..d6fd5a2df9f 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -374,8 +374,6 @@ def trace_call(self, frame, event, arg):
         return self.trace_call
 
     def search_for_links(self, obj):
-        print("searching for links on")
-        print(obj)
         links = []
         if isinstance(obj, dict):
             for _, value in obj.items():
@@ -795,6 +793,10 @@ def annotate(
             cls._tag_metrics(span, metrics)
         if tags is not None:
             cls._tag_span_tags(span, tags)
+        if input_data:
+            LLMObs._instance.record_object(span, input_data, "input")
+        elif output_data:
+            LLMObs._instance.record_object(span, output_data, "output")
         span_kind = span.get_tag(SPAN_KIND)
         if parameters is not None:
             log.warning("Setting parameters is deprecated, please set parameters and other metadata as tags instead.")
diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index bf1669c5316..3dc76f1b896 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -139,7 +139,7 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
             "metrics": metrics,
         }
 
-        links = []
+        links = [{"trace_id": "{:x}".format(span.trace_id), "span_id": parent_id, "attributes": {"from": "input", "to": "input"}}]
 
         if span.get_tag("_ml_obs.span_links") is not None:
             links += json.loads(span._meta.pop("_ml_obs.span_links"))

From a4ead2964aed59b087df419a167d789720dcd70b Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Wed, 18 Dec 2024 14:04:47 -0500
Subject: [PATCH 10/33] catch fns returning iterables

---
 ddtrace/llmobs/_llmobs.py          | 35 ++++++++++++++++++++----------
 ddtrace/llmobs/_trace_processor.py |  8 ++++++-
 ddtrace/llmobs/decorators.py       |  5 +++--
 3 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index d6fd5a2df9f..ce5cea7adfd 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -77,20 +77,26 @@
 class TrackedStr(str):
     def __add__(self, other):
         result = super().__add__(other)
-        LLMObs._instance._record_relationship(self, other, "__add__")
+        result = TrackedStr(result)
+        LLMObs._instance._record_relationship(result, other, "__add__")
+        LLMObs._instance._record_relationship(result, self, "__add__")
         return result
 
     def __radd__(self, other):
         result = super().__radd__(other)
-        LLMObs._instance._record_relationship(self, other, "__radd__")
+        result = TrackedStr(result)
+        LLMObs._instance._record_relationship(result, other, "__radd__")
+        LLMObs._instance._record_relationship(result, self, "__radd__")
         return result
 
     def format(self, *args, **kwargs):
         result = super().format(*args, **kwargs)
+        result = TrackedStr(result)
         for arg in args:
-            LLMObs._instance._record_relationship(self, arg, "format")
+            LLMObs._instance._record_relationship(result, arg, "format")
         for key, value in kwargs.items():
-            LLMObs._instance._record_relationship(self, value, "format")
+            LLMObs._instance._record_relationship(result, value, "format")
+        LLMObs._instance._record_relationship(result, self, "format")
         return result
 
     def split(self, *args, **kwargs):
@@ -387,13 +393,19 @@ def search_for_links(self, obj):
     def record_object(self, span, obj, to):
         carried_span_links = []
         if isinstance(obj, dict):
-            obj = TrackedDict(obj)
+            old_obj = obj
+            obj = TrackedDict(old_obj)
+            self._record_relationship(obj, old_obj, "inherit")
             carried_span_links += self.search_for_links(obj)
         elif isinstance(obj, list):
-            obj = TrackedList(obj)
+            old_obj = obj
+            obj = TrackedList(old_obj)
+            self._record_relationship(obj, old_obj, "inherit")
             carried_span_links += self.search_for_links(obj)
         elif isinstance(obj, str):
-            obj = TrackedStr(obj)
+            old_obj = obj
+            obj = TrackedStr(old_obj)
+            self._record_relationship(obj, old_obj, "inherit")
 
         carried_span_links += self.get_span_links(obj)
         current_span_links = []
@@ -419,6 +431,7 @@ def record_object(self, span, obj, to):
         return obj
 
     def _tag_span_links(self, span, span_links):
+        span_links = [span_link for span_link in span_links if span_link["span_id"] != LLMObs.export_span(span)["span_id"]]
         current_span_links = span.get_tag("_ml_obs.span_links")
         if current_span_links:
             current_span_links = json.loads(current_span_links)
@@ -787,16 +800,16 @@ def annotate(
         if span.finished:
             log.warning("Cannot annotate a finished span.")
             return
+        if input_data is not None:
+            cls._instance.record_object(span, input_data, "input")
+        if output_data is not None:
+            cls._instance.record_object(span, output_data, "output")
         if metadata is not None:
             cls._tag_metadata(span, metadata)
         if metrics is not None:
             cls._tag_metrics(span, metrics)
         if tags is not None:
             cls._tag_span_tags(span, tags)
-        if input_data:
-            LLMObs._instance.record_object(span, input_data, "input")
-        elif output_data:
-            LLMObs._instance.record_object(span, output_data, "output")
         span_kind = span.get_tag(SPAN_KIND)
         if parameters is not None:
             log.warning("Setting parameters is deprecated, please set parameters and other metadata as tags instead.")
diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index 3dc76f1b896..dcbb4347a22 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -139,7 +139,13 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
             "metrics": metrics,
         }
 
-        links = [{"trace_id": "{:x}".format(span.trace_id), "span_id": parent_id, "attributes": {"from": "input", "to": "input"}}]
+        links = [
+            # {
+            #     "trace_id": "{:x}".format(span.trace_id),
+            #     "span_id": parent_id,
+            #     "attributes": {"from": "input", "to": "input"},
+            # }
+        ]
 
         if span.get_tag("_ml_obs.span_links") is not None:
             links += json.loads(span._meta.pop("_ml_obs.span_links"))
diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py
index 7a245544825..f3f96e73434 100644
--- a/ddtrace/llmobs/decorators.py
+++ b/ddtrace/llmobs/decorators.py
@@ -125,8 +125,7 @@ def wrapper(*args, **kwargs):
                         span_name = func.__name__
                     traced_operation = getattr(LLMObs, operation_kind, "workflow")
                     with traced_operation(name=span_name, session_id=session_id, ml_app=ml_app) as span:
-                        for arg in args:
-                            arg = LLMObs._instance.record_object(span, arg, "input")
+                        args = [LLMObs._instance.record_object(span, arg, "input") for arg in args]
                         func_signature = signature(func)
                         bound_args = func_signature.bind_partial(*args, **kwargs)
                         if _automatic_io_annotation and bound_args.arguments:
@@ -140,6 +139,8 @@ def wrapper(*args, **kwargs):
                         ):
                             LLMObs.annotate(span=span, output_data=resp)
                         resp = LLMObs._instance.record_object(span, resp, "output")
+                        if isinstance(resp, iter):
+                            resp = [LLMObs._instance.record_object(span, r, "output") for r in resp]
                         return resp
 
             return wrapper

From 6ce0a6f64ed45450ff8fe2f580feea28d9b2f77a Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Tue, 24 Dec 2024 13:22:21 -0500
Subject: [PATCH 11/33] more comp obj rel tracking

---
 ddtrace/llmobs/_llmobs.py    | 185 ++++++++++++++++++++++-------------
 ddtrace/llmobs/decorators.py |   7 +-
 2 files changed, 118 insertions(+), 74 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index ce5cea7adfd..6a71c313fe7 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -74,6 +74,42 @@
 }
 
 
+def track_object_interactions(frame, event, arg):
+    """Track method calls and object interactions."""
+    if event != "call":
+        return track_object_interactions
+
+    # Get function details
+    source_obj = frame.f_locals.get("self")
+    method_name = frame.f_code.co_name
+
+    # Track object creation or modification
+    try:
+        # Capture arguments that might involve object relationships
+        args = inspect.getargvalues(frame)
+
+        # Look for potential object interactions
+        for arg_name in args.args:
+            if arg_name == "self":
+                continue
+            incoming_obj = frame.f_locals.get(arg_name)
+
+            # Check for object creation or modification
+            if method_name in [
+                "__init__",
+                "__new__",
+                "__add__",
+                "append",
+                "extend",
+                "update",
+            ]:
+                LLMObs._instance._record_relationship(source_obj, incoming_obj, f"{method_name} operation")
+    except Exception as e:
+        print("Error capturing object interactions ", e)
+
+    return track_object_interactions
+
+
 class TrackedStr(str):
     def __add__(self, other):
         result = super().__add__(other)
@@ -107,6 +143,11 @@ def join(self, *args, **kwargs):
 
 
 class TrackedList(list):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for item in self:
+            LLMObs._instance._record_relationship(self, item, "__init__")
+
     def append(self, item):
         result = super().append(item)
         LLMObs._instance._record_relationship(self, item, "append")
@@ -117,6 +158,15 @@ def extend(self, iterable):
         LLMObs._instance._record_relationship(self, iterable, "extend")
         return result
 
+    def __delitem__(self, key) -> None:
+        if key < len(self):
+            LLMObs._instance._remove_relationship(self, self[key], "__delitem__")
+        super().__delitem__(key)
+
+    def __setitem__(self, key, value):
+        super().__setitem__(key, value)
+        LLMObs._instance._record_relationship(self, value, "__setitem__")
+
     def __add__(self, other):
         result = super().__add__(other)
         LLMObs._instance._record_relationship(self, other, "__add__")
@@ -124,13 +174,42 @@ def __add__(self, other):
 
 
 class TrackedDict(dict):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for key, value in self.items():
+            LLMObs._instance._record_relationship(self, key, "__init__")
+            LLMObs._instance._record_relationship(self, value, "__init__")
+
+    def fromkeys(self, *args, **kwargs):
+        ret = super().fromkeys(*args, **kwargs)
+        for key, value in ret.items():
+            LLMObs._instance._record_relationship(ret, key, "fromkeys")
+            LLMObs._instance._record_relationship(ret, value, "fromkeys")
+        return ret
+
     def update(self, *args, **kwargs):
         result = super().update(*args, **kwargs)
         for arg in args:
             LLMObs._instance._record_relationship(self, arg, "update")
         return result
 
+    def pop(self, key, *args, **kwargs):
+        val = super().pop(key, *args, **kwargs)
+        LLMObs._instance._remove_relationship(self, key, "pop")
+        if val is not None:
+            LLMObs._instance._remove_relationship(self, val, "pop")
+        return val
+
+    def __delitem__(self, key) -> None:
+        val = self.get(key)
+        super().__delitem__(key)
+        LLMObs._instance._remove_relationship(self, key, "__delitem__")
+        if val is not None:
+            LLMObs._instance._remove_relationship(self, val, "__delitem__")
+
     def __setitem__(self, key, value):
+        if key in self:
+            LLMObs._instance._remove_relationship(self, self[key], "__setitem__")
         super().__setitem__(key, value)
         LLMObs._instance._record_relationship(self, value, "__setitem__")
         LLMObs._instance._record_relationship(self, key, "__setitem__")
@@ -171,7 +250,8 @@ def __init__(self, tracer=None):
         self.tracer.on_start_span(self._do_annotations)
 
         self._object_span_links = {}
-        sys.settrace(self.trace_call)
+        self._object_relationships = {}
+        sys.settrace(track_object_interactions)
 
     def _do_annotations(self, span):
         # get the current span context
@@ -336,80 +416,45 @@ def disable(cls) -> None:
 
         log.debug("%s disabled", cls.__name__)
 
-    def _record_relationship(self, source_obj, incoming_obj, operation):
+    def _remove_relationship(self, source_obj, incoming_obj, operation):
         """Record a relationship between objects."""
         if source_obj is None or incoming_obj is None:
             return
-        # print(f"Recording relationship between {source_obj} and {incoming_obj} with operation {operation}")
-        span_links = self.get_span_links(incoming_obj)
-        self.add_span_links(source_obj, span_links)
+        if source_obj not in self._object_relationships:
+            self._object_relationships[self.get_obj_id(source_obj)] = set()
+        self._object_relationships[self.get_obj_id(source_obj)].remove(self.get_obj_id(incoming_obj))
 
-    def trace_call(self, frame, event, arg):
-        """Track method calls and object interactions."""
-        if event != "call":
-            return self.trace_call
-
-        # Get function details
-        source_obj = frame.f_locals.get("self")
-        method_name = frame.f_code.co_name
-
-        # Track object creation or modification
-        try:
-            # Capture arguments that might involve object relationships
-            args = inspect.getargvalues(frame)
-
-            # Look for potential object interactions
-            for arg_name in args.args:
-                if arg_name == "self":
-                    continue
-                incoming_obj = frame.f_locals.get(arg_name)
-
-                # Check for object creation or modification
-                if method_name in [
-                    "__init__",
-                    "__new__",
-                    "__add__",
-                    "append",
-                    "extend",
-                    "update",
-                ]:
-                    self._record_relationship(source_obj, incoming_obj, f"{method_name} operation")
-        except Exception as e:
-            print("Error capturing object interactions ", e)
-
-        return self.trace_call
-
-    def search_for_links(self, obj):
-        links = []
-        if isinstance(obj, dict):
-            for _, value in obj.items():
-                links += self.search_for_links(value)
-        elif isinstance(obj, list):
-            for value in obj:
-                links += self.search_for_links(value)
-        links += self.get_span_links(obj)
-        return links
+    def _record_relationship(self, source_obj, incoming_obj, operation):
+        """Record a relationship between objects."""
+        if source_obj is None or incoming_obj is None:
+            return
+        if self.get_obj_id(source_obj) not in self._object_relationships:
+            self._object_relationships[self.get_obj_id(source_obj)] = set()
+        self._object_relationships[self.get_obj_id(source_obj)].add(self.get_obj_id(incoming_obj))
+
+    def get_all_links(self, obj_id, visited: Optional[set] = None, links: Optional[list] = None):
+        if visited is None:
+            visited = set()
+        visited.add(obj_id)
+        if links is None:
+            links = []
+        if obj_id not in self._object_relationships:
+            return self._object_span_links.get(obj_id, [])
+        for obj in self._object_relationships[obj_id]:
+            if obj not in visited:
+                links += self.get_all_links(obj, visited, links)
+        return self._object_span_links.get(obj_id, [])
 
     def record_object(self, span, obj, to):
-        carried_span_links = []
-        if isinstance(obj, dict):
-            old_obj = obj
-            obj = TrackedDict(old_obj)
-            self._record_relationship(obj, old_obj, "inherit")
-            carried_span_links += self.search_for_links(obj)
-        elif isinstance(obj, list):
-            old_obj = obj
-            obj = TrackedList(old_obj)
-            self._record_relationship(obj, old_obj, "inherit")
-            carried_span_links += self.search_for_links(obj)
-        elif isinstance(obj, str):
-            old_obj = obj
-            obj = TrackedStr(old_obj)
-            self._record_relationship(obj, old_obj, "inherit")
-
-        carried_span_links += self.get_span_links(obj)
+        if isinstance(obj, str) and not isinstance(obj, TrackedStr):
+            obj = TrackedStr(obj)
+        elif isinstance(obj, list) and not isinstance(obj, TrackedList):
+            obj = TrackedList(obj)
+        elif isinstance(obj, dict) and not isinstance(obj, TrackedDict):
+            obj = TrackedDict(obj)
+
         current_span_links = []
-        for span_link in carried_span_links:
+        for span_link in self.get_all_links(self.get_obj_id(obj)):
             if span_link["attributes"]["from"] == "input" and to == "output":
                 continue
             span_link["attributes"]["to"] = to
@@ -431,7 +476,9 @@ def record_object(self, span, obj, to):
         return obj
 
     def _tag_span_links(self, span, span_links):
-        span_links = [span_link for span_link in span_links if span_link["span_id"] != LLMObs.export_span(span)["span_id"]]
+        span_links = [
+            span_link for span_link in span_links if span_link["span_id"] != LLMObs.export_span(span)["span_id"]
+        ]
         current_span_links = span.get_tag("_ml_obs.span_links")
         if current_span_links:
             current_span_links = json.loads(current_span_links)
diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py
index f3f96e73434..f3534bcd991 100644
--- a/ddtrace/llmobs/decorators.py
+++ b/ddtrace/llmobs/decorators.py
@@ -125,7 +125,7 @@ def wrapper(*args, **kwargs):
                         span_name = func.__name__
                     traced_operation = getattr(LLMObs, operation_kind, "workflow")
                     with traced_operation(name=span_name, session_id=session_id, ml_app=ml_app) as span:
-                        args = [LLMObs._instance.record_object(span, arg, "input") for arg in args]
+                        args = tuple(LLMObs._instance.record_object(span, arg, "input") for arg in args)
                         func_signature = signature(func)
                         bound_args = func_signature.bind_partial(*args, **kwargs)
                         if _automatic_io_annotation and bound_args.arguments:
@@ -138,10 +138,7 @@ def wrapper(*args, **kwargs):
                             and span.get_tag(OUTPUT_VALUE) is None
                         ):
                             LLMObs.annotate(span=span, output_data=resp)
-                        resp = LLMObs._instance.record_object(span, resp, "output")
-                        if isinstance(resp, iter):
-                            resp = [LLMObs._instance.record_object(span, r, "output") for r in resp]
-                        return resp
+                        return LLMObs._instance.record_object(span, resp, "output")
 
             return wrapper
 

From baf952754059cab8b2c19a18d24d7cd86c886b70 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Mon, 30 Dec 2024 10:07:14 -0500
Subject: [PATCH 12/33] refactor

---
 ddtrace/llmobs/_links.py           | 194 ++++++++++++++++++++++++++++
 ddtrace/llmobs/_llmobs.py          | 196 ++---------------------------
 ddtrace/llmobs/_trace_processor.py |   8 +-
 3 files changed, 204 insertions(+), 194 deletions(-)
 create mode 100644 ddtrace/llmobs/_links.py

diff --git a/ddtrace/llmobs/_links.py b/ddtrace/llmobs/_links.py
new file mode 100644
index 00000000000..f18db3e3737
--- /dev/null
+++ b/ddtrace/llmobs/_links.py
@@ -0,0 +1,194 @@
+import inspect
+from typing import Optional
+
+
+_object_span_links = {}
+_object_relationships = {}
+
+
+def track_object_interactions(frame, event, arg):
+    """Track method calls and object interactions."""
+    if event != "call":
+        return track_object_interactions
+
+    # Get function details
+    source_obj = frame.f_locals.get("self")
+    method_name = frame.f_code.co_name
+
+    # Track object creation or modification
+    try:
+        # Capture arguments that might involve object relationships
+        args = inspect.getargvalues(frame)
+
+        # Look for potential object interactions
+        for arg_name in args.args:
+            if arg_name == "self":
+                continue
+            incoming_obj = frame.f_locals.get(arg_name)
+
+            # Check for object creation or modification
+            if method_name in [
+                "__init__",
+                "__new__",
+                "__add__",
+                "append",
+                "extend",
+                "update",
+            ]:
+                _record_relationship(source_obj, incoming_obj, f"{method_name} operation")
+    except Exception as e:
+        print("Error capturing object interactions ", e)
+
+    return track_object_interactions
+
+
+class TrackedStr(str):
+    def __add__(self, other):
+        result = super().__add__(other)
+        result = TrackedStr(result)
+        _record_relationship(result, other, "__add__")
+        _record_relationship(result, self, "__add__")
+        return result
+
+    def __radd__(self, other):
+        result = super().__radd__(other)
+        result = TrackedStr(result)
+        _record_relationship(result, other, "__radd__")
+        _record_relationship(result, self, "__radd__")
+        return result
+
+    def format(self, *args, **kwargs):
+        result = super().format(*args, **kwargs)
+        result = TrackedStr(result)
+        for arg in args:
+            _record_relationship(result, arg, "format")
+        for key, value in kwargs.items():
+            _record_relationship(result, value, "format")
+        _record_relationship(result, self, "format")
+        return result
+
+    def split(self, *args, **kwargs):
+        return super().split(*args, **kwargs)
+
+    def join(self, *args, **kwargs):
+        return super().join(*args, **kwargs)
+
+
+class TrackedList(list):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for item in self:
+            _record_relationship(self, item, "__init__")
+
+    def append(self, item):
+        result = super().append(item)
+        _record_relationship(self, item, "append")
+        return result
+
+    def extend(self, iterable):
+        result = super().extend(iterable)
+        _record_relationship(self, iterable, "extend")
+        return result
+
+    def __delitem__(self, key) -> None:
+        if key < len(self):
+            _remove_relationship(self, self[key], "__delitem__")
+        super().__delitem__(key)
+
+    def __setitem__(self, key, value):
+        super().__setitem__(key, value)
+        _record_relationship(self, value, "__setitem__")
+
+    def __add__(self, other):
+        result = super().__add__(other)
+        _record_relationship(self, other, "__add__")
+        return result
+
+
+class TrackedDict(dict):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for key, value in self.items():
+            _record_relationship(self, key, "__init__")
+            _record_relationship(self, value, "__init__")
+
+    def fromkeys(self, *args, **kwargs):
+        ret = super().fromkeys(*args, **kwargs)
+        for key, value in ret.items():
+            _record_relationship(ret, key, "fromkeys")
+            _record_relationship(ret, value, "fromkeys")
+        return ret
+
+    def update(self, *args, **kwargs):
+        result = super().update(*args, **kwargs)
+        for arg in args:
+            _record_relationship(self, arg, "update")
+        return result
+
+    def pop(self, key, *args, **kwargs):
+        val = super().pop(key, *args, **kwargs)
+        _remove_relationship(self, key, "pop")
+        if val is not None:
+            _remove_relationship(self, val, "pop")
+        return val
+
+    def __delitem__(self, key) -> None:
+        val = self.get(key)
+        super().__delitem__(key)
+        _remove_relationship(self, key, "__delitem__")
+        if val is not None:
+            _remove_relationship(self, val, "__delitem__")
+
+    def __setitem__(self, key, value):
+        if key in self:
+            _remove_relationship(self, self[key], "__setitem__")
+        super().__setitem__(key, value)
+        _record_relationship(self, value, "__setitem__")
+        _record_relationship(self, key, "__setitem__")
+
+
+def _remove_relationship(source_obj, incoming_obj, operation):
+    """Record a relationship between objects."""
+    if source_obj is None or incoming_obj is None:
+        return
+    if source_obj not in _object_relationships:
+        _object_relationships[get_obj_id(source_obj)] = set()
+    _object_relationships[get_obj_id(source_obj)].remove(get_obj_id(incoming_obj))
+
+
+def _record_relationship(source_obj, incoming_obj, operation):
+    """Record a relationship between objects."""
+    if source_obj is None or incoming_obj is None:
+        return
+    if get_obj_id(source_obj) not in _object_relationships:
+        _object_relationships[get_obj_id(source_obj)] = set()
+    _object_relationships[get_obj_id(source_obj)].add(get_obj_id(incoming_obj))
+
+
+def get_obj_id(obj):
+    return f"{type(obj).__name__}_{id(obj)}"
+
+
+def add_span_links(obj, span_links):
+    obj_id = get_obj_id(obj)
+    if obj_id not in _object_span_links:
+        _object_span_links[obj_id] = []
+    _object_span_links[obj_id] += span_links
+
+
+def get_span_links(obj):
+    return _object_span_links.get(get_obj_id(obj), [])
+
+
+def search_links_from_relationships(obj_id, visited: Optional[set] = None, links: Optional[list] = None):
+    if visited is None:
+        visited = set()
+    visited.add(obj_id)
+    if links is None:
+        links = []
+    if obj_id not in _object_relationships:
+        return _object_span_links.get(obj_id, [])
+    for obj in _object_relationships[obj_id]:
+        if obj not in visited:
+            links += search_links_from_relationships(obj, visited, links)
+    return _object_span_links.get(obj_id, [])
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 6a71c313fe7..a86badcef85 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -1,4 +1,3 @@
-import inspect
 import json
 import os
 import sys
@@ -46,6 +45,13 @@
 from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
 from ddtrace.llmobs._constants import TAGS
 from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
+from ddtrace.llmobs._links import TrackedDict
+from ddtrace.llmobs._links import TrackedList
+from ddtrace.llmobs._links import TrackedStr
+from ddtrace.llmobs._links import add_span_links
+from ddtrace.llmobs._links import get_obj_id
+from ddtrace.llmobs._links import search_links_from_relationships
+from ddtrace.llmobs._links import track_object_interactions
 from ddtrace.llmobs._trace_processor import LLMObsTraceProcessor
 from ddtrace.llmobs._utils import AnnotationContext
 from ddtrace.llmobs._utils import _get_llmobs_parent_id
@@ -74,147 +80,6 @@
 }
 
 
-def track_object_interactions(frame, event, arg):
-    """Track method calls and object interactions."""
-    if event != "call":
-        return track_object_interactions
-
-    # Get function details
-    source_obj = frame.f_locals.get("self")
-    method_name = frame.f_code.co_name
-
-    # Track object creation or modification
-    try:
-        # Capture arguments that might involve object relationships
-        args = inspect.getargvalues(frame)
-
-        # Look for potential object interactions
-        for arg_name in args.args:
-            if arg_name == "self":
-                continue
-            incoming_obj = frame.f_locals.get(arg_name)
-
-            # Check for object creation or modification
-            if method_name in [
-                "__init__",
-                "__new__",
-                "__add__",
-                "append",
-                "extend",
-                "update",
-            ]:
-                LLMObs._instance._record_relationship(source_obj, incoming_obj, f"{method_name} operation")
-    except Exception as e:
-        print("Error capturing object interactions ", e)
-
-    return track_object_interactions
-
-
-class TrackedStr(str):
-    def __add__(self, other):
-        result = super().__add__(other)
-        result = TrackedStr(result)
-        LLMObs._instance._record_relationship(result, other, "__add__")
-        LLMObs._instance._record_relationship(result, self, "__add__")
-        return result
-
-    def __radd__(self, other):
-        result = super().__radd__(other)
-        result = TrackedStr(result)
-        LLMObs._instance._record_relationship(result, other, "__radd__")
-        LLMObs._instance._record_relationship(result, self, "__radd__")
-        return result
-
-    def format(self, *args, **kwargs):
-        result = super().format(*args, **kwargs)
-        result = TrackedStr(result)
-        for arg in args:
-            LLMObs._instance._record_relationship(result, arg, "format")
-        for key, value in kwargs.items():
-            LLMObs._instance._record_relationship(result, value, "format")
-        LLMObs._instance._record_relationship(result, self, "format")
-        return result
-
-    def split(self, *args, **kwargs):
-        return super().split(*args, **kwargs)
-
-    def join(self, *args, **kwargs):
-        return super().join(*args, **kwargs)
-
-
-class TrackedList(list):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        for item in self:
-            LLMObs._instance._record_relationship(self, item, "__init__")
-
-    def append(self, item):
-        result = super().append(item)
-        LLMObs._instance._record_relationship(self, item, "append")
-        return result
-
-    def extend(self, iterable):
-        result = super().extend(iterable)
-        LLMObs._instance._record_relationship(self, iterable, "extend")
-        return result
-
-    def __delitem__(self, key) -> None:
-        if key < len(self):
-            LLMObs._instance._remove_relationship(self, self[key], "__delitem__")
-        super().__delitem__(key)
-
-    def __setitem__(self, key, value):
-        super().__setitem__(key, value)
-        LLMObs._instance._record_relationship(self, value, "__setitem__")
-
-    def __add__(self, other):
-        result = super().__add__(other)
-        LLMObs._instance._record_relationship(self, other, "__add__")
-        return result
-
-
-class TrackedDict(dict):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        for key, value in self.items():
-            LLMObs._instance._record_relationship(self, key, "__init__")
-            LLMObs._instance._record_relationship(self, value, "__init__")
-
-    def fromkeys(self, *args, **kwargs):
-        ret = super().fromkeys(*args, **kwargs)
-        for key, value in ret.items():
-            LLMObs._instance._record_relationship(ret, key, "fromkeys")
-            LLMObs._instance._record_relationship(ret, value, "fromkeys")
-        return ret
-
-    def update(self, *args, **kwargs):
-        result = super().update(*args, **kwargs)
-        for arg in args:
-            LLMObs._instance._record_relationship(self, arg, "update")
-        return result
-
-    def pop(self, key, *args, **kwargs):
-        val = super().pop(key, *args, **kwargs)
-        LLMObs._instance._remove_relationship(self, key, "pop")
-        if val is not None:
-            LLMObs._instance._remove_relationship(self, val, "pop")
-        return val
-
-    def __delitem__(self, key) -> None:
-        val = self.get(key)
-        super().__delitem__(key)
-        LLMObs._instance._remove_relationship(self, key, "__delitem__")
-        if val is not None:
-            LLMObs._instance._remove_relationship(self, val, "__delitem__")
-
-    def __setitem__(self, key, value):
-        if key in self:
-            LLMObs._instance._remove_relationship(self, self[key], "__setitem__")
-        super().__setitem__(key, value)
-        LLMObs._instance._record_relationship(self, value, "__setitem__")
-        LLMObs._instance._record_relationship(self, key, "__setitem__")
-
-
 class LLMObs(Service):
     _instance = None  # type: LLMObs
     enabled = False
@@ -249,8 +114,6 @@ def __init__(self, tracer=None):
         self._annotation_context_lock = forksafe.RLock()
         self.tracer.on_start_span(self._do_annotations)
 
-        self._object_span_links = {}
-        self._object_relationships = {}
         sys.settrace(track_object_interactions)
 
     def _do_annotations(self, span):
@@ -416,35 +279,6 @@ def disable(cls) -> None:
 
         log.debug("%s disabled", cls.__name__)
 
-    def _remove_relationship(self, source_obj, incoming_obj, operation):
-        """Record a relationship between objects."""
-        if source_obj is None or incoming_obj is None:
-            return
-        if source_obj not in self._object_relationships:
-            self._object_relationships[self.get_obj_id(source_obj)] = set()
-        self._object_relationships[self.get_obj_id(source_obj)].remove(self.get_obj_id(incoming_obj))
-
-    def _record_relationship(self, source_obj, incoming_obj, operation):
-        """Record a relationship between objects."""
-        if source_obj is None or incoming_obj is None:
-            return
-        if self.get_obj_id(source_obj) not in self._object_relationships:
-            self._object_relationships[self.get_obj_id(source_obj)] = set()
-        self._object_relationships[self.get_obj_id(source_obj)].add(self.get_obj_id(incoming_obj))
-
-    def get_all_links(self, obj_id, visited: Optional[set] = None, links: Optional[list] = None):
-        if visited is None:
-            visited = set()
-        visited.add(obj_id)
-        if links is None:
-            links = []
-        if obj_id not in self._object_relationships:
-            return self._object_span_links.get(obj_id, [])
-        for obj in self._object_relationships[obj_id]:
-            if obj not in visited:
-                links += self.get_all_links(obj, visited, links)
-        return self._object_span_links.get(obj_id, [])
-
     def record_object(self, span, obj, to):
         if isinstance(obj, str) and not isinstance(obj, TrackedStr):
             obj = TrackedStr(obj)
@@ -454,14 +288,14 @@ def record_object(self, span, obj, to):
             obj = TrackedDict(obj)
 
         current_span_links = []
-        for span_link in self.get_all_links(self.get_obj_id(obj)):
+        for span_link in search_links_from_relationships(get_obj_id(obj)):
             if span_link["attributes"]["from"] == "input" and to == "output":
                 continue
             span_link["attributes"]["to"] = to
             current_span_links.append(span_link)
 
         self._tag_span_links(span, current_span_links)
-        self.add_span_links(
+        add_span_links(
             obj,
             [
                 {
@@ -485,18 +319,6 @@ def _tag_span_links(self, span, span_links):
             span_links = current_span_links + span_links
         span.set_tag_str("_ml_obs.span_links", safe_json(span_links))
 
-    def get_obj_id(self, obj):
-        return f"{type(obj).__name__}_{id(obj)}"
-
-    def add_span_links(self, obj, span_links):
-        obj_id = self.get_obj_id(obj)
-        if obj_id not in self._object_span_links:
-            self._object_span_links[obj_id] = []
-        self._object_span_links[obj_id] += span_links
-
-    def get_span_links(self, obj):
-        return self._object_span_links.get(self.get_obj_id(obj), [])
-
     @classmethod
     def annotation_context(
         cls, tags: Optional[Dict[str, Any]] = None, prompt: Optional[dict] = None, name: Optional[str] = None
diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
index dcbb4347a22..bf1669c5316 100644
--- a/ddtrace/llmobs/_trace_processor.py
+++ b/ddtrace/llmobs/_trace_processor.py
@@ -139,13 +139,7 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
             "metrics": metrics,
         }
 
-        links = [
-            # {
-            #     "trace_id": "{:x}".format(span.trace_id),
-            #     "span_id": parent_id,
-            #     "attributes": {"from": "input", "to": "input"},
-            # }
-        ]
+        links = []
 
         if span.get_tag("_ml_obs.span_links") is not None:
             links += json.loads(span._meta.pop("_ml_obs.span_links"))

From 0d147b510fab4d34771a88525621c89f82354c30 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Mon, 30 Dec 2024 10:38:31 -0500
Subject: [PATCH 13/33] cleanup

---
 ddtrace/llmobs/_links.py  | 24 +++++++++++-------------
 ddtrace/llmobs/_llmobs.py |  8 ++++----
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/ddtrace/llmobs/_links.py b/ddtrace/llmobs/_links.py
index f18db3e3737..36c76aec4b5 100644
--- a/ddtrace/llmobs/_links.py
+++ b/ddtrace/llmobs/_links.py
@@ -62,7 +62,7 @@ def format(self, *args, **kwargs):
         result = TrackedStr(result)
         for arg in args:
             _record_relationship(result, arg, "format")
-        for key, value in kwargs.items():
+        for _, value in kwargs.items():
             _record_relationship(result, value, "format")
         _record_relationship(result, self, "format")
         return result
@@ -148,36 +148,34 @@ def __setitem__(self, key, value):
 
 
 def _remove_relationship(source_obj, incoming_obj, operation):
-    """Record a relationship between objects."""
     if source_obj is None or incoming_obj is None:
         return
     if source_obj not in _object_relationships:
-        _object_relationships[get_obj_id(source_obj)] = set()
-    _object_relationships[get_obj_id(source_obj)].remove(get_obj_id(incoming_obj))
+        _object_relationships[get_object_id(source_obj)] = set()
+    _object_relationships[get_object_id(source_obj)].remove(get_object_id(incoming_obj))
 
 
 def _record_relationship(source_obj, incoming_obj, operation):
-    """Record a relationship between objects."""
     if source_obj is None or incoming_obj is None:
         return
-    if get_obj_id(source_obj) not in _object_relationships:
-        _object_relationships[get_obj_id(source_obj)] = set()
-    _object_relationships[get_obj_id(source_obj)].add(get_obj_id(incoming_obj))
+    if get_object_id(source_obj) not in _object_relationships:
+        _object_relationships[get_object_id(source_obj)] = set()
+    _object_relationships[get_object_id(source_obj)].add(get_object_id(incoming_obj))
 
 
-def get_obj_id(obj):
+def get_object_id(obj):
     return f"{type(obj).__name__}_{id(obj)}"
 
 
-def add_span_links(obj, span_links):
-    obj_id = get_obj_id(obj)
+def add_span_links_to_object(obj, span_links):
+    obj_id = get_object_id(obj)
     if obj_id not in _object_span_links:
         _object_span_links[obj_id] = []
     _object_span_links[obj_id] += span_links
 
 
-def get_span_links(obj):
-    return _object_span_links.get(get_obj_id(obj), [])
+def get_span_links_from_object(obj):
+    return _object_span_links.get(get_object_id(obj), [])
 
 
 def search_links_from_relationships(obj_id, visited: Optional[set] = None, links: Optional[list] = None):
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index a86badcef85..f765d33564d 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -48,8 +48,8 @@
 from ddtrace.llmobs._links import TrackedDict
 from ddtrace.llmobs._links import TrackedList
 from ddtrace.llmobs._links import TrackedStr
-from ddtrace.llmobs._links import add_span_links
-from ddtrace.llmobs._links import get_obj_id
+from ddtrace.llmobs._links import add_span_links_to_object
+from ddtrace.llmobs._links import get_object_id
 from ddtrace.llmobs._links import search_links_from_relationships
 from ddtrace.llmobs._links import track_object_interactions
 from ddtrace.llmobs._trace_processor import LLMObsTraceProcessor
@@ -288,14 +288,14 @@ def record_object(self, span, obj, to):
             obj = TrackedDict(obj)
 
         current_span_links = []
-        for span_link in search_links_from_relationships(get_obj_id(obj)):
+        for span_link in search_links_from_relationships(get_object_id(obj)):
             if span_link["attributes"]["from"] == "input" and to == "output":
                 continue
             span_link["attributes"]["to"] = to
             current_span_links.append(span_link)
 
         self._tag_span_links(span, current_span_links)
-        add_span_links(
+        add_span_links_to_object(
             obj,
             [
                 {

From 22d8030f618c7d824ad2bc7d2a86790feaa32aaf Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Mon, 30 Dec 2024 11:30:38 -0500
Subject: [PATCH 14/33] cleanup record rel

---
 ddtrace/llmobs/_links.py | 56 ++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/ddtrace/llmobs/_links.py b/ddtrace/llmobs/_links.py
index 36c76aec4b5..8da2963d0ed 100644
--- a/ddtrace/llmobs/_links.py
+++ b/ddtrace/llmobs/_links.py
@@ -35,7 +35,7 @@ def track_object_interactions(frame, event, arg):
                 "extend",
                 "update",
             ]:
-                _record_relationship(source_obj, incoming_obj, f"{method_name} operation")
+                _record_relationship(source_obj, incoming_obj)
     except Exception as e:
         print("Error capturing object interactions ", e)
 
@@ -46,25 +46,25 @@ class TrackedStr(str):
     def __add__(self, other):
         result = super().__add__(other)
         result = TrackedStr(result)
-        _record_relationship(result, other, "__add__")
-        _record_relationship(result, self, "__add__")
+        _record_relationship(result, other)
+        _record_relationship(result, self)
         return result
 
     def __radd__(self, other):
         result = super().__radd__(other)
         result = TrackedStr(result)
-        _record_relationship(result, other, "__radd__")
-        _record_relationship(result, self, "__radd__")
+        _record_relationship(result, other)
+        _record_relationship(result, self)
         return result
 
     def format(self, *args, **kwargs):
         result = super().format(*args, **kwargs)
         result = TrackedStr(result)
         for arg in args:
-            _record_relationship(result, arg, "format")
+            _record_relationship(result, arg)
         for _, value in kwargs.items():
-            _record_relationship(result, value, "format")
-        _record_relationship(result, self, "format")
+            _record_relationship(result, value)
+        _record_relationship(result, self)
         return result
 
     def split(self, *args, **kwargs):
@@ -78,30 +78,30 @@ class TrackedList(list):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         for item in self:
-            _record_relationship(self, item, "__init__")
+            _record_relationship(self, item)
 
     def append(self, item):
         result = super().append(item)
-        _record_relationship(self, item, "append")
+        _record_relationship(self, item)
         return result
 
     def extend(self, iterable):
         result = super().extend(iterable)
-        _record_relationship(self, iterable, "extend")
+        _record_relationship(self, iterable)
         return result
 
     def __delitem__(self, key) -> None:
         if key < len(self):
-            _remove_relationship(self, self[key], "__delitem__")
+            _remove_relationship(self, self[key])
         super().__delitem__(key)
 
     def __setitem__(self, key, value):
         super().__setitem__(key, value)
-        _record_relationship(self, value, "__setitem__")
+        _record_relationship(self, value)
 
     def __add__(self, other):
         result = super().__add__(other)
-        _record_relationship(self, other, "__add__")
+        _record_relationship(self, other)
         return result
 
 
@@ -109,45 +109,45 @@ class TrackedDict(dict):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         for key, value in self.items():
-            _record_relationship(self, key, "__init__")
-            _record_relationship(self, value, "__init__")
+            _record_relationship(self, key)
+            _record_relationship(self, value)
 
     def fromkeys(self, *args, **kwargs):
         ret = super().fromkeys(*args, **kwargs)
         for key, value in ret.items():
-            _record_relationship(ret, key, "fromkeys")
-            _record_relationship(ret, value, "fromkeys")
+            _record_relationship(ret, key)
+            _record_relationship(ret, value)
         return ret
 
     def update(self, *args, **kwargs):
         result = super().update(*args, **kwargs)
         for arg in args:
-            _record_relationship(self, arg, "update")
+            _record_relationship(self, arg)
         return result
 
     def pop(self, key, *args, **kwargs):
         val = super().pop(key, *args, **kwargs)
-        _remove_relationship(self, key, "pop")
+        _remove_relationship(self, key)
         if val is not None:
-            _remove_relationship(self, val, "pop")
+            _remove_relationship(self, val)
         return val
 
     def __delitem__(self, key) -> None:
         val = self.get(key)
         super().__delitem__(key)
-        _remove_relationship(self, key, "__delitem__")
+        _remove_relationship(self, key)
         if val is not None:
-            _remove_relationship(self, val, "__delitem__")
+            _remove_relationship(self, val)
 
     def __setitem__(self, key, value):
         if key in self:
-            _remove_relationship(self, self[key], "__setitem__")
+            _remove_relationship(self, self[key])
         super().__setitem__(key, value)
-        _record_relationship(self, value, "__setitem__")
-        _record_relationship(self, key, "__setitem__")
+        _record_relationship(self, value)
+        _record_relationship(self, key)
 
 
-def _remove_relationship(source_obj, incoming_obj, operation):
+def _remove_relationship(source_obj, incoming_obj):
     if source_obj is None or incoming_obj is None:
         return
     if source_obj not in _object_relationships:
@@ -155,7 +155,7 @@ def _remove_relationship(source_obj, incoming_obj, operation):
     _object_relationships[get_object_id(source_obj)].remove(get_object_id(incoming_obj))
 
 
-def _record_relationship(source_obj, incoming_obj, operation):
+def _record_relationship(source_obj, incoming_obj):
     if source_obj is None or incoming_obj is None:
         return
     if get_object_id(source_obj) not in _object_relationships:

From 0f92a029157bf6031d1acff1bd5748dbd48af99e Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Mon, 30 Dec 2024 13:02:44 -0500
Subject: [PATCH 15/33] helper track item if primitve fn

---
 ddtrace/llmobs/_links.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/ddtrace/llmobs/_links.py b/ddtrace/llmobs/_links.py
index 8da2963d0ed..aabf5866759 100644
--- a/ddtrace/llmobs/_links.py
+++ b/ddtrace/llmobs/_links.py
@@ -42,6 +42,16 @@ def track_object_interactions(frame, event, arg):
     return track_object_interactions
 
 
+def track_item_if_primitive(item):
+    if isinstance(item, str):
+        item = TrackedStr(item)
+    elif isinstance(item, list):
+        item = TrackedList(item)
+    elif isinstance(item, dict):
+        item = TrackedDict(item)
+    return item
+
+
 class TrackedStr(str):
     def __add__(self, other):
         result = super().__add__(other)
@@ -99,6 +109,11 @@ def __setitem__(self, key, value):
         super().__setitem__(key, value)
         _record_relationship(self, value)
 
+    def __getitem__(self, key):
+        item = super().__setitem__(key)
+        item = track_item_if_primitive(item)
+        return item
+
     def __add__(self, other):
         result = super().__add__(other)
         _record_relationship(self, other)
@@ -125,6 +140,11 @@ def update(self, *args, **kwargs):
             _record_relationship(self, arg)
         return result
 
+    def __getitem__(self, key):
+        item = super().__getitem__(key)
+        item = track_item_if_primitive(item)
+        return item
+
     def pop(self, key, *args, **kwargs):
         val = super().pop(key, *args, **kwargs)
         _remove_relationship(self, key)

From 2fd24359584239b6e502aee93fb37856fbd63f8b Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Mon, 30 Dec 2024 15:39:22 -0500
Subject: [PATCH 16/33] support getitem

---
 ddtrace/llmobs/_links.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ddtrace/llmobs/_links.py b/ddtrace/llmobs/_links.py
index aabf5866759..e6ac634f35f 100644
--- a/ddtrace/llmobs/_links.py
+++ b/ddtrace/llmobs/_links.py
@@ -111,7 +111,7 @@ def __setitem__(self, key, value):
 
     def __getitem__(self, key):
         item = super().__setitem__(key)
-        item = track_item_if_primitive(item)
+        _record_relationship(item, self)
         return item
 
     def __add__(self, other):
@@ -142,7 +142,7 @@ def update(self, *args, **kwargs):
 
     def __getitem__(self, key):
         item = super().__getitem__(key)
-        item = track_item_if_primitive(item)
+        _record_relationship(item, self)
         return item
 
     def pop(self, key, *args, **kwargs):

From 635a138fb85887cf031bd7a22498af1ffc3fc7c2 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Wed, 22 Jan 2025 12:46:50 -0500
Subject: [PATCH 17/33] remove a bunch of object tracking stuff

---
 ddtrace/llmobs/_links.py  | 196 --------------------------------------
 ddtrace/llmobs/_llmobs.py |  46 ++++-----
 2 files changed, 18 insertions(+), 224 deletions(-)

diff --git a/ddtrace/llmobs/_links.py b/ddtrace/llmobs/_links.py
index e6ac634f35f..37149b664ed 100644
--- a/ddtrace/llmobs/_links.py
+++ b/ddtrace/llmobs/_links.py
@@ -1,186 +1,4 @@
-import inspect
-from typing import Optional
-
-
 _object_span_links = {}
-_object_relationships = {}
-
-
-def track_object_interactions(frame, event, arg):
-    """Track method calls and object interactions."""
-    if event != "call":
-        return track_object_interactions
-
-    # Get function details
-    source_obj = frame.f_locals.get("self")
-    method_name = frame.f_code.co_name
-
-    # Track object creation or modification
-    try:
-        # Capture arguments that might involve object relationships
-        args = inspect.getargvalues(frame)
-
-        # Look for potential object interactions
-        for arg_name in args.args:
-            if arg_name == "self":
-                continue
-            incoming_obj = frame.f_locals.get(arg_name)
-
-            # Check for object creation or modification
-            if method_name in [
-                "__init__",
-                "__new__",
-                "__add__",
-                "append",
-                "extend",
-                "update",
-            ]:
-                _record_relationship(source_obj, incoming_obj)
-    except Exception as e:
-        print("Error capturing object interactions ", e)
-
-    return track_object_interactions
-
-
-def track_item_if_primitive(item):
-    if isinstance(item, str):
-        item = TrackedStr(item)
-    elif isinstance(item, list):
-        item = TrackedList(item)
-    elif isinstance(item, dict):
-        item = TrackedDict(item)
-    return item
-
-
-class TrackedStr(str):
-    def __add__(self, other):
-        result = super().__add__(other)
-        result = TrackedStr(result)
-        _record_relationship(result, other)
-        _record_relationship(result, self)
-        return result
-
-    def __radd__(self, other):
-        result = super().__radd__(other)
-        result = TrackedStr(result)
-        _record_relationship(result, other)
-        _record_relationship(result, self)
-        return result
-
-    def format(self, *args, **kwargs):
-        result = super().format(*args, **kwargs)
-        result = TrackedStr(result)
-        for arg in args:
-            _record_relationship(result, arg)
-        for _, value in kwargs.items():
-            _record_relationship(result, value)
-        _record_relationship(result, self)
-        return result
-
-    def split(self, *args, **kwargs):
-        return super().split(*args, **kwargs)
-
-    def join(self, *args, **kwargs):
-        return super().join(*args, **kwargs)
-
-
-class TrackedList(list):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        for item in self:
-            _record_relationship(self, item)
-
-    def append(self, item):
-        result = super().append(item)
-        _record_relationship(self, item)
-        return result
-
-    def extend(self, iterable):
-        result = super().extend(iterable)
-        _record_relationship(self, iterable)
-        return result
-
-    def __delitem__(self, key) -> None:
-        if key < len(self):
-            _remove_relationship(self, self[key])
-        super().__delitem__(key)
-
-    def __setitem__(self, key, value):
-        super().__setitem__(key, value)
-        _record_relationship(self, value)
-
-    def __getitem__(self, key):
-        item = super().__setitem__(key)
-        _record_relationship(item, self)
-        return item
-
-    def __add__(self, other):
-        result = super().__add__(other)
-        _record_relationship(self, other)
-        return result
-
-
-class TrackedDict(dict):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        for key, value in self.items():
-            _record_relationship(self, key)
-            _record_relationship(self, value)
-
-    def fromkeys(self, *args, **kwargs):
-        ret = super().fromkeys(*args, **kwargs)
-        for key, value in ret.items():
-            _record_relationship(ret, key)
-            _record_relationship(ret, value)
-        return ret
-
-    def update(self, *args, **kwargs):
-        result = super().update(*args, **kwargs)
-        for arg in args:
-            _record_relationship(self, arg)
-        return result
-
-    def __getitem__(self, key):
-        item = super().__getitem__(key)
-        _record_relationship(item, self)
-        return item
-
-    def pop(self, key, *args, **kwargs):
-        val = super().pop(key, *args, **kwargs)
-        _remove_relationship(self, key)
-        if val is not None:
-            _remove_relationship(self, val)
-        return val
-
-    def __delitem__(self, key) -> None:
-        val = self.get(key)
-        super().__delitem__(key)
-        _remove_relationship(self, key)
-        if val is not None:
-            _remove_relationship(self, val)
-
-    def __setitem__(self, key, value):
-        if key in self:
-            _remove_relationship(self, self[key])
-        super().__setitem__(key, value)
-        _record_relationship(self, value)
-        _record_relationship(self, key)
-
-
-def _remove_relationship(source_obj, incoming_obj):
-    if source_obj is None or incoming_obj is None:
-        return
-    if source_obj not in _object_relationships:
-        _object_relationships[get_object_id(source_obj)] = set()
-    _object_relationships[get_object_id(source_obj)].remove(get_object_id(incoming_obj))
-
-
-def _record_relationship(source_obj, incoming_obj):
-    if source_obj is None or incoming_obj is None:
-        return
-    if get_object_id(source_obj) not in _object_relationships:
-        _object_relationships[get_object_id(source_obj)] = set()
-    _object_relationships[get_object_id(source_obj)].add(get_object_id(incoming_obj))
 
 
 def get_object_id(obj):
@@ -196,17 +14,3 @@ def add_span_links_to_object(obj, span_links):
 
 def get_span_links_from_object(obj):
     return _object_span_links.get(get_object_id(obj), [])
-
-
-def search_links_from_relationships(obj_id, visited: Optional[set] = None, links: Optional[list] = None):
-    if visited is None:
-        visited = set()
-    visited.add(obj_id)
-    if links is None:
-        links = []
-    if obj_id not in _object_relationships:
-        return _object_span_links.get(obj_id, [])
-    for obj in _object_relationships[obj_id]:
-        if obj not in visited:
-            links += search_links_from_relationships(obj, visited, links)
-    return _object_span_links.get(obj_id, [])
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index f765d33564d..0e5b6c9d100 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -45,13 +45,8 @@
 from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
 from ddtrace.llmobs._constants import TAGS
 from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
-from ddtrace.llmobs._links import TrackedDict
-from ddtrace.llmobs._links import TrackedList
-from ddtrace.llmobs._links import TrackedStr
 from ddtrace.llmobs._links import add_span_links_to_object
-from ddtrace.llmobs._links import get_object_id
-from ddtrace.llmobs._links import search_links_from_relationships
-from ddtrace.llmobs._links import track_object_interactions
+from ddtrace.llmobs._links import get_span_links_from_object
 from ddtrace.llmobs._trace_processor import LLMObsTraceProcessor
 from ddtrace.llmobs._utils import AnnotationContext
 from ddtrace.llmobs._utils import _get_llmobs_parent_id
@@ -114,8 +109,6 @@ def __init__(self, tracer=None):
         self._annotation_context_lock = forksafe.RLock()
         self.tracer.on_start_span(self._do_annotations)
 
-        sys.settrace(track_object_interactions)
-
     def _do_annotations(self, span):
         # get the current span context
         # only do the annotations if it matches the context
@@ -279,22 +272,20 @@ def disable(cls) -> None:
 
         log.debug("%s disabled", cls.__name__)
 
-    def record_object(self, span, obj, to):
-        if isinstance(obj, str) and not isinstance(obj, TrackedStr):
-            obj = TrackedStr(obj)
-        elif isinstance(obj, list) and not isinstance(obj, TrackedList):
-            obj = TrackedList(obj)
-        elif isinstance(obj, dict) and not isinstance(obj, TrackedDict):
-            obj = TrackedDict(obj)
-
-        current_span_links = []
-        for span_link in search_links_from_relationships(get_object_id(obj)):
-            if span_link["attributes"]["from"] == "input" and to == "output":
-                continue
-            span_link["attributes"]["to"] = to
-            current_span_links.append(span_link)
-
-        self._tag_span_links(span, current_span_links)
+    def _record_object(self, span, obj, input_or_output):
+        span_links = []
+        for span_link in get_span_links_from_object(obj):
+            span_links.append(
+                {
+                    "trace_id": span_link["trace_id"],
+                    "span_id": span_link["span_id"],
+                    "attributes": {
+                        "from": span_link["attributes"]["from"],
+                        "to": input_or_output,
+                    },
+                }
+            )
+        self._tag_span_links(span, span_links)
         add_span_links_to_object(
             obj,
             [
@@ -302,12 +293,11 @@ def record_object(self, span, obj, to):
                     "trace_id": self.export_span(span)["trace_id"],
                     "span_id": self.export_span(span)["span_id"],
                     "attributes": {
-                        "from": to,
+                        "from": input_or_output,
                     },
                 }
             ],
         )
-        return obj
 
     def _tag_span_links(self, span, span_links):
         span_links = [
@@ -670,9 +660,9 @@ def annotate(
             log.warning("Cannot annotate a finished span.")
             return
         if input_data is not None:
-            cls._instance.record_object(span, input_data, "input")
+            cls._instance._record_object(span, input_data, "input")
         if output_data is not None:
-            cls._instance.record_object(span, output_data, "output")
+            cls._instance._record_object(span, output_data, "output")
         if metadata is not None:
             cls._tag_metadata(span, metadata)
         if metrics is not None:

From a4eadff552867cfae5e3648cc4989619b7b9cb42 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Wed, 22 Jan 2025 12:52:03 -0500
Subject: [PATCH 18/33] remove the trace processor

---
 ddtrace/llmobs/_trace_processor.py | 185 -----------------------------
 1 file changed, 185 deletions(-)
 delete mode 100644 ddtrace/llmobs/_trace_processor.py

diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
deleted file mode 100644
index bf1669c5316..00000000000
--- a/ddtrace/llmobs/_trace_processor.py
+++ /dev/null
@@ -1,185 +0,0 @@
-import json
-from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Tuple
-
-import ddtrace
-from ddtrace import Span
-from ddtrace import config
-from ddtrace._trace.processor import TraceProcessor
-from ddtrace.constants import ERROR_MSG
-from ddtrace.constants import ERROR_STACK
-from ddtrace.constants import ERROR_TYPE
-from ddtrace.ext import SpanTypes
-from ddtrace.internal.logger import get_logger
-from ddtrace.llmobs._constants import INPUT_DOCUMENTS
-from ddtrace.llmobs._constants import INPUT_MESSAGES
-from ddtrace.llmobs._constants import INPUT_PARAMETERS
-from ddtrace.llmobs._constants import INPUT_PROMPT
-from ddtrace.llmobs._constants import INPUT_VALUE
-from ddtrace.llmobs._constants import METADATA
-from ddtrace.llmobs._constants import METRICS
-from ddtrace.llmobs._constants import ML_APP
-from ddtrace.llmobs._constants import MODEL_NAME
-from ddtrace.llmobs._constants import MODEL_PROVIDER
-from ddtrace.llmobs._constants import OUTPUT_DOCUMENTS
-from ddtrace.llmobs._constants import OUTPUT_MESSAGES
-from ddtrace.llmobs._constants import OUTPUT_VALUE
-from ddtrace.llmobs._constants import PARENT_ID_KEY
-from ddtrace.llmobs._constants import RAGAS_ML_APP_PREFIX
-from ddtrace.llmobs._constants import RUNNER_IS_INTEGRATION_SPAN_TAG
-from ddtrace.llmobs._constants import SESSION_ID
-from ddtrace.llmobs._constants import SPAN_KIND
-from ddtrace.llmobs._constants import TAGS
-from ddtrace.llmobs._utils import _get_llmobs_parent_id
-from ddtrace.llmobs._utils import _get_ml_app
-from ddtrace.llmobs._utils import _get_session_id
-from ddtrace.llmobs._utils import _get_span_name
-
-
-log = get_logger(__name__)
-
-
-class LLMObsTraceProcessor(TraceProcessor):
-    """
-    Processor that extracts LLM-type spans in a trace to submit as separate LLMObs span events to LLM Observability.
-    """
-
-    def __init__(self, llmobs_span_writer, evaluator_runner=None):
-        self._span_writer = llmobs_span_writer
-        self._evaluator_runner = evaluator_runner
-
-    def process_trace(self, trace: List[Span]) -> Optional[List[Span]]:
-        if not trace:
-            return None
-        for span in trace:
-            if span.span_type == SpanTypes.LLM:
-                self.submit_llmobs_span(span)
-        return None if config._llmobs_agentless_enabled else trace
-
-    def submit_llmobs_span(self, span: Span) -> None:
-        """Generate and submit an LLMObs span event to be sent to LLMObs."""
-        span_event = None
-        is_llm_span = span.get_tag(SPAN_KIND) == "llm"
-        is_ragas_integration_span = False
-        try:
-            span_event, is_ragas_integration_span = self._llmobs_span_event(span)
-            self._span_writer.enqueue(span_event)
-        except (KeyError, TypeError):
-            log.error("Error generating LLMObs span event for span %s, likely due to malformed span", span)
-        finally:
-            if not span_event or not is_llm_span or is_ragas_integration_span:
-                return
-            if self._evaluator_runner:
-                self._evaluator_runner.enqueue(span_event, span)
-
-    def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]:
-        """Span event object structure."""
-        span_kind = span._meta.pop(SPAN_KIND)
-        meta: Dict[str, Any] = {"span.kind": span_kind, "input": {}, "output": {}}
-        if span_kind in ("llm", "embedding") and span.get_tag(MODEL_NAME) is not None:
-            meta["model_name"] = span._meta.pop(MODEL_NAME)
-            meta["model_provider"] = span._meta.pop(MODEL_PROVIDER, "custom").lower()
-        if span.get_tag(METADATA) is not None:
-            meta["metadata"] = json.loads(span._meta.pop(METADATA))
-        if span.get_tag(INPUT_PARAMETERS):
-            meta["input"]["parameters"] = json.loads(span._meta.pop(INPUT_PARAMETERS))
-        if span_kind == "llm" and span.get_tag(INPUT_MESSAGES) is not None:
-            meta["input"]["messages"] = json.loads(span._meta.pop(INPUT_MESSAGES))
-        if span.get_tag(INPUT_VALUE) is not None:
-            meta["input"]["value"] = span._meta.pop(INPUT_VALUE)
-        if span_kind == "llm" and span.get_tag(OUTPUT_MESSAGES) is not None:
-            meta["output"]["messages"] = json.loads(span._meta.pop(OUTPUT_MESSAGES))
-        if span_kind == "embedding" and span.get_tag(INPUT_DOCUMENTS) is not None:
-            meta["input"]["documents"] = json.loads(span._meta.pop(INPUT_DOCUMENTS))
-        if span.get_tag(OUTPUT_VALUE) is not None:
-            meta["output"]["value"] = span._meta.pop(OUTPUT_VALUE)
-        if span_kind == "retrieval" and span.get_tag(OUTPUT_DOCUMENTS) is not None:
-            meta["output"]["documents"] = json.loads(span._meta.pop(OUTPUT_DOCUMENTS))
-        if span.get_tag(INPUT_PROMPT) is not None:
-            prompt_json_str = span._meta.pop(INPUT_PROMPT)
-            if span_kind != "llm":
-                log.warning(
-                    "Dropping prompt on non-LLM span kind, annotating prompts is only supported for LLM span kinds."
-                )
-            else:
-                meta["input"]["prompt"] = json.loads(prompt_json_str)
-        if span.error:
-            meta[ERROR_MSG] = span.get_tag(ERROR_MSG)
-            meta[ERROR_STACK] = span.get_tag(ERROR_STACK)
-            meta[ERROR_TYPE] = span.get_tag(ERROR_TYPE)
-        if not meta["input"]:
-            meta.pop("input")
-        if not meta["output"]:
-            meta.pop("output")
-        metrics = json.loads(span._meta.pop(METRICS, "{}"))
-        ml_app = _get_ml_app(span)
-
-        is_ragas_integration_span = False
-
-        if ml_app.startswith(RAGAS_ML_APP_PREFIX):
-            is_ragas_integration_span = True
-
-        span.set_tag_str(ML_APP, ml_app)
-
-        parent_id = str(_get_llmobs_parent_id(span) or "undefined")
-        span._meta.pop(PARENT_ID_KEY, None)
-
-        llmobs_span_event = {
-            "trace_id": "{:x}".format(span.trace_id),
-            "span_id": str(span.span_id),
-            "parent_id": parent_id,
-            "name": _get_span_name(span),
-            "start_ns": span.start_ns,
-            "duration": span.duration_ns,
-            "status": "error" if span.error else "ok",
-            "meta": meta,
-            "metrics": metrics,
-        }
-
-        links = []
-
-        if span.get_tag("_ml_obs.span_links") is not None:
-            links += json.loads(span._meta.pop("_ml_obs.span_links"))
-
-        llmobs_span_event["span_links"] = links
-        print("links for span: {}".format(_get_span_name(span)))
-        print(llmobs_span_event["span_links"])
-
-        session_id = _get_session_id(span)
-        if session_id is not None:
-            span.set_tag_str(SESSION_ID, session_id)
-            llmobs_span_event["session_id"] = session_id
-
-        llmobs_span_event["tags"] = self._llmobs_tags(
-            span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span
-        )
-        return llmobs_span_event, is_ragas_integration_span
-
-    @staticmethod
-    def _llmobs_tags(
-        span: Span, ml_app: str, session_id: Optional[str] = None, is_ragas_integration_span: bool = False
-    ) -> List[str]:
-        tags = {
-            "version": config.version or "",
-            "env": config.env or "",
-            "service": span.service or "",
-            "source": "integration",
-            "ml_app": ml_app,
-            "ddtrace.version": ddtrace.__version__,
-            "language": "python",
-            "error": span.error,
-        }
-        err_type = span.get_tag(ERROR_TYPE)
-        if err_type:
-            tags["error_type"] = err_type
-        if session_id:
-            tags["session_id"] = session_id
-        if is_ragas_integration_span:
-            tags[RUNNER_IS_INTEGRATION_SPAN_TAG] = "ragas"
-        existing_tags = span._meta.pop(TAGS, None)
-        if existing_tags is not None:
-            tags.update(json.loads(existing_tags))
-        return ["{}:{}".format(k, v) for k, v in tags.items()]

From a4aa30c99e68d1f3c1cc9e306f363280a394241b Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Wed, 22 Jan 2025 17:54:23 -0500
Subject: [PATCH 19/33] add a simple test case

---
 ddtrace/llmobs/_links.py               | 25 ++++++++++++-------------
 ddtrace/llmobs/_llmobs.py              | 21 ++++++++-------------
 ddtrace/llmobs/_utils.py               | 17 +++++++++++++++++
 ddtrace/llmobs/decorators.py           |  8 ++++++--
 tests/llmobs/test_llmobs_decorators.py | 25 +++++++++++++++++++++++++
 5 files changed, 68 insertions(+), 28 deletions(-)

diff --git a/ddtrace/llmobs/_links.py b/ddtrace/llmobs/_links.py
index 37149b664ed..c54585b8cec 100644
--- a/ddtrace/llmobs/_links.py
+++ b/ddtrace/llmobs/_links.py
@@ -1,16 +1,15 @@
-_object_span_links = {}
+class AutoLinking:
+    def __init__(self, object_span_links=None):
+        self._object_span_links = object_span_links or {}
 
+    def get_object_id(self, obj):
+        return f"{type(obj).__name__}_{id(obj)}"
 
-def get_object_id(obj):
-    return f"{type(obj).__name__}_{id(obj)}"
+    def add_span_links_to_object(self, obj, span_links):
+        obj_id = self.get_object_id(obj)
+        if obj_id not in self._object_span_links:
+            self._object_span_links[obj_id] = []
+        self._object_span_links[obj_id] += span_links
 
-
-def add_span_links_to_object(obj, span_links):
-    obj_id = get_object_id(obj)
-    if obj_id not in _object_span_links:
-        _object_span_links[obj_id] = []
-    _object_span_links[obj_id] += span_links
-
-
-def get_span_links_from_object(obj):
-    return _object_span_links.get(get_object_id(obj), [])
+    def get_span_links_from_object(self, obj):
+        return self._object_span_links.get(self.get_object_id(obj), [])
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 93f025cd8e5..e4b845763ee 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -1,6 +1,5 @@
 import json
 import os
-import sys
 import time
 from typing import Any
 from typing import Dict
@@ -54,9 +53,8 @@
 from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
 from ddtrace.llmobs._constants import TAGS
 from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
-from ddtrace.llmobs._links import add_span_links_to_object
-from ddtrace.llmobs._links import get_span_links_from_object
 from ddtrace.llmobs._utils import AnnotationContext
+from ddtrace.llmobs._utils import LinkTracker
 from ddtrace.llmobs._utils import _get_llmobs_parent_id
 from ddtrace.llmobs._utils import _get_ml_app
 from ddtrace.llmobs._utils import _get_session_id
@@ -112,6 +110,7 @@ def __init__(self, tracer=None):
 
         forksafe.register(self._child_after_fork)
 
+        self._link_tracker = LinkTracker()
         self._annotations = []
         self._annotation_context_lock = forksafe.RLock()
 
@@ -388,7 +387,6 @@ def disable(cls) -> None:
         if not cls.enabled:
             log.debug("%s not enabled", cls.__name__)
             return
-        sys.settrace(None)
         log.debug("Disabling %s", cls.__name__)
         atexit.unregister(cls.disable)
 
@@ -400,7 +398,9 @@ def disable(cls) -> None:
 
     def _record_object(self, span, obj, input_or_output):
         span_links = []
-        for span_link in get_span_links_from_object(obj):
+        for span_link in self._link_tracker.get_span_links_from_object(obj):
+            if span_link["attributes"]["from"] == "input" and input_or_output == "output":
+                continue
             span_links.append(
                 {
                     "trace_id": span_link["trace_id"],
@@ -412,7 +412,7 @@ def _record_object(self, span, obj, input_or_output):
                 }
             )
         self._tag_span_links(span, span_links)
-        add_span_links_to_object(
+        self._link_tracker.add_span_links_to_object(
             obj,
             [
                 {
@@ -429,11 +429,10 @@ def _tag_span_links(self, span, span_links):
         span_links = [
             span_link for span_link in span_links if span_link["span_id"] != LLMObs.export_span(span)["span_id"]
         ]
-        current_span_links = span.get_tag("_ml_obs.span_links")
+        current_span_links = span._get_ctx_item(SPAN_LINKS)
         if current_span_links:
-            current_span_links = json.loads(current_span_links)
             span_links = current_span_links + span_links
-        span.set_tag_str("_ml_obs.span_links", safe_json(span_links))
+        span._set_ctx_item(SPAN_LINKS, span_links)
 
     @classmethod
     def annotation_context(
@@ -797,10 +796,6 @@ def annotate(
         if span.finished:
             log.warning("Cannot annotate a finished span.")
             return
-        if input_data is not None:
-            cls._instance._record_object(span, input_data, "input")
-        if output_data is not None:
-            cls._instance._record_object(span, output_data, "output")
         if metadata is not None:
             cls._tag_metadata(span, metadata)
         if metrics is not None:
diff --git a/ddtrace/llmobs/_utils.py b/ddtrace/llmobs/_utils.py
index c2f44689a05..2111f479fe4 100644
--- a/ddtrace/llmobs/_utils.py
+++ b/ddtrace/llmobs/_utils.py
@@ -73,6 +73,23 @@ def validate_prompt(prompt: dict) -> Dict[str, Union[str, dict, List[str]]]:
     return validated_prompt
 
 
+class LinkTracker:
+    def __init__(self, object_span_links=None):
+        self._object_span_links = object_span_links or {}
+
+    def get_object_id(self, obj):
+        return f"{type(obj).__name__}_{id(obj)}"
+
+    def add_span_links_to_object(self, obj, span_links):
+        obj_id = self.get_object_id(obj)
+        if obj_id not in self._object_span_links:
+            self._object_span_links[obj_id] = []
+        self._object_span_links[obj_id] += span_links
+
+    def get_span_links_from_object(self, obj):
+        return self._object_span_links.get(self.get_object_id(obj), [])
+
+
 class AnnotationContext:
     def __init__(self, _register_annotator, _deregister_annotator):
         self._register_annotator = _register_annotator
diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py
index 9fdd7ea4e85..e7cd09d51e4 100644
--- a/ddtrace/llmobs/decorators.py
+++ b/ddtrace/llmobs/decorators.py
@@ -231,7 +231,10 @@ def wrapper(*args, **kwargs):
                     _, span_name = _get_llmobs_span_options(name, None, func)
                     traced_operation = getattr(LLMObs, operation_kind, LLMObs.workflow)
                     with traced_operation(name=span_name, session_id=session_id, ml_app=ml_app) as span:
-                        args = tuple(LLMObs._instance.record_object(span, arg, "input") for arg in args)
+                        for arg in args:
+                            LLMObs._instance._record_object(span, arg, "input")
+                        for arg in kwargs.values():
+                            LLMObs._instance._record_object(span, arg, "input")
                         func_signature = signature(func)
                         bound_args = func_signature.bind_partial(*args, **kwargs)
                         if _automatic_io_annotation and bound_args.arguments:
@@ -244,7 +247,8 @@ def wrapper(*args, **kwargs):
                             and span._get_ctx_item(OUTPUT_VALUE) is None
                         ):
                             LLMObs.annotate(span=span, output_data=resp)
-                        return LLMObs._instance.record_object(span, resp, "output")
+                        LLMObs._instance._record_object(span, resp, "output")
+                        return resp
 
             return generator_wrapper if (isgeneratorfunction(func) or isasyncgenfunction(func)) else wrapper
 
diff --git a/tests/llmobs/test_llmobs_decorators.py b/tests/llmobs/test_llmobs_decorators.py
index 056de72ee96..935edc4f944 100644
--- a/tests/llmobs/test_llmobs_decorators.py
+++ b/tests/llmobs/test_llmobs_decorators.py
@@ -828,3 +828,28 @@ def get_next_element(alist):
         error_message=span.get_tag("error.message"),
         error_stack=span.get_tag("error.stack"),
     )
+
+
+def construct_link(span_event, link_from, link_to):
+    return {
+        "trace_id": span_event["trace_id"],
+        "span_id": span_event["span_id"],
+        "attributes": {"from": link_from, "to": link_to},
+    }
+
+
+def test_decorator_records_span_links(llmobs, llmobs_events):
+    @workflow
+    def f(inp):
+        return 1
+
+    @task
+    def g(inp):
+        return inp
+
+    f(g("test_input"))
+
+    assert not llmobs_events[0]["span_links"]
+    assert len(llmobs_events[1]["span_links"]) == 2
+    assert llmobs_events[1]["span_links"][0] == construct_link(llmobs_events[0], "input", "input")
+    assert llmobs_events[1]["span_links"][1] == construct_link(llmobs_events[0], "output", "input")

From f2ed301f4966c1992c1211267e5aa8bad6b66c81 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Thu, 23 Jan 2025 10:58:43 -0500
Subject: [PATCH 20/33] tests

---
 ddtrace/llmobs/_links.py               | 15 ------
 ddtrace/llmobs/_llmobs.py              |  2 +
 tests/llmobs/test_llmobs_decorators.py | 68 +++++++++++++++++++++++---
 3 files changed, 63 insertions(+), 22 deletions(-)
 delete mode 100644 ddtrace/llmobs/_links.py

diff --git a/ddtrace/llmobs/_links.py b/ddtrace/llmobs/_links.py
deleted file mode 100644
index c54585b8cec..00000000000
--- a/ddtrace/llmobs/_links.py
+++ /dev/null
@@ -1,15 +0,0 @@
-class AutoLinking:
-    def __init__(self, object_span_links=None):
-        self._object_span_links = object_span_links or {}
-
-    def get_object_id(self, obj):
-        return f"{type(obj).__name__}_{id(obj)}"
-
-    def add_span_links_to_object(self, obj, span_links):
-        obj_id = self.get_object_id(obj)
-        if obj_id not in self._object_span_links:
-            self._object_span_links[obj_id] = []
-        self._object_span_links[obj_id] += span_links
-
-    def get_span_links_from_object(self, obj):
-        return self._object_span_links.get(self.get_object_id(obj), [])
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index e4b845763ee..5d42468135b 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -397,6 +397,8 @@ def disable(cls) -> None:
         log.debug("%s disabled", cls.__name__)
 
     def _record_object(self, span, obj, input_or_output):
+        if obj is None:
+            return
         span_links = []
         for span_link in self._link_tracker.get_span_links_from_object(obj):
             if span_link["attributes"]["from"] == "input" and input_or_output == "output":
diff --git a/tests/llmobs/test_llmobs_decorators.py b/tests/llmobs/test_llmobs_decorators.py
index 935edc4f944..edb13007464 100644
--- a/tests/llmobs/test_llmobs_decorators.py
+++ b/tests/llmobs/test_llmobs_decorators.py
@@ -840,16 +840,70 @@ def construct_link(span_event, link_from, link_to):
 
 def test_decorator_records_span_links(llmobs, llmobs_events):
     @workflow
-    def f(inp):
+    def one(inp):
         return 1
 
     @task
-    def g(inp):
+    def two(inp):
         return inp
 
-    f(g("test_input"))
+    two(one("test_input"))
+    one_span = llmobs_events[0]
+    two_span = llmobs_events[1]
 
-    assert not llmobs_events[0]["span_links"]
-    assert len(llmobs_events[1]["span_links"]) == 2
-    assert llmobs_events[1]["span_links"][0] == construct_link(llmobs_events[0], "input", "input")
-    assert llmobs_events[1]["span_links"][1] == construct_link(llmobs_events[0], "output", "input")
+    assert not one_span["span_links"]
+    assert len(two_span["span_links"]) == 2
+    assert two_span["span_links"][0] == construct_link(one_span, "output", "input")
+    assert two_span["span_links"][1] == construct_link(one_span, "output", "output")
+
+
+def test_decorator_records_span_links_for_multi_input_functions(llmobs, llmobs_events):
+    @agent
+    def some_agent(a, b):
+        pass
+
+    @workflow
+    def one():
+        return 1
+
+    @task
+    def two():
+        return 2
+
+    some_agent(one(), two())
+
+    one_span = llmobs_events[0]
+    two_span = llmobs_events[1]
+    three_span = llmobs_events[2]
+
+    assert not one_span["span_links"]
+    assert not two_span["span_links"]
+    assert len(three_span["span_links"]) == 2
+    assert three_span["span_links"][0] == construct_link(one_span, "output", "input")
+    assert three_span["span_links"][1] == construct_link(two_span, "output", "input")
+
+
+def test_decorator_records_span_links_via_kwargs(llmobs, llmobs_events):
+    @agent
+    def some_agent(a=None, b=None):
+        pass
+
+    @workflow
+    def one():
+        return 1
+
+    @task
+    def two():
+        return 2
+
+    some_agent(one(), two())
+
+    one_span = llmobs_events[0]
+    two_span = llmobs_events[1]
+    three_span = llmobs_events[2]
+
+    assert not one_span["span_links"]
+    assert not two_span["span_links"]
+    assert len(three_span["span_links"]) == 2
+    assert three_span["span_links"][0] == construct_link(one_span, "output", "input")
+    assert three_span["span_links"][1] == construct_link(two_span, "output", "input")

From 8d1e5a6330b3ff581720516d6671c43731ad3feb Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Thu, 23 Jan 2025 11:13:58 -0500
Subject: [PATCH 21/33] change util fn name

---
 tests/llmobs/_utils.py                 |  8 ++++++++
 tests/llmobs/test_llmobs_decorators.py | 21 +++++++--------------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/tests/llmobs/_utils.py b/tests/llmobs/_utils.py
index 8343aee530e..5cf25d86bad 100644
--- a/tests/llmobs/_utils.py
+++ b/tests/llmobs/_utils.py
@@ -771,3 +771,11 @@ def _expected_ragas_answer_relevancy_spans(ragas_inputs=None):
             "tags": expected_ragas_trace_tags(),
         },
     ]
+
+
+def _expected_span_link(span_event, link_from, link_to):
+    return {
+        "trace_id": span_event["trace_id"],
+        "span_id": span_event["span_id"],
+        "attributes": {"from": link_from, "to": link_to},
+    }
diff --git a/tests/llmobs/test_llmobs_decorators.py b/tests/llmobs/test_llmobs_decorators.py
index edb13007464..e96f587d798 100644
--- a/tests/llmobs/test_llmobs_decorators.py
+++ b/tests/llmobs/test_llmobs_decorators.py
@@ -11,6 +11,7 @@
 from ddtrace.llmobs.decorators import workflow
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
 from tests.llmobs._utils import _expected_llmobs_non_llm_span_event
+from tests.llmobs._utils import _expected_span_link
 
 
 @pytest.fixture
@@ -830,14 +831,6 @@ def get_next_element(alist):
     )
 
 
-def construct_link(span_event, link_from, link_to):
-    return {
-        "trace_id": span_event["trace_id"],
-        "span_id": span_event["span_id"],
-        "attributes": {"from": link_from, "to": link_to},
-    }
-
-
 def test_decorator_records_span_links(llmobs, llmobs_events):
     @workflow
     def one(inp):
@@ -853,8 +846,8 @@ def two(inp):
 
     assert not one_span["span_links"]
     assert len(two_span["span_links"]) == 2
-    assert two_span["span_links"][0] == construct_link(one_span, "output", "input")
-    assert two_span["span_links"][1] == construct_link(one_span, "output", "output")
+    assert two_span["span_links"][0] == _expected_span_link(one_span, "output", "input")
+    assert two_span["span_links"][1] == _expected_span_link(one_span, "output", "output")
 
 
 def test_decorator_records_span_links_for_multi_input_functions(llmobs, llmobs_events):
@@ -879,8 +872,8 @@ def two():
     assert not one_span["span_links"]
     assert not two_span["span_links"]
     assert len(three_span["span_links"]) == 2
-    assert three_span["span_links"][0] == construct_link(one_span, "output", "input")
-    assert three_span["span_links"][1] == construct_link(two_span, "output", "input")
+    assert three_span["span_links"][0] == _expected_span_link(one_span, "output", "input")
+    assert three_span["span_links"][1] == _expected_span_link(two_span, "output", "input")
 
 
 def test_decorator_records_span_links_via_kwargs(llmobs, llmobs_events):
@@ -905,5 +898,5 @@ def two():
     assert not one_span["span_links"]
     assert not two_span["span_links"]
     assert len(three_span["span_links"]) == 2
-    assert three_span["span_links"][0] == construct_link(one_span, "output", "input")
-    assert three_span["span_links"][1] == construct_link(two_span, "output", "input")
+    assert three_span["span_links"][0] == _expected_span_link(one_span, "output", "input")
+    assert three_span["span_links"][1] == _expected_span_link(two_span, "output", "input")

From 641f3f981962a6bd40d833930fe2ac410a95fd12 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Thu, 23 Jan 2025 11:43:10 -0500
Subject: [PATCH 22/33] dont set empty list of span links

---
 ddtrace/llmobs/_llmobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 5d42468135b..0768c3f6f72 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -208,7 +208,7 @@ def _llmobs_span_event(cls, span: Span) -> Dict[str, Any]:
         llmobs_span_event["tags"] = cls._llmobs_tags(span, ml_app, session_id)
 
         span_links = span._get_ctx_item(SPAN_LINKS)
-        if isinstance(span_links, list):
+        if isinstance(span_links, list) and span_links:
             llmobs_span_event["span_links"] = span_links
 
         return llmobs_span_event

From 8cfd66a7a37595df28c825e8771452b46796d8fd Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Thu, 23 Jan 2025 12:57:25 -0500
Subject: [PATCH 23/33] link onyl spans of same trace

---
 ddtrace/llmobs/_llmobs.py              |  7 ++++++-
 ddtrace/llmobs/decorators.py           | 10 ++++++++--
 tests/llmobs/_utils.py                 |  8 +++++++-
 tests/llmobs/test_llmobs_decorators.py | 20 ++++++++++++--------
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 0768c3f6f72..4ca97f1144a 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -428,8 +428,13 @@ def _record_object(self, span, obj, input_or_output):
         )
 
     def _tag_span_links(self, span, span_links):
+        if not span_links:
+            return
         span_links = [
-            span_link for span_link in span_links if span_link["span_id"] != LLMObs.export_span(span)["span_id"]
+            span_link
+            for span_link in span_links
+            if span_link["span_id"] != LLMObs.export_span(span)["span_id"]
+            and span_link["trace_id"] == LLMObs.export_span(span)["trace_id"]
         ]
         current_span_links = span._get_ctx_item(SPAN_LINKS)
         if current_span_links:
diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py
index e7cd09d51e4..6f792b75054 100644
--- a/ddtrace/llmobs/decorators.py
+++ b/ddtrace/llmobs/decorators.py
@@ -138,8 +138,14 @@ def wrapper(*args, **kwargs):
                         name=span_name,
                         session_id=session_id,
                         ml_app=ml_app,
-                    ):
-                        return func(*args, **kwargs)
+                    ) as span:
+                        for arg in args:
+                            LLMObs._instance._record_object(span, arg, "input")
+                        for arg in kwargs.values():
+                            LLMObs._instance._record_object(span, arg, "input")
+                        ret = func(*args, **kwargs)
+                        LLMObs._instance._record_object(span, ret, "output")
+                        return ret
 
             return generator_wrapper if (isgeneratorfunction(func) or isasyncgenfunction(func)) else wrapper
 
diff --git a/tests/llmobs/_utils.py b/tests/llmobs/_utils.py
index 5cf25d86bad..2a594254942 100644
--- a/tests/llmobs/_utils.py
+++ b/tests/llmobs/_utils.py
@@ -176,7 +176,13 @@ def _expected_llmobs_non_llm_span_event(
 
 
 def _llmobs_base_span_event(
-    span, span_kind, tags=None, session_id=None, error=None, error_message=None, error_stack=None
+    span,
+    span_kind,
+    tags=None,
+    session_id=None,
+    error=None,
+    error_message=None,
+    error_stack=None,
 ):
     span_event = {
         "trace_id": "{:x}".format(span.trace_id),
diff --git a/tests/llmobs/test_llmobs_decorators.py b/tests/llmobs/test_llmobs_decorators.py
index e96f587d798..8aa46d43717 100644
--- a/tests/llmobs/test_llmobs_decorators.py
+++ b/tests/llmobs/test_llmobs_decorators.py
@@ -840,11 +840,13 @@ def one(inp):
     def two(inp):
         return inp
 
-    two(one("test_input"))
+    with llmobs.agent("dummy_trace"):
+        two(one("test_input"))
+
     one_span = llmobs_events[0]
     two_span = llmobs_events[1]
 
-    assert not one_span["span_links"]
+    assert "span_links" not in one_span
     assert len(two_span["span_links"]) == 2
     assert two_span["span_links"][0] == _expected_span_link(one_span, "output", "input")
     assert two_span["span_links"][1] == _expected_span_link(one_span, "output", "output")
@@ -863,14 +865,15 @@ def one():
     def two():
         return 2
 
-    some_agent(one(), two())
+    with llmobs.agent("dummy_trace"):
+        some_agent(one(), two())
 
     one_span = llmobs_events[0]
     two_span = llmobs_events[1]
     three_span = llmobs_events[2]
 
-    assert not one_span["span_links"]
-    assert not two_span["span_links"]
+    assert "span_links" not in one_span
+    assert "span_links" not in two_span
     assert len(three_span["span_links"]) == 2
     assert three_span["span_links"][0] == _expected_span_link(one_span, "output", "input")
     assert three_span["span_links"][1] == _expected_span_link(two_span, "output", "input")
@@ -889,14 +892,15 @@ def one():
     def two():
         return 2
 
-    some_agent(one(), two())
+    with llmobs.agent("dummy_trace"):
+        some_agent(one(), two())
 
     one_span = llmobs_events[0]
     two_span = llmobs_events[1]
     three_span = llmobs_events[2]
 
-    assert not one_span["span_links"]
-    assert not two_span["span_links"]
+    assert "span_links" not in one_span
+    assert "span_links" not in two_span
     assert len(three_span["span_links"]) == 2
     assert three_span["span_links"][0] == _expected_span_link(one_span, "output", "input")
     assert three_span["span_links"][1] == _expected_span_link(two_span, "output", "input")

From 7b76ed108d079bfd7d5745c2a325046c8edb2146 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Thu, 23 Jan 2025 13:06:09 -0500
Subject: [PATCH 24/33] be more defensive with key errors

---
 ddtrace/llmobs/_llmobs.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 4ca97f1144a..03924573bba 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -401,7 +401,11 @@ def _record_object(self, span, obj, input_or_output):
             return
         span_links = []
         for span_link in self._link_tracker.get_span_links_from_object(obj):
-            if span_link["attributes"]["from"] == "input" and input_or_output == "output":
+            try:
+                if span_link["attributes"]["from"] == "input" and input_or_output == "output":
+                    continue
+            except KeyError:
+                log.debug("failed to read span link: ", span_link)
                 continue
             span_links.append(
                 {

From aaedab9bcd69363043aaf47602288f96faf5fcfe Mon Sep 17 00:00:00 2001
From: Yun Kim <35776586+Yun-Kim@users.noreply.github.com>
Date: Mon, 27 Jan 2025 16:23:45 -0500
Subject: [PATCH 25/33] chore: update changelog for version 2.19.2 (#12088)

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: Nicole Cybul <nicole.cybul@datadoghq.com>
---
 CHANGELOG.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a29299f8987..6c72665472f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,21 @@ Changelogs for versions not listed here can be found at https://github.com/DataD
 
 ---
 
+## 2.19.2
+### Bug Fixes
+
+- Tracing
+  - celery: Fixes an issue where `celery.apply` spans from Celery prerun got closed too soon leading to span tags being missing.
+  - openai: Fixes a patching issue where asynchronous moderation endpoint calls resulted in coroutine scheduling errors.
+  - openai: Ensures the OpenAI integration is compatible with Python versions 3.12 and 3.13.
+  - vertexai: Resolves an issue with `chat.send_message()` where the content keyword argument was not parsed correctly.
+- LLM Observability
+  - This fix resolves an issue where annotating a span with non latin-1 (but valid utf-8) input/output values resulted in encoding errors.
+- Lib-Injection
+  - Fixes incorrect telemetry data payload format.
+
+---
+
 ## 2.19.1
 ### Bug Fixes
 

From e3045a19a20e0348f040aa589ef3c2517db74618 Mon Sep 17 00:00:00 2001
From: Nick Ripley <nick.ripley@datadoghq.com>
Date: Mon, 27 Jan 2025 16:30:29 -0500
Subject: [PATCH 26/33] fix(profiling): fix SystemError when collecting memory
 profiler events (#12075)

We added locking to the memory profiler to address crashes. These locks
are mostly "try" locks, meaning we bail out if we can't acquire them
right away. This was done defensively to mitigate the possibility of
deadlock until we fully understood why the locks are needed and could
guarantee their correctness. But as a result of using try locks, the
`iter_events` function in particular can fail if the memory profiler lock
is contended when it tries to collect profiling events. The function
then returns NULL, leading to SystemError exceptions because we don't
set an error.

Even if we set an error, returning NULL isn't the right thing to do.
It'll basically mean we wait until the next profile iteration, still
accumulating events in the same buffer, and try again to upload the
events. So we're going to get multiple iteration's worth of events. The
right thing to do is take the lock unconditionally in `iter_events`. We
can allocate the new tracker outside the memory allocation profiler lock
so that we don't need to worry about reentrancy/deadlock issues if
we start profiling that allocation. Then, the only thing we do under the
lock is swap out the global tracker, so it's safe to take the lock
unconditionally.

Fixes #11831

TODO - regression test?
---
 ddtrace/profiling/collector/_memalloc.c       | 22 +++++++++++++------
 ...loc-iter-events-null-780fd50bbebbf616.yaml |  4 ++++
 2 files changed, 19 insertions(+), 7 deletions(-)
 create mode 100644 releasenotes/notes/profiling-memalloc-iter-events-null-780fd50bbebbf616.yaml

diff --git a/ddtrace/profiling/collector/_memalloc.c b/ddtrace/profiling/collector/_memalloc.c
index 1f2b87e0433..1e1b9dbf52e 100644
--- a/ddtrace/profiling/collector/_memalloc.c
+++ b/ddtrace/profiling/collector/_memalloc.c
@@ -394,20 +394,28 @@ iterevents_new(PyTypeObject* type, PyObject* Py_UNUSED(args), PyObject* Py_UNUSE
     }
 
     IterEventsState* iestate = (IterEventsState*)type->tp_alloc(type, 0);
-    if (!iestate)
+    if (!iestate) {
+        PyErr_SetString(PyExc_RuntimeError, "failed to allocate IterEventsState");
         return NULL;
+    }
 
     memalloc_assert_gil();
 
-    /* reset the current traceback list */
-    if (memlock_trylock(&g_memalloc_lock)) {
-        iestate->alloc_tracker = global_alloc_tracker;
-        global_alloc_tracker = alloc_tracker_new();
-        memlock_unlock(&g_memalloc_lock);
-    } else {
+    /* Reset the current traceback list. Do this outside lock so we can track it,
+     * and avoid reentrancy/deadlock problems, if we start tracking the raw
+     * allocator domain */
+    alloc_tracker_t* tracker = alloc_tracker_new();
+    if (!tracker) {
+        PyErr_SetString(PyExc_RuntimeError, "failed to allocate new allocation tracker");
         Py_TYPE(iestate)->tp_free(iestate);
         return NULL;
     }
+
+    memlock_lock(&g_memalloc_lock);
+    iestate->alloc_tracker = global_alloc_tracker;
+    global_alloc_tracker = tracker;
+    memlock_unlock(&g_memalloc_lock);
+
     iestate->seq_index = 0;
 
     PyObject* iter_and_count = PyTuple_New(3);
diff --git a/releasenotes/notes/profiling-memalloc-iter-events-null-780fd50bbebbf616.yaml b/releasenotes/notes/profiling-memalloc-iter-events-null-780fd50bbebbf616.yaml
new file mode 100644
index 00000000000..52a43cbd2a1
--- /dev/null
+++ b/releasenotes/notes/profiling-memalloc-iter-events-null-780fd50bbebbf616.yaml
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    profiling: fix SystemError from the memory profiler returning NULL when collecting events

From 55767a78063ed17de0e954a6db4b9ff64b762f1d Mon Sep 17 00:00:00 2001
From: William Conti <58711692+wconti27@users.noreply.github.com>
Date: Tue, 28 Jan 2025 08:44:03 -0500
Subject: [PATCH 27/33] chore(tracing): refactor web server integrations to use
 the core module (#12035)

## Motivation

Refactors all web server integrations still using `tracer.trace` to
instead use `core.context_with_data`. This is in preparation for
supporting AWS API Gateway to ensure all web servers share the same code
path.

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
---
 ddtrace/_trace/trace_handlers.py              |  56 +++++++++-
 .../contrib/internal/aiohttp/middlewares.py   | 105 ++++++++----------
 ddtrace/contrib/internal/bottle/trace.py      |  61 +++++-----
 .../contrib/internal/cherrypy/middleware.py   |  53 +++++----
 ddtrace/contrib/internal/falcon/middleware.py |  61 ++++------
 ddtrace/contrib/internal/molten/patch.py      |  52 ++++-----
 ddtrace/contrib/internal/pyramid/trace.py     |  72 ++++++------
 ddtrace/contrib/internal/sanic/patch.py       |  61 +++++-----
 ...b.cherrypy.test_middleware.test_child.json |   1 +
 ...b.cherrypy.test_middleware.test_error.json |   1 +
 ...b.cherrypy.test_middleware.test_fatal.json |   1 +
 ...cherrypy.test_middleware.test_success.json |   1 +
 ...ver.test_multiple_requests_sanic_http.json |   2 +
 ...multiple_requests_sanic_http_pre_21.9.json |   2 +
 ...c.test_sanic_server.test_sanic_errors.json |   2 +
 ...nic_server.test_sanic_errors_pre_21.9.json |   2 +
 16 files changed, 290 insertions(+), 243 deletions(-)

diff --git a/ddtrace/_trace/trace_handlers.py b/ddtrace/_trace/trace_handlers.py
index dab3f743146..3ebb28f33c8 100644
--- a/ddtrace/_trace/trace_handlers.py
+++ b/ddtrace/_trace/trace_handlers.py
@@ -109,11 +109,14 @@ def _get_parameters_for_new_span_directly_from_context(ctx: core.ExecutionContex
 def _start_span(ctx: core.ExecutionContext, call_trace: bool = True, **kwargs) -> "Span":
     span_kwargs = _get_parameters_for_new_span_directly_from_context(ctx)
     call_trace = ctx.get_item("call_trace", call_trace)
-    tracer = (ctx.get_item("middleware") or ctx["pin"]).tracer
+    tracer = ctx.get_item("tracer") or (ctx.get_item("middleware") or ctx["pin"]).tracer
     distributed_headers_config = ctx.get_item("distributed_headers_config")
     if distributed_headers_config:
         trace_utils.activate_distributed_headers(
-            tracer, int_config=distributed_headers_config, request_headers=ctx["distributed_headers"]
+            tracer,
+            int_config=distributed_headers_config,
+            request_headers=ctx["distributed_headers"],
+            override=ctx.get_item("distributed_headers_config_override"),
         )
     distributed_context = ctx.get_item("distributed_context")
     if distributed_context and not call_trace:
@@ -126,6 +129,42 @@ def _start_span(ctx: core.ExecutionContext, call_trace: bool = True, **kwargs) -
     return span
 
 
+def _set_web_frameworks_tags(ctx, span, int_config):
+    span.set_tag_str(COMPONENT, int_config.integration_name)
+    span.set_tag_str(SPAN_KIND, SpanKind.SERVER)
+    span.set_tag(_SPAN_MEASURED_KEY)
+
+    analytics_enabled = ctx.get_item("analytics_enabled")
+    analytics_sample_rate = ctx.get_item("analytics_sample_rate", True)
+
+    # Configure trace search sample rate
+    if (config._analytics_enabled and analytics_enabled is not False) or analytics_enabled is True:
+        span.set_tag(_ANALYTICS_SAMPLE_RATE_KEY, analytics_sample_rate)
+
+
+def _on_web_framework_start_request(ctx, int_config):
+    request_span = ctx.get_item("req_span")
+    _set_web_frameworks_tags(ctx, request_span, int_config)
+
+
+def _on_web_framework_finish_request(
+    span, int_config, method, url, status_code, query, req_headers, res_headers, route, finish
+):
+    trace_utils.set_http_meta(
+        span=span,
+        integration_config=int_config,
+        method=method,
+        url=url,
+        status_code=status_code,
+        query=query,
+        request_headers=req_headers,
+        response_headers=res_headers,
+        route=route,
+    )
+    if finish:
+        span.finish()
+
+
 def _on_traced_request_context_started_flask(ctx):
     current_span = ctx["pin"].tracer.current_span()
     if not ctx["pin"].enabled or not current_span:
@@ -761,6 +800,10 @@ def listen():
     core.on("azure.functions.request_call_modifier", _on_azure_functions_request_span_modifier)
     core.on("azure.functions.start_response", _on_azure_functions_start_response)
 
+    # web frameworks general handlers
+    core.on("web.request.start", _on_web_framework_start_request)
+    core.on("web.request.finish", _on_web_framework_finish_request)
+
     core.on("test_visibility.enable", _on_test_visibility_enable)
     core.on("test_visibility.disable", _on_test_visibility_disable)
     core.on("test_visibility.is_enabled", _on_test_visibility_is_enabled, "is_enabled")
@@ -769,6 +812,14 @@ def listen():
     core.on("rq.queue.enqueue_job", _propagate_context)
 
     for context_name in (
+        # web frameworks
+        "aiohttp.request",
+        "bottle.request",
+        "cherrypy.request",
+        "falcon.request",
+        "molten.request",
+        "pyramid.request",
+        "sanic.request",
         "flask.call",
         "flask.jsonify",
         "flask.render_template",
@@ -779,6 +830,7 @@ def listen():
         "django.template.render",
         "django.process_exception",
         "django.func.wrapped",
+        # non web frameworks
         "botocore.instrumented_api_call",
         "botocore.instrumented_lib_function",
         "botocore.patched_kinesis_api_call",
diff --git a/ddtrace/contrib/internal/aiohttp/middlewares.py b/ddtrace/contrib/internal/aiohttp/middlewares.py
index ddb2d35fbc6..63a60734d3b 100644
--- a/ddtrace/contrib/internal/aiohttp/middlewares.py
+++ b/ddtrace/contrib/internal/aiohttp/middlewares.py
@@ -2,15 +2,10 @@
 from aiohttp.web_urldispatcher import SystemRoute
 
 from ddtrace import config
-from ddtrace.constants import _ANALYTICS_SAMPLE_RATE_KEY
-from ddtrace.constants import _SPAN_MEASURED_KEY
-from ddtrace.constants import SPAN_KIND
-from ddtrace.contrib import trace_utils
 from ddtrace.contrib.asyncio import context_provider
-from ddtrace.ext import SpanKind
 from ddtrace.ext import SpanTypes
 from ddtrace.ext import http
-from ddtrace.internal.constants import COMPONENT
+from ddtrace.internal import core
 from ddtrace.internal.schema import schematize_url_operation
 from ddtrace.internal.schema.span_attribute_schema import SpanDirection
 
@@ -35,47 +30,42 @@ async def attach_context(request):
         # application configs
         tracer = app[CONFIG_KEY]["tracer"]
         service = app[CONFIG_KEY]["service"]
-        distributed_tracing = app[CONFIG_KEY]["distributed_tracing_enabled"]
-        # Create a new context based on the propagated information.
-        trace_utils.activate_distributed_headers(
-            tracer,
-            int_config=config.aiohttp,
-            request_headers=request.headers,
-            override=distributed_tracing,
-        )
-
-        # trace the handler
-        request_span = tracer.trace(
-            schematize_url_operation("aiohttp.request", protocol="http", direction=SpanDirection.INBOUND),
-            service=service,
-            span_type=SpanTypes.WEB,
-        )
-        request_span.set_tag(_SPAN_MEASURED_KEY)
-
-        request_span.set_tag_str(COMPONENT, config.aiohttp.integration_name)
-
-        # set span.kind tag equal to type of request
-        request_span.set_tag_str(SPAN_KIND, SpanKind.SERVER)
-
-        # Configure trace search sample rate
         # DEV: aiohttp is special case maintains separate configuration from config api
         analytics_enabled = app[CONFIG_KEY]["analytics_enabled"]
-        if (config._analytics_enabled and analytics_enabled is not False) or analytics_enabled is True:
-            request_span.set_tag(_ANALYTICS_SAMPLE_RATE_KEY, app[CONFIG_KEY].get("analytics_sample_rate", True))
-
-        # attach the context and the root span to the request; the Context
-        # may be freely used by the application code
-        request[REQUEST_CONTEXT_KEY] = request_span.context
-        request[REQUEST_SPAN_KEY] = request_span
-        request[REQUEST_CONFIG_KEY] = app[CONFIG_KEY]
-        try:
-            response = await handler(request)
-            if isinstance(response, web.StreamResponse):
-                request.task.add_done_callback(lambda _: finish_request_span(request, response))
-            return response
-        except Exception:
-            request_span.set_traceback()
-            raise
+        # Create a new context based on the propagated information.
+
+        with core.context_with_data(
+            "aiohttp.request",
+            span_name=schematize_url_operation("aiohttp.request", protocol="http", direction=SpanDirection.INBOUND),
+            span_type=SpanTypes.WEB,
+            service=service,
+            tags={},
+            tracer=tracer,
+            distributed_headers=request.headers,
+            distributed_headers_config=config.aiohttp,
+            distributed_headers_config_override=app[CONFIG_KEY]["distributed_tracing_enabled"],
+            headers_case_sensitive=True,
+            analytics_enabled=analytics_enabled,
+            analytics_sample_rate=app[CONFIG_KEY].get("analytics_sample_rate", True),
+        ) as ctx:
+            req_span = ctx.span
+
+            ctx.set_item("req_span", req_span)
+            core.dispatch("web.request.start", (ctx, config.aiohttp))
+
+            # attach the context and the root span to the request; the Context
+            # may be freely used by the application code
+            request[REQUEST_CONTEXT_KEY] = req_span.context
+            request[REQUEST_SPAN_KEY] = req_span
+            request[REQUEST_CONFIG_KEY] = app[CONFIG_KEY]
+            try:
+                response = await handler(request)
+                if isinstance(response, web.StreamResponse):
+                    request.task.add_done_callback(lambda _: finish_request_span(request, response))
+                return response
+            except Exception:
+                req_span.set_traceback()
+                raise
 
     return attach_context
 
@@ -122,19 +112,22 @@ def finish_request_span(request, response):
             # SystemRoute objects exist to throw HTTP errors and have no path
             route = aiohttp_route.resource.canonical
 
-    trace_utils.set_http_meta(
-        request_span,
-        config.aiohttp,
-        method=request.method,
-        url=str(request.url),  # DEV: request.url is a yarl's URL object
-        status_code=response.status,
-        request_headers=request.headers,
-        response_headers=response.headers,
-        route=route,
+    core.dispatch(
+        "web.request.finish",
+        (
+            request_span,
+            config.aiohttp,
+            request.method,
+            str(request.url),  # DEV: request.url is a yarl's URL object
+            response.status,
+            None,  # query arg = None
+            request.headers,
+            response.headers,
+            route,
+            True,
+        ),
     )
 
-    request_span.finish()
-
 
 async def on_prepare(request, response):
     """
diff --git a/ddtrace/contrib/internal/bottle/trace.py b/ddtrace/contrib/internal/bottle/trace.py
index 3aabb4ccc81..58778a36ee1 100644
--- a/ddtrace/contrib/internal/bottle/trace.py
+++ b/ddtrace/contrib/internal/bottle/trace.py
@@ -5,13 +5,8 @@
 
 import ddtrace
 from ddtrace import config
-from ddtrace.constants import _ANALYTICS_SAMPLE_RATE_KEY
-from ddtrace.constants import _SPAN_MEASURED_KEY
-from ddtrace.constants import SPAN_KIND
-from ddtrace.contrib import trace_utils
-from ddtrace.ext import SpanKind
 from ddtrace.ext import SpanTypes
-from ddtrace.internal.constants import COMPONENT
+from ddtrace.internal import core
 from ddtrace.internal.schema import schematize_url_operation
 from ddtrace.internal.schema.span_attribute_schema import SpanDirection
 from ddtrace.internal.utils.formats import asbool
@@ -42,24 +37,21 @@ def wrapped(*args, **kwargs):
 
             resource = "{} {}".format(request.method, route.rule)
 
-            trace_utils.activate_distributed_headers(
-                self.tracer, int_config=config.bottle, request_headers=request.headers
-            )
-
-            with self.tracer.trace(
-                schematize_url_operation("bottle.request", protocol="http", direction=SpanDirection.INBOUND),
+            with core.context_with_data(
+                "bottle.request",
+                span_name=schematize_url_operation("bottle.request", protocol="http", direction=SpanDirection.INBOUND),
+                span_type=SpanTypes.WEB,
                 service=self.service,
                 resource=resource,
-                span_type=SpanTypes.WEB,
-            ) as s:
-                s.set_tag_str(COMPONENT, config.bottle.integration_name)
-
-                # set span.kind to the type of request being performed
-                s.set_tag_str(SPAN_KIND, SpanKind.SERVER)
-
-                s.set_tag(_SPAN_MEASURED_KEY)
-                # set analytics sample rate with global config enabled
-                s.set_tag(_ANALYTICS_SAMPLE_RATE_KEY, config.bottle.get_analytics_sample_rate(use_global_config=True))
+                tags={},
+                tracer=self.tracer,
+                distributed_headers=request.headers,
+                distributed_headers_config=config.bottle,
+                headers_case_sensitive=True,
+                analytics_sample_rate=config.bottle.get_analytics_sample_rate(use_global_config=True),
+            ) as ctx, ctx.span as req_span:
+                ctx.set_item("req_span", req_span)
+                core.dispatch("web.request.start", (ctx, config.bottle))
 
                 code = None
                 result = None
@@ -91,16 +83,21 @@ def wrapped(*args, **kwargs):
                     method = request.method
                     url = request.urlparts._replace(query="").geturl()
                     full_route = "/".join([request.script_name.rstrip("/"), route.rule.lstrip("/")])
-                    trace_utils.set_http_meta(
-                        s,
-                        config.bottle,
-                        method=method,
-                        url=url,
-                        status_code=response_code,
-                        query=request.query_string,
-                        request_headers=request.headers,
-                        response_headers=response.headers,
-                        route=full_route,
+
+                    core.dispatch(
+                        "web.request.finish",
+                        (
+                            req_span,
+                            config.bottle,
+                            method,
+                            url,
+                            response_code,
+                            request.query_string,
+                            request.headers,
+                            response.headers,
+                            full_route,
+                            False,
+                        ),
                     )
 
         return wrapped
diff --git a/ddtrace/contrib/internal/cherrypy/middleware.py b/ddtrace/contrib/internal/cherrypy/middleware.py
index 909a043175f..70f43059905 100644
--- a/ddtrace/contrib/internal/cherrypy/middleware.py
+++ b/ddtrace/contrib/internal/cherrypy/middleware.py
@@ -11,12 +11,10 @@
 from ddtrace.constants import ERROR_MSG
 from ddtrace.constants import ERROR_STACK
 from ddtrace.constants import ERROR_TYPE
-from ddtrace.constants import SPAN_KIND
 from ddtrace.contrib import trace_utils
-from ddtrace.ext import SpanKind
 from ddtrace.ext import SpanTypes
 from ddtrace.internal import compat
-from ddtrace.internal.constants import COMPONENT
+from ddtrace.internal import core
 from ddtrace.internal.schema import SpanDirection
 from ddtrace.internal.schema import schematize_service_name
 from ddtrace.internal.schema import schematize_url_operation
@@ -77,20 +75,23 @@ def _setup(self):
         cherrypy.request.hooks.attach("after_error_response", self._after_error_response, priority=5)
 
     def _on_start_resource(self):
-        trace_utils.activate_distributed_headers(
-            self._tracer, int_config=config.cherrypy, request_headers=cherrypy.request.headers
-        )
-
-        cherrypy.request._datadog_span = self._tracer.trace(
-            SPAN_NAME,
-            service=trace_utils.int_service(None, config.cherrypy, default="cherrypy"),
+        with core.context_with_data(
+            "cherrypy.request",
+            span_name=SPAN_NAME,
             span_type=SpanTypes.WEB,
-        )
+            service=trace_utils.int_service(None, config.cherrypy, default="cherrypy"),
+            tags={},
+            tracer=self._tracer,
+            distributed_headers=cherrypy.request.headers,
+            distributed_headers_config=config.cherrypy,
+            headers_case_sensitive=True,
+        ) as ctx:
+            req_span = ctx.span
 
-        cherrypy.request._datadog_span.set_tag_str(COMPONENT, config.cherrypy.integration_name)
+            ctx.set_item("req_span", req_span)
+            core.dispatch("web.request.start", (ctx, config.cherrypy))
 
-        # set span.kind to the type of request being performed
-        cherrypy.request._datadog_span.set_tag_str(SPAN_KIND, SpanKind.SERVER)
+            cherrypy.request._datadog_span = req_span
 
     def _after_error_response(self):
         span = getattr(cherrypy.request, "_datadog_span", None)
@@ -135,18 +136,22 @@ def _close_span(self, span):
         url = compat.to_unicode(cherrypy.request.base + cherrypy.request.path_info)
         status_code, _, _ = valid_status(cherrypy.response.status)
 
-        trace_utils.set_http_meta(
-            span,
-            config.cherrypy,
-            method=cherrypy.request.method,
-            url=url,
-            status_code=status_code,
-            request_headers=cherrypy.request.headers,
-            response_headers=cherrypy.response.headers,
+        core.dispatch(
+            "web.request.finish",
+            (
+                span,
+                config.cherrypy,
+                cherrypy.request.method,
+                url,
+                status_code,
+                None,
+                cherrypy.request.headers,
+                cherrypy.response.headers,
+                None,
+                True,
+            ),
         )
 
-        span.finish()
-
         # Clear our span just in case.
         cherrypy.request._datadog_span = None
 
diff --git a/ddtrace/contrib/internal/falcon/middleware.py b/ddtrace/contrib/internal/falcon/middleware.py
index b4ec5434777..513ce6cce39 100644
--- a/ddtrace/contrib/internal/falcon/middleware.py
+++ b/ddtrace/contrib/internal/falcon/middleware.py
@@ -1,14 +1,8 @@
 import sys
 
 from ddtrace import config
-from ddtrace.constants import _ANALYTICS_SAMPLE_RATE_KEY
-from ddtrace.constants import _SPAN_MEASURED_KEY
-from ddtrace.constants import SPAN_KIND
-from ddtrace.contrib import trace_utils
-from ddtrace.ext import SpanKind
 from ddtrace.ext import SpanTypes
-from ddtrace.ext import http as httpx
-from ddtrace.internal.constants import COMPONENT
+from ddtrace.internal import core
 from ddtrace.internal.schema import SpanDirection
 from ddtrace.internal.schema import schematize_service_name
 from ddtrace.internal.schema import schematize_url_operation
@@ -27,26 +21,27 @@ def __init__(self, tracer, service=None, distributed_tracing=None):
     def process_request(self, req, resp):
         # Falcon uppercases all header names.
         headers = dict((k.lower(), v) for k, v in req.headers.items())
-        trace_utils.activate_distributed_headers(self.tracer, int_config=config.falcon, request_headers=headers)
 
-        span = self.tracer.trace(
-            schematize_url_operation("falcon.request", protocol="http", direction=SpanDirection.INBOUND),
-            service=self.service,
+        with core.context_with_data(
+            "falcon.request",
+            span_name=schematize_url_operation("falcon.request", protocol="http", direction=SpanDirection.INBOUND),
             span_type=SpanTypes.WEB,
-        )
-        span.set_tag_str(COMPONENT, config.falcon.integration_name)
-
-        # set span.kind to the type of operation being performed
-        span.set_tag_str(SPAN_KIND, SpanKind.SERVER)
-
-        span.set_tag(_SPAN_MEASURED_KEY)
-
-        # set analytics sample rate with global config enabled
-        span.set_tag(_ANALYTICS_SAMPLE_RATE_KEY, config.falcon.get_analytics_sample_rate(use_global_config=True))
-
-        trace_utils.set_http_meta(
-            span, config.falcon, method=req.method, url=req.url, query=req.query_string, request_headers=req.headers
-        )
+            service=self.service,
+            tags={},
+            tracer=self.tracer,
+            distributed_headers=headers,
+            distributed_headers_config=config.falcon,
+            headers_case_sensitive=True,
+            analytics_sample_rate=config.falcon.get_analytics_sample_rate(use_global_config=True),
+        ) as ctx:
+            req_span = ctx.span
+            ctx.set_item("req_span", req_span)
+            core.dispatch("web.request.start", (ctx, config.falcon))
+
+            core.dispatch(
+                "web.request.finish",
+                (req_span, config.falcon, req.method, req.url, None, req.query_string, req.headers, None, None, False),
+            )
 
     def process_resource(self, req, resp, resource, params):
         span = self.tracer.current_span()
@@ -69,8 +64,7 @@ def process_response(self, req, resp, resource, req_succeeded=None):
         if resource is None:
             status = "404"
             span.resource = "%s 404" % req.method
-            span.set_tag(httpx.STATUS_CODE, status)
-            span.finish()
+            core.dispatch("web.request.finish", (span, config.falcon, None, None, status, None, None, None, None, True))
             return
 
         err_type = sys.exc_info()[0]
@@ -87,20 +81,13 @@ def process_response(self, req, resp, resource, req_succeeded=None):
 
         route = req.root_path or "" + req.uri_template
 
-        trace_utils.set_http_meta(
-            span,
-            config.falcon,
-            status_code=status,
-            response_headers=resp._headers,
-            route=route,
-        )
-
         # Emit span hook for this response
         # DEV: Emit before closing so they can overwrite `span.resource` if they want
         config.falcon.hooks.emit("request", span, req, resp)
 
-        # Close the span
-        span.finish()
+        core.dispatch(
+            "web.request.finish", (span, config.falcon, None, None, status, None, None, resp._headers, route, True)
+        )
 
 
 def _is_404(err_type):
diff --git a/ddtrace/contrib/internal/molten/patch.py b/ddtrace/contrib/internal/molten/patch.py
index 38fa949243c..dfec47eb17d 100644
--- a/ddtrace/contrib/internal/molten/patch.py
+++ b/ddtrace/contrib/internal/molten/patch.py
@@ -5,15 +5,11 @@
 from wrapt import wrap_function_wrapper as _w
 
 from ddtrace import config
-from ddtrace.constants import _ANALYTICS_SAMPLE_RATE_KEY
-from ddtrace.constants import _SPAN_MEASURED_KEY
-from ddtrace.constants import SPAN_KIND
 from ddtrace.contrib import trace_utils
 from ddtrace.contrib.internal.trace_utils import unwrap as _u
-from ddtrace.ext import SpanKind
 from ddtrace.ext import SpanTypes
+from ddtrace.internal import core
 from ddtrace.internal.compat import urlencode
-from ddtrace.internal.constants import COMPONENT
 from ddtrace.internal.schema import schematize_service_name
 from ddtrace.internal.schema import schematize_url_operation
 from ddtrace.internal.schema.span_attribute_schema import SpanDirection
@@ -89,25 +85,21 @@ def patch_app_call(wrapped, instance, args, kwargs):
     request = molten.http.Request.from_environ(environ)
     resource = func_name(wrapped)
 
-    # request.headers is type Iterable[Tuple[str, str]]
-    trace_utils.activate_distributed_headers(
-        pin.tracer, int_config=config.molten, request_headers=dict(request.headers)
-    )
-
-    with pin.tracer.trace(
-        schematize_url_operation("molten.request", protocol="http", direction=SpanDirection.INBOUND),
+    with core.context_with_data(
+        "molten.request",
+        span_name=schematize_url_operation("molten.request", protocol="http", direction=SpanDirection.INBOUND),
+        span_type=SpanTypes.WEB,
         service=trace_utils.int_service(pin, config.molten),
         resource=resource,
-        span_type=SpanTypes.WEB,
-    ) as span:
-        span.set_tag_str(COMPONENT, config.molten.integration_name)
-
-        # set span.kind tag equal to type of operation being performed
-        span.set_tag_str(SPAN_KIND, SpanKind.SERVER)
-
-        span.set_tag(_SPAN_MEASURED_KEY)
-        # set analytics sample rate with global config enabled
-        span.set_tag(_ANALYTICS_SAMPLE_RATE_KEY, config.molten.get_analytics_sample_rate(use_global_config=True))
+        tags={},
+        tracer=pin.tracer,
+        distributed_headers=dict(request.headers),  # request.headers is type Iterable[Tuple[str, str]]
+        distributed_headers_config=config.molten,
+        headers_case_sensitive=True,
+        analytics_sample_rate=config.molten.get_analytics_sample_rate(use_global_config=True),
+    ) as ctx, ctx.span as req_span:
+        ctx.set_item("req_span", req_span)
+        core.dispatch("web.request.start", (ctx, config.molten))
 
         @wrapt.function_wrapper
         def _w_start_response(wrapped, instance, args, kwargs):
@@ -125,11 +117,13 @@ def _w_start_response(wrapped, instance, args, kwargs):
             except ValueError:
                 pass
 
-            if not span.get_tag(MOLTEN_ROUTE):
+            if not req_span.get_tag(MOLTEN_ROUTE):
                 # if route never resolve, update root resource
-                span.resource = "{} {}".format(request.method, code)
+                req_span.resource = "{} {}".format(request.method, code)
 
-            trace_utils.set_http_meta(span, config.molten, status_code=code)
+            core.dispatch(
+                "web.request.finish", (req_span, config.molten, None, None, code, None, None, None, None, False)
+            )
 
             return wrapped(*args, **kwargs)
 
@@ -143,11 +137,13 @@ def _w_start_response(wrapped, instance, args, kwargs):
             request.path,
         )
         query = urlencode(dict(request.params))
-        trace_utils.set_http_meta(
-            span, config.molten, method=request.method, url=url, query=query, request_headers=request.headers
+
+        core.dispatch(
+            "web.request.finish",
+            (req_span, config.molten, request.method, url, None, query, request.headers, None, None, False),
         )
 
-        span.set_tag_str("molten.version", molten.__version__)
+        req_span.set_tag_str("molten.version", molten.__version__)
         return wrapped(environ, start_response, **kwargs)
 
 
diff --git a/ddtrace/contrib/internal/pyramid/trace.py b/ddtrace/contrib/internal/pyramid/trace.py
index 9942c673d4e..9b221bd3f09 100644
--- a/ddtrace/contrib/internal/pyramid/trace.py
+++ b/ddtrace/contrib/internal/pyramid/trace.py
@@ -6,12 +6,8 @@
 # project
 import ddtrace
 from ddtrace import config
-from ddtrace.constants import _ANALYTICS_SAMPLE_RATE_KEY
-from ddtrace.constants import _SPAN_MEASURED_KEY
-from ddtrace.constants import SPAN_KIND
-from ddtrace.contrib import trace_utils
-from ddtrace.ext import SpanKind
 from ddtrace.ext import SpanTypes
+from ddtrace.internal import core
 from ddtrace.internal.constants import COMPONENT
 from ddtrace.internal.logger import get_logger
 from ddtrace.internal.schema import schematize_service_name
@@ -67,29 +63,30 @@ def trace_tween_factory(handler, registry):
     service = settings.get(SETTINGS_SERVICE) or schematize_service_name("pyramid")
     tracer = settings.get(SETTINGS_TRACER) or ddtrace.tracer
     enabled = asbool(settings.get(SETTINGS_TRACE_ENABLED, tracer.enabled))
-    distributed_tracing = asbool(settings.get(SETTINGS_DISTRIBUTED_TRACING, True))
+
+    # ensure distributed tracing within pyramid settings matches config
+    config.pyramid.distributed_tracing_enabled = asbool(settings.get(SETTINGS_DISTRIBUTED_TRACING, True))
 
     if enabled:
         # make a request tracing function
         def trace_tween(request):
-            trace_utils.activate_distributed_headers(
-                tracer, int_config=config.pyramid, request_headers=request.headers, override=distributed_tracing
-            )
-
-            span_name = schematize_url_operation("pyramid.request", protocol="http", direction=SpanDirection.INBOUND)
-            with tracer.trace(span_name, service=service, resource="404", span_type=SpanTypes.WEB) as span:
-                span.set_tag_str(COMPONENT, config.pyramid.integration_name)
-
-                # set span.kind to the type of operation being performed
-                span.set_tag_str(SPAN_KIND, SpanKind.SERVER)
-
-                span.set_tag(_SPAN_MEASURED_KEY)
-                # Configure trace search sample rate
+            with core.context_with_data(
+                "pyramid.request",
+                span_name=schematize_url_operation("pyramid.request", protocol="http", direction=SpanDirection.INBOUND),
+                span_type=SpanTypes.WEB,
+                service=service,
+                resource="404",
+                tags={},
+                tracer=tracer,
+                distributed_headers=request.headers,
+                distributed_headers_config=config.pyramid,
+                headers_case_sensitive=True,
                 # DEV: pyramid is special case maintains separate configuration from config api
-                analytics_enabled = settings.get(SETTINGS_ANALYTICS_ENABLED)
-
-                if (config._analytics_enabled and analytics_enabled is not False) or analytics_enabled is True:
-                    span.set_tag(_ANALYTICS_SAMPLE_RATE_KEY, settings.get(SETTINGS_ANALYTICS_SAMPLE_RATE, True))
+                analytics_enabled=settings.get(SETTINGS_ANALYTICS_ENABLED),
+                analytics_sample_rate=settings.get(SETTINGS_ANALYTICS_SAMPLE_RATE, True),
+            ) as ctx, ctx.span as req_span:
+                ctx.set_item("req_span", req_span)
+                core.dispatch("web.request.start", (ctx, config.pyramid))
 
                 setattr(request, DD_TRACER, tracer)  # used to find the tracer in templates
                 response = None
@@ -110,8 +107,8 @@ def trace_tween(request):
                 finally:
                     # set request tags
                     if request.matched_route:
-                        span.resource = "{} {}".format(request.method, request.matched_route.name)
-                        span.set_tag_str("pyramid.route.name", request.matched_route.name)
+                        req_span.resource = "{} {}".format(request.method, request.matched_route.name)
+                        req_span.set_tag_str("pyramid.route.name", request.matched_route.name)
                     # set response tags
                     if response:
                         status = response.status_code
@@ -119,17 +116,22 @@ def trace_tween(request):
                     else:
                         response_headers = None
 
-                    trace_utils.set_http_meta(
-                        span,
-                        config.pyramid,
-                        method=request.method,
-                        url=request.path_url,
-                        status_code=status,
-                        query=request.query_string,
-                        request_headers=request.headers,
-                        response_headers=response_headers,
-                        route=request.matched_route.pattern if request.matched_route else None,
+                    core.dispatch(
+                        "web.request.finish",
+                        (
+                            req_span,
+                            config.pyramid,
+                            request.method,
+                            request.path_url,
+                            status,
+                            request.query_string,
+                            request.headers,
+                            response_headers,
+                            request.matched_route.pattern if request.matched_route else None,
+                            False,
+                        ),
                     )
+
                 return response
 
         return trace_tween
diff --git a/ddtrace/contrib/internal/sanic/patch.py b/ddtrace/contrib/internal/sanic/patch.py
index 8e53ed41dc8..1babf7d44a0 100644
--- a/ddtrace/contrib/internal/sanic/patch.py
+++ b/ddtrace/contrib/internal/sanic/patch.py
@@ -4,14 +4,10 @@
 import wrapt
 from wrapt import wrap_function_wrapper as _w
 
-import ddtrace
 from ddtrace import config
-from ddtrace.constants import _ANALYTICS_SAMPLE_RATE_KEY
-from ddtrace.constants import SPAN_KIND
 from ddtrace.contrib import trace_utils
-from ddtrace.ext import SpanKind
 from ddtrace.ext import SpanTypes
-from ddtrace.internal.constants import COMPONENT
+from ddtrace.internal import core
 from ddtrace.internal.logger import get_logger
 from ddtrace.internal.schema import schematize_service_name
 from ddtrace.internal.schema import schematize_url_operation
@@ -47,7 +43,10 @@ def update_span(span, response):
     #      and so use 500
     status_code = getattr(response, "status", 500)
     response_headers = getattr(response, "headers", None)
-    trace_utils.set_http_meta(span, config.sanic, status_code=status_code, response_headers=response_headers)
+
+    core.dispatch(
+        "web.request.finish", (span, config.sanic, None, None, status_code, None, None, response_headers, None, False)
+    )
 
 
 def _wrap_response_callback(span, callback):
@@ -200,31 +199,35 @@ def _create_sanic_request_span(request):
 
     headers = request.headers.copy()
 
-    trace_utils.activate_distributed_headers(ddtrace.tracer, int_config=config.sanic, request_headers=headers)
-
-    span = pin.tracer.trace(
-        schematize_url_operation("sanic.request", protocol="http", direction=SpanDirection.INBOUND),
+    with core.context_with_data(
+        "sanic.request",
+        span_name=schematize_url_operation("sanic.request", protocol="http", direction=SpanDirection.INBOUND),
+        span_type=SpanTypes.WEB,
         service=trace_utils.int_service(None, config.sanic),
         resource=resource,
-        span_type=SpanTypes.WEB,
-    )
-    span.set_tag_str(COMPONENT, config.sanic.integration_name)
-
-    # set span.kind to the type of operation being performed
-    span.set_tag_str(SPAN_KIND, SpanKind.SERVER)
-
-    sample_rate = config.sanic.get_analytics_sample_rate(use_global_config=True)
-    if sample_rate is not None:
-        span.set_tag(_ANALYTICS_SAMPLE_RATE_KEY, sample_rate)
-
-    method = request.method
-    url = "{scheme}://{host}{path}".format(scheme=request.scheme, host=request.host, path=request.path)
-    query_string = request.query_string
-    if isinstance(query_string, bytes):
-        query_string = query_string.decode()
-    trace_utils.set_http_meta(span, config.sanic, method=method, url=url, query=query_string, request_headers=headers)
-
-    return span
+        tags={},
+        pin=pin,
+        distributed_headers=headers,
+        distributed_headers_config=config.sanic,
+        headers_case_sensitive=True,
+        analytics_sample_rate=config.sanic.get_analytics_sample_rate(use_global_config=True),
+    ) as ctx:
+        req_span = ctx.span
+
+        ctx.set_item("req_span", req_span)
+        core.dispatch("web.request.start", (ctx, config.sanic))
+
+        method = request.method
+        url = "{scheme}://{host}{path}".format(scheme=request.scheme, host=request.host, path=request.path)
+        query_string = request.query_string
+        if isinstance(query_string, bytes):
+            query_string = query_string.decode()
+
+        core.dispatch(
+            "web.request.finish", (req_span, config.sanic, method, url, None, query_string, headers, None, None, False)
+        )
+
+        return req_span
 
 
 async def sanic_http_lifecycle_handle(request):
diff --git a/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_child.json b/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_child.json
index 1887282b06f..933eda3e420 100644
--- a/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_child.json
+++ b/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_child.json
@@ -22,6 +22,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
diff --git a/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_error.json b/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_error.json
index 49759fee3d3..58fde524828 100644
--- a/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_error.json
+++ b/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_error.json
@@ -25,6 +25,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
diff --git a/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_fatal.json b/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_fatal.json
index 4153c8c2b18..a5a7c1ed14f 100644
--- a/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_fatal.json
+++ b/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_fatal.json
@@ -25,6 +25,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
diff --git a/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_success.json b/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_success.json
index d1eb9fc5bee..ce13988add0 100644
--- a/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_success.json
+++ b/tests/snapshots/tests.contrib.cherrypy.test_middleware.test_success.json
@@ -22,6 +22,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
diff --git a/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_multiple_requests_sanic_http.json b/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_multiple_requests_sanic_http.json
index 3803d07f360..66ca28d9606 100644
--- a/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_multiple_requests_sanic_http.json
+++ b/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_multiple_requests_sanic_http.json
@@ -23,6 +23,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
@@ -72,6 +73,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
diff --git a/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_multiple_requests_sanic_http_pre_21.9.json b/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_multiple_requests_sanic_http_pre_21.9.json
index 4aac2721c02..aca376a974a 100644
--- a/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_multiple_requests_sanic_http_pre_21.9.json
+++ b/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_multiple_requests_sanic_http_pre_21.9.json
@@ -22,6 +22,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
@@ -70,6 +71,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
diff --git a/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_sanic_errors.json b/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_sanic_errors.json
index 435bbbc7b23..ab2aeaec920 100644
--- a/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_sanic_errors.json
+++ b/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_sanic_errors.json
@@ -22,6 +22,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
@@ -58,6 +59,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
diff --git a/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_sanic_errors_pre_21.9.json b/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_sanic_errors_pre_21.9.json
index c03813a43d6..ceabecd1ae7 100644
--- a/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_sanic_errors_pre_21.9.json
+++ b/tests/snapshots/tests.contrib.sanic.test_sanic_server.test_sanic_errors_pre_21.9.json
@@ -22,6 +22,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
@@ -54,6 +55,7 @@
       "span.kind": "server"
     },
     "metrics": {
+      "_dd.measured": 1,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,

From 16d5280956bc259e77691536229824d3183a4181 Mon Sep 17 00:00:00 2001
From: Christophe Papazian
 <114495376+christophe-papazian@users.noreply.github.com>
Date: Tue, 28 Jan 2025 15:48:22 +0100
Subject: [PATCH 28/33] ci(tracer): make serverless test unrequired (#12121)

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
---
 .gitlab/benchmarks.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index e922d315444..692a61ea93f 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -96,6 +96,7 @@ benchmark-serverless:
   tags: ["arch:amd64"]
   when: on_success
   needs: [ "benchmark-serverless-trigger" ]
+  allow_failure: true
   script:
     - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/serverless-tools.git ./serverless-tools && cd ./serverless-tools
     - ./ci/check_trigger_status.sh

From 4f0bcb5a59378fb6015230f9f9ad04199fc380b5 Mon Sep 17 00:00:00 2001
From: Christophe Papazian
 <114495376+christophe-papazian@users.noreply.github.com>
Date: Tue, 28 Jan 2025 17:28:20 +0100
Subject: [PATCH 29/33] chore(asm): clean libddwaf loading (#12102)

Depending of the timing, libddwaf loading process could create triggers
that would create loops in our instrumentation.
From what I investigated:
- if loaded too early, it could have bad interactions with gevent.
- if loaded too late, it could be self instrumented by the tracer,
creating a loop, as ctypes is using Popen and subprocess.

while keeping the late loading introduced by 2 previous PRs
- https://github.com/DataDog/dd-trace-py/pull/11987
- https://github.com/DataDog/dd-trace-py/pull/12013
this PR introduced a mechanism to bypass tracer instrumentation during
ctypes loading, to avoid a possible loop that would prevent the WAF to
be loaded.


## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
---
 ddtrace/appsec/_ddwaf/ddwaf_types.py         |  3 +++
 ddtrace/appsec/_processor.py                 | 26 +++++++++++---------
 ddtrace/contrib/internal/subprocess/patch.py |  6 +++++
 ddtrace/settings/asm.py                      |  1 +
 4 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/ddtrace/appsec/_ddwaf/ddwaf_types.py b/ddtrace/appsec/_ddwaf/ddwaf_types.py
index e998b6048f6..5942a2fa184 100644
--- a/ddtrace/appsec/_ddwaf/ddwaf_types.py
+++ b/ddtrace/appsec/_ddwaf/ddwaf_types.py
@@ -22,9 +22,12 @@
 
 if system() == "Linux":
     try:
+        asm_config._bypass_instrumentation_for_waf = True
         ctypes.CDLL(ctypes.util.find_library("rt"), mode=ctypes.RTLD_GLOBAL)
     except Exception:  # nosec
         pass
+    finally:
+        asm_config._bypass_instrumentation_for_waf = False
 
 ARCHI = machine().lower()
 
diff --git a/ddtrace/appsec/_processor.py b/ddtrace/appsec/_processor.py
index 6102ba1ded2..83b65584a66 100644
--- a/ddtrace/appsec/_processor.py
+++ b/ddtrace/appsec/_processor.py
@@ -4,6 +4,7 @@
 from json.decoder import JSONDecodeError
 import os
 import os.path
+from typing import TYPE_CHECKING
 from typing import Any
 from typing import Dict
 from typing import List
@@ -11,6 +12,11 @@
 from typing import Set
 from typing import Tuple
 from typing import Union
+
+
+if TYPE_CHECKING:
+    import ddtrace.appsec._ddwaf as ddwaf
+
 import weakref
 
 from ddtrace._trace.processor import SpanProcessor
@@ -167,14 +173,17 @@ def __post_init__(self) -> None:
     def delayed_init(self) -> None:
         try:
             if self._rules is not None and not hasattr(self, "_ddwaf"):
-                self._ddwaf = ddwaf.DDWaf(
+                from ddtrace.appsec._ddwaf import DDWaf  # noqa: E402
+                import ddtrace.appsec._metrics as metrics  # noqa: E402
+
+                self.metrics = metrics
+                self._ddwaf = DDWaf(
                     self._rules, self.obfuscation_parameter_key_regexp, self.obfuscation_parameter_value_regexp
                 )
-            _set_waf_init_metric(self._ddwaf.info)
+                self.metrics._set_waf_init_metric(self._ddwaf.info)
         except Exception:
             # Partial of DDAS-0005-00
             log.warning("[DDAS-0005-00] WAF initialization failed")
-            raise
         self._update_required()
 
     def _update_required(self):
@@ -193,7 +202,7 @@ def _update_rules(self, new_rules: Dict[str, Any]) -> bool:
         if asm_config._asm_static_rule_file is not None:
             return result
         result = self._ddwaf.update_rules(new_rules)
-        _set_waf_updates_metric(self._ddwaf.info)
+        self.metrics._set_waf_updates_metric(self._ddwaf.info)
         self._update_required()
         return result
 
@@ -241,7 +250,7 @@ def waf_callable(custom_data=None, **kwargs):
             return self._waf_action(span._local_root or span, ctx, custom_data, **kwargs)
 
         _asm_request_context.set_waf_callback(waf_callable)
-        _asm_request_context.add_context_callback(_set_waf_request_metrics)
+        _asm_request_context.add_context_callback(self.metrics._set_waf_request_metrics)
         if headers is not None:
             _asm_request_context.set_waf_address(SPAN_DATA_NAMES.REQUEST_HEADERS_NO_COOKIES, headers)
             _asm_request_context.set_waf_address(
@@ -436,10 +445,3 @@ def on_span_finish(self, span: Span) -> None:
                     del self._span_to_waf_ctx[s]
                 except Exception:  # nosec B110
                     pass
-
-
-# load waf at the end only to avoid possible circular imports with gevent
-import ddtrace.appsec._ddwaf as ddwaf  # noqa: E402
-from ddtrace.appsec._metrics import _set_waf_init_metric  # noqa: E402
-from ddtrace.appsec._metrics import _set_waf_request_metrics  # noqa: E402
-from ddtrace.appsec._metrics import _set_waf_updates_metric  # noqa: E402
diff --git a/ddtrace/contrib/internal/subprocess/patch.py b/ddtrace/contrib/internal/subprocess/patch.py
index 80d05b107bb..2d66edd4737 100644
--- a/ddtrace/contrib/internal/subprocess/patch.py
+++ b/ddtrace/contrib/internal/subprocess/patch.py
@@ -327,6 +327,8 @@ def unpatch() -> None:
 @trace_utils.with_traced_module
 def _traced_ossystem(module, pin, wrapped, instance, args, kwargs):
     try:
+        if asm_config._bypass_instrumentation_for_waf:
+            return wrapped(*args, **kwargs)
         if isinstance(args[0], str):
             for callback in _STR_CALLBACKS.values():
                 callback(args[0])
@@ -393,6 +395,8 @@ def _traced_osspawn(module, pin, wrapped, instance, args, kwargs):
 @trace_utils.with_traced_module
 def _traced_subprocess_init(module, pin, wrapped, instance, args, kwargs):
     try:
+        if asm_config._bypass_instrumentation_for_waf:
+            return wrapped(*args, **kwargs)
         cmd_args = args[0] if len(args) else kwargs["args"]
         if isinstance(cmd_args, (list, tuple, str)):
             if kwargs.get("shell", False):
@@ -425,6 +429,8 @@ def _traced_subprocess_init(module, pin, wrapped, instance, args, kwargs):
 @trace_utils.with_traced_module
 def _traced_subprocess_wait(module, pin, wrapped, instance, args, kwargs):
     try:
+        if asm_config._bypass_instrumentation_for_waf:
+            return wrapped(*args, **kwargs)
         binary = core.get_item("subprocess_popen_binary")
 
         with pin.tracer.trace(COMMANDS.SPAN_NAME, resource=binary, span_type=SpanTypes.SYSTEM) as span:
diff --git a/ddtrace/settings/asm.py b/ddtrace/settings/asm.py
index 16937399501..fc2c9aa13fb 100644
--- a/ddtrace/settings/asm.py
+++ b/ddtrace/settings/asm.py
@@ -245,6 +245,7 @@ class ASMConfig(Env):
         default=r"^[+-]?((0b[01]+)|(0x[0-9A-Fa-f]+)|(\d+\.?\d*(?:[Ee][+-]?\d+)?|\.\d+(?:[Ee][+-]"
         + r"?\d+)?)|(X\'[0-9A-Fa-f]+\')|(B\'[01]+\'))$",
     )
+    _bypass_instrumentation_for_waf = False
 
     def __init__(self):
         super().__init__()

From c4448ea4e5580cfa55684ef108a5dafafe1f6f75 Mon Sep 17 00:00:00 2001
From: Yun Kim <35776586+Yun-Kim@users.noreply.github.com>
Date: Tue, 28 Jan 2025 17:11:42 -0500
Subject: [PATCH 30/33] fix(llmobs): propagate distributed headers via signal
 dispatching, not config (#12089)

This PR makes a change to our shared distributed tracing header
injection method to dispatch signals/events instead of relying on the
global config settings, which is only modifiable via env vars. This
fixes distributed tracing for users that might rely solely on the
`LLMObs.enable()` setup config.

Programmatic `LLMObs.enable()/disable()` calls do not set the global
`config._llmobs_enabled` boolean setting, which is only controlled by
the `DD_LLMOBS_ENABLED` env var. This was problematic for users that
relied on manual `LLMObs.enable()` setup (i.e. no env vars) because our
distributed tracing injection code only checks the global config to
inject llmobs parent IDs into request headers. If users manually enabled
LLMObs without any env vars, then this would not be reflected in the
global config value and thus LLMObs parent IDs would never be injected
into the request headers.

We can't check directly if LLMObs is enabled in the http injection
module because:
1. This would require us to import significant product-specific
LLMObs-code into the shared http injector helper module which would
impact non-LLMObs users' app performance
2. Circular imports in LLMObs which imports http injector logic to use
in its own helpers

Instead of doing our check based on the global `config._llmobs_enabled`
setting, we now send a tracing event to our shared product listeners,
and register a corresponding `LLMObs._inject_llmobs_context()` hook to
be called for all inject() calls if LLMObs is enabled (we check the
LLMObs instance, not the global config setting value).

~One risk and why I don't like changing global config settings is
because this then implies that it is no longer global or tied to an env
var (I want to push for env var configuration where possible over manual
overriding/enabling). If a global enabled config can be toggled
indiscriminately then this could open a can of worms for
enabling/disabling logic in our LLMObs service, which isn't really
designed to be toggled on/off multiple times in the app's lifespan.
However if some users cannot rely on env vars, then I don't see any
other solution that does not couple tracer internal code with LLMObs
code which is a no-option.~ (UPDATE: we avoided this issue by using
signal dispatching)

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
---
 ddtrace/llmobs/_llmobs.py                     |  8 ++-
 ddtrace/propagation/http.py                   |  7 +--
 ...nable-updates-config-45379a7a30e2e0e3.yaml |  5 ++
 tests/llmobs/test_llmobs_service.py           |  1 -
 tests/llmobs/test_propagation.py              | 54 ++++++++++++-------
 tests/tracer/test_propagation.py              | 24 ---------
 6 files changed, 49 insertions(+), 50 deletions(-)
 create mode 100644 releasenotes/notes/fix-llmobs-enable-updates-config-45379a7a30e2e0e3.yaml

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 768f4bdb292..bc5ddabcf00 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -285,6 +285,7 @@ def _stop_service(self) -> None:
         # Remove listener hooks for span events
         core.reset_listeners("trace.span_start", self._on_span_start)
         core.reset_listeners("trace.span_finish", self._on_span_finish)
+        core.reset_listeners("http.span_inject", self._inject_llmobs_context)
 
         forksafe.unregister(self._child_after_fork)
 
@@ -369,6 +370,7 @@ def enable(
         # Register hooks for span events
         core.on("trace.span_start", cls._instance._on_span_start)
         core.on("trace.span_finish", cls._instance._on_span_finish)
+        core.on("http.span_inject", cls._instance._inject_llmobs_context)
 
         atexit.register(cls.disable)
         telemetry_writer.product_activated(TELEMETRY_APM_PRODUCT.LLMOBS, True)
@@ -1162,6 +1164,11 @@ def submit_evaluation(
 
         cls._instance._llmobs_eval_metric_writer.enqueue(evaluation_metric)
 
+    def _inject_llmobs_context(self, span_context: Context, request_headers: Dict[str, str]) -> None:
+        if self.enabled is False:
+            return
+        _inject_llmobs_parent_id(span_context)
+
     @classmethod
     def inject_distributed_headers(cls, request_headers: Dict[str, str], span: Optional[Span] = None) -> Dict[str, str]:
         """Injects the span's distributed context into the given request headers."""
@@ -1179,7 +1186,6 @@ def inject_distributed_headers(cls, request_headers: Dict[str, str], span: Optio
         if span is None:
             log.warning("No span provided and no currently active span found.")
             return request_headers
-        _inject_llmobs_parent_id(span.context)
         HTTPPropagator.inject(span.context, request_headers)
         return request_headers
 
diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py
index fdaf97410ad..381acabb1bc 100644
--- a/ddtrace/propagation/http.py
+++ b/ddtrace/propagation/http.py
@@ -28,6 +28,7 @@
 from ddtrace._trace.span import _get_64_lowest_order_bits_as_int
 from ddtrace._trace.span import _MetaDictType
 from ddtrace.appsec._constants import APPSEC
+from ddtrace.internal.core import dispatch
 from ddtrace.settings.asm import config as asm_config
 
 from ..constants import AUTO_KEEP
@@ -1052,6 +1053,7 @@ def parent_call():
         :param dict headers: HTTP headers to extend with tracing attributes.
         :param Span non_active_span: Only to be used if injecting a non-active span.
         """
+        dispatch("http.span_inject", (span_context, headers))
         if not config._propagation_style_inject:
             return
         if non_active_span is not None and non_active_span.context is not span_context:
@@ -1089,11 +1091,6 @@ def parent_call():
             for key in span_context._baggage:
                 headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key]
 
-        if config._llmobs_enabled:
-            from ddtrace.llmobs._utils import _inject_llmobs_parent_id
-
-            _inject_llmobs_parent_id(span_context)
-
         if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject:
             _DatadogMultiHeader._inject(span_context, headers)
         if PROPAGATION_STYLE_B3_MULTI in config._propagation_style_inject:
diff --git a/releasenotes/notes/fix-llmobs-enable-updates-config-45379a7a30e2e0e3.yaml b/releasenotes/notes/fix-llmobs-enable-updates-config-45379a7a30e2e0e3.yaml
new file mode 100644
index 00000000000..4bf312f4680
--- /dev/null
+++ b/releasenotes/notes/fix-llmobs-enable-updates-config-45379a7a30e2e0e3.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    LLM Observability: Resolves an issue where explicitly only using ``LLMObs.enable()`` to configure LLM Observability 
+    without environment variables would not automatically propagate distributed tracing headers.  
diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py
index de428999147..744fde885e9 100644
--- a/tests/llmobs/test_llmobs_service.py
+++ b/tests/llmobs/test_llmobs_service.py
@@ -60,7 +60,6 @@ def test_service_enable_proxy_default():
         assert llmobs_instance.tracer == dummy_tracer
         assert isinstance(llmobs_instance._llmobs_span_writer._clients[0], LLMObsProxiedEventClient)
         assert run_llmobs_trace_filter(dummy_tracer) is not None
-
         llmobs_service.disable()
 
 
diff --git a/tests/llmobs/test_propagation.py b/tests/llmobs/test_propagation.py
index e3ab9c80d66..27421e6b12f 100644
--- a/tests/llmobs/test_propagation.py
+++ b/tests/llmobs/test_propagation.py
@@ -57,20 +57,24 @@ def test_propagate_correct_llmobs_parent_id_simple(run_python_code_in_subprocess
     """
     code = """
 import json
+import mock
 
-from ddtrace import tracer
-from ddtrace.ext import SpanTypes
+from ddtrace.internal.utils.http import Response
+from ddtrace.llmobs import LLMObs
 from ddtrace.propagation.http import HTTPPropagator
 
-with tracer.trace("LLMObs span", span_type=SpanTypes.LLM) as root_span:
-    with tracer.trace("Non-LLMObs span") as child_span:
-        headers = {"_DD_LLMOBS_SPAN_ID": str(root_span.span_id)}
-        HTTPPropagator.inject(child_span.context, headers)
+with mock.patch(
+    "ddtrace.internal.writer.HTTPWriter._send_payload", return_value=Response(status=200, body="{}"),
+):
+    LLMObs.enable(ml_app="test-app", api_key="<not-a-real-key>", agentless_enabled=True)
+    with LLMObs.workflow("LLMObs span") as root_span:
+        with LLMObs._instance.tracer.trace("Non-LLMObs span") as child_span:
+            headers = {"_DD_LLMOBS_SPAN_ID": str(root_span.span_id)}
+            HTTPPropagator.inject(child_span.context, headers)
 
 print(json.dumps(headers))
         """
     env = os.environ.copy()
-    env["DD_LLMOBS_ENABLED"] = "1"
     env["DD_TRACE_ENABLED"] = "0"
     stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env)
     assert status == 0, (stdout, stderr)
@@ -93,21 +97,33 @@ def test_propagate_llmobs_parent_id_complex(run_python_code_in_subprocess):
     """
     code = """
 import json
+import mock
 
-from ddtrace import tracer
-from ddtrace.ext import SpanTypes
+from ddtrace.internal.utils.http import Response
+from ddtrace.llmobs import LLMObs
 from ddtrace.propagation.http import HTTPPropagator
 
-with tracer.trace("LLMObs span", span_type=SpanTypes.LLM) as root_span:
-    with tracer.trace("Non-LLMObs span") as child_span:
-        headers = {"_DD_LLMOBS_SPAN_ID": str(root_span.span_id)}
-        HTTPPropagator.inject(child_span.context, headers)
+with mock.patch(
+    "ddtrace.internal.writer.HTTPWriter._send_payload", return_value=Response(status=200, body="{}"),
+):
+    from ddtrace import auto  # simulate ddtrace-run startup to ensure env var configs also propagate
+    with LLMObs.workflow("LLMObs span") as root_span:
+        with LLMObs._instance.tracer.trace("Non-LLMObs span") as child_span:
+            headers = {"_DD_LLMOBS_SPAN_ID": str(root_span.span_id)}
+            HTTPPropagator.inject(child_span.context, headers)
 
 print(json.dumps(headers))
         """
     env = os.environ.copy()
-    env["DD_LLMOBS_ENABLED"] = "1"
-    env["DD_TRACE_ENABLED"] = "0"
+    env.update(
+        {
+            "DD_LLMOBS_ENABLED": "1",
+            "DD_TRACE_ENABLED": "0",
+            "DD_AGENTLESS_ENABLED": "1",
+            "DD_API_KEY": "<not-a-real-key>",
+            "DD_LLMOBS_ML_APP": "test-app",
+        }
+    )
     stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env)
     assert status == 0, (stdout, stderr)
     assert stderr == b"", (stdout, stderr)
@@ -124,7 +140,7 @@ def test_propagate_llmobs_parent_id_complex(run_python_code_in_subprocess):
 
 
 def test_no_llmobs_parent_id_propagated_if_no_llmobs_spans(run_python_code_in_subprocess):
-    """Test that the correct LLMObs parent ID (None) is extracted from the headers in a simple distributed scenario.
+    """Test that the correct LLMObs parent ID ('undefined') is extracted from headers in a simple distributed scenario.
     Service A (subprocess) has spans, but none are LLMObs spans.
     Service B (outside subprocess) has a LLMObs span.
     Service B's span should have no LLMObs parent ID as there are no LLMObs spans from service A.
@@ -132,17 +148,17 @@ def test_no_llmobs_parent_id_propagated_if_no_llmobs_spans(run_python_code_in_su
     code = """
 import json
 
-from ddtrace import tracer
+from ddtrace.llmobs import LLMObs
 from ddtrace.propagation.http import HTTPPropagator
 
-with tracer.trace("Non-LLMObs span") as root_span:
+LLMObs.enable(ml_app="ml-app", agentless_enabled=True, api_key="<not-a-real-key>")
+with LLMObs._instance.tracer.trace("Non-LLMObs span") as root_span:
     headers = {}
     HTTPPropagator.inject(root_span.context, headers)
 
 print(json.dumps(headers))
         """
     env = os.environ.copy()
-    env["DD_LLMOBS_ENABLED"] = "1"
     env["DD_TRACE_ENABLED"] = "0"
     stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env)
     assert status == 0, (stdout, stderr)
diff --git a/tests/tracer/test_propagation.py b/tests/tracer/test_propagation.py
index 533e4974250..b073cd72e3b 100644
--- a/tests/tracer/test_propagation.py
+++ b/tests/tracer/test_propagation.py
@@ -4,7 +4,6 @@
 import os
 import pickle
 
-import mock
 import pytest
 
 import ddtrace
@@ -3387,29 +3386,6 @@ def test_DD_TRACE_PROPAGATION_STYLE_INJECT_overrides_DD_TRACE_PROPAGATION_STYLE(
     assert result == expected_headers
 
 
-def test_llmobs_enabled_injects_llmobs_parent_id():
-    with override_global_config(dict(_llmobs_enabled=True)):
-        with mock.patch("ddtrace.llmobs._utils._inject_llmobs_parent_id") as mock_llmobs_inject:
-            context = Context(trace_id=1, span_id=2)
-            HTTPPropagator.inject(context, {})
-            mock_llmobs_inject.assert_called_once_with(context)
-
-
-def test_llmobs_disabled_does_not_inject_parent_id():
-    with override_global_config(dict(_llmobs_enabled=False)):
-        with mock.patch("ddtrace.llmobs._utils._inject_llmobs_parent_id") as mock_llmobs_inject:
-            context = Context(trace_id=1, span_id=2)
-            HTTPPropagator.inject(context, {})
-            mock_llmobs_inject.assert_not_called()
-
-
-def test_llmobs_parent_id_not_injected_by_default():
-    with mock.patch("ddtrace.llmobs._utils._inject_llmobs_parent_id") as mock_llmobs_inject:
-        context = Context(trace_id=1, span_id=2)
-        HTTPPropagator.inject(context, {})
-        mock_llmobs_inject.assert_not_called()
-
-
 @pytest.mark.parametrize(
     "span_context,expected_headers",
     [

From cb41f8e77aa71bfffbb63f7ee1a9808e659189d2 Mon Sep 17 00:00:00 2001
From: Munir Abdinur <munir.abdinur@datadoghq.com>
Date: Wed, 29 Jan 2025 08:39:44 -0500
Subject: [PATCH 31/33] feat(provider): expose context provider in
 ddtrace.trace (#12135)

ddtrace v3.0 is set to remove `ddtrace.providers` module from the public
API. However we should still allow users to use their own
ContextProviders. This PR ensures the BaseContextProvider remains in the
public API and can be used in `tracer.configure(...)`.

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
---
 .github/CODEOWNERS                                           | 2 ++
 ddtrace/provider.py                                          | 5 +++--
 ddtrace/trace/__init__.py                                    | 2 ++
 docs/advanced_usage.rst                                      | 2 +-
 .../feat-expose-base-context-provider-530ebec2225f6c8d.yaml  | 5 +++++
 5 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 releasenotes/notes/feat-expose-base-context-provider-530ebec2225f6c8d.yaml

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 84e6c873642..b1162da2348 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -166,12 +166,14 @@ ddtrace/internal/remoteconfig       @DataDog/remote-config @DataDog/apm-core-pyt
 tests/internal/remoteconfig         @DataDog/remote-config @DataDog/apm-core-python
 
 # API SDK
+ddtrace/trace/                                    @DataDog/apm-sdk-api-python
 ddtrace/_trace/                                    @DataDog/apm-sdk-api-python
 ddtrace/opentelemetry/                             @DataDog/apm-sdk-api-python
 ddtrace/internal/opentelemetry                     @DataDog/apm-sdk-api-python
 ddtrace/opentracer/                                @DataDog/apm-sdk-api-python
 ddtrace/propagation/                               @DataDog/apm-sdk-api-python
 ddtrace/filters.py                                 @DataDog/apm-sdk-api-python
+ddtrace/provider.py                                @DataDog/apm-sdk-api-python
 ddtrace/pin.py                                     @DataDog/apm-sdk-api-python
 ddtrace/sampler.py                                 @DataDog/apm-sdk-api-python
 ddtrace/sampling_rule.py                           @DataDog/apm-sdk-api-python
diff --git a/ddtrace/provider.py b/ddtrace/provider.py
index 4a275ece8c8..7b9867de01a 100644
--- a/ddtrace/provider.py
+++ b/ddtrace/provider.py
@@ -7,7 +7,8 @@
 
 
 deprecate(
-    "The context provider interface is deprecated.",
-    message="The trace context is an internal interface and will no longer be supported.",
+    "The context provider interface is deprecated",
+    message="Import BaseContextProvider from `ddtrace.trace` instead.",
     category=DDTraceDeprecationWarning,
+    removal_version="3.0.0",
 )
diff --git a/ddtrace/trace/__init__.py b/ddtrace/trace/__init__.py
index f709310d589..8473c0b99f0 100644
--- a/ddtrace/trace/__init__.py
+++ b/ddtrace/trace/__init__.py
@@ -1,6 +1,7 @@
 from ddtrace._trace.context import Context
 from ddtrace._trace.filters import TraceFilter
 from ddtrace._trace.pin import Pin
+from ddtrace._trace.provider import BaseContextProvider
 from ddtrace._trace.span import Span
 from ddtrace._trace.tracer import Tracer
 
@@ -15,4 +16,5 @@
     "Tracer",
     "Span",
     "tracer",
+    "BaseContextProvider",
 ]
diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst
index 05879acac37..392e1406fb7 100644
--- a/docs/advanced_usage.rst
+++ b/docs/advanced_usage.rst
@@ -221,7 +221,7 @@ context management.
 
 If there is a case where the default is insufficient then a custom context
 provider can be used. It must implement the
-:class:`ddtrace.provider.BaseContextProvider` interface and can be configured
+:class:`ddtrace.trace.BaseContextProvider` interface and can be configured
 with::
 
     tracer.configure(context_provider=MyContextProvider)
diff --git a/releasenotes/notes/feat-expose-base-context-provider-530ebec2225f6c8d.yaml b/releasenotes/notes/feat-expose-base-context-provider-530ebec2225f6c8d.yaml
new file mode 100644
index 00000000000..010d2bf52bd
--- /dev/null
+++ b/releasenotes/notes/feat-expose-base-context-provider-530ebec2225f6c8d.yaml
@@ -0,0 +1,5 @@
+---
+deprecations:
+  - |
+    tracing: Moves ``ddtrace.provider.BaseContextProvider`` to ``ddtrace.trace.BaseContextProvider``. 
+    The ``ddtrace.provider`` module is deprecated and will be removed in v3.0.0.

From af9098c4343831d4b8771fe5debd6f99d021eac6 Mon Sep 17 00:00:00 2001
From: Laplie Anderson <randomanderson@users.noreply.github.com>
Date: Thu, 30 Jan 2025 05:34:25 -0500
Subject: [PATCH 32/33] chore(ci): skip non-linux OCI package creation (#12036)

The shared pipeline is introducing support for windows OCI packages.
This PR ensure dd-trace-py doesn't generate nonsensical packages.
Windows support can be added later.

This was tested by using the WIP branch of one-pipeline

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: Brett Langdon <brett.langdon@datadoghq.com>
---
 .gitlab/prepare-oci-package.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.gitlab/prepare-oci-package.sh b/.gitlab/prepare-oci-package.sh
index 5958c31e731..7ee7b7d6e77 100755
--- a/.gitlab/prepare-oci-package.sh
+++ b/.gitlab/prepare-oci-package.sh
@@ -1,6 +1,11 @@
 #!/bin/bash
 set -eo pipefail
 
+if [ "$OS" != "linux" ]; then
+  echo "Only linux packages are supported. Exiting"
+  exit 0
+fi
+
 if [ -n "$CI_COMMIT_TAG" ] && [ -z "$PYTHON_PACKAGE_VERSION" ]; then
   PYTHON_PACKAGE_VERSION=${CI_COMMIT_TAG##v}
 fi

From e73d001bf9a8c18aff6e696dcd41b66af9bd5d16 Mon Sep 17 00:00:00 2001
From: lievan <evan.li@datadoqhq.com>
Date: Mon, 3 Feb 2025 14:46:39 -0500
Subject: [PATCH 33/33] gate behind flag

---
 ddtrace/llmobs/decorators.py           | 25 +++++++++++++++----------
 ddtrace/settings/config.py             |  1 +
 tests/llmobs/test_llmobs_decorators.py | 13 ++++++++++---
 tests/utils.py                         |  1 +
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py
index 6f792b75054..a30100db4ec 100644
--- a/ddtrace/llmobs/decorators.py
+++ b/ddtrace/llmobs/decorators.py
@@ -5,6 +5,7 @@
 from typing import Callable
 from typing import Optional
 
+from ddtrace import config
 from ddtrace.internal.compat import iscoroutinefunction
 from ddtrace.internal.compat import isgeneratorfunction
 from ddtrace.internal.logger import get_logger
@@ -139,12 +140,14 @@ def wrapper(*args, **kwargs):
                         session_id=session_id,
                         ml_app=ml_app,
                     ) as span:
-                        for arg in args:
-                            LLMObs._instance._record_object(span, arg, "input")
-                        for arg in kwargs.values():
-                            LLMObs._instance._record_object(span, arg, "input")
+                        if config._llmobs_auto_span_linking_enabled:
+                            for arg in args:
+                                LLMObs._instance._record_object(span, arg, "input")
+                            for arg in kwargs.values():
+                                LLMObs._instance._record_object(span, arg, "input")
                         ret = func(*args, **kwargs)
-                        LLMObs._instance._record_object(span, ret, "output")
+                        if config._llmobs_auto_span_linking_enabled:
+                            LLMObs._instance._record_object(span, ret, "output")
                         return ret
 
             return generator_wrapper if (isgeneratorfunction(func) or isasyncgenfunction(func)) else wrapper
@@ -237,10 +240,11 @@ def wrapper(*args, **kwargs):
                     _, span_name = _get_llmobs_span_options(name, None, func)
                     traced_operation = getattr(LLMObs, operation_kind, LLMObs.workflow)
                     with traced_operation(name=span_name, session_id=session_id, ml_app=ml_app) as span:
-                        for arg in args:
-                            LLMObs._instance._record_object(span, arg, "input")
-                        for arg in kwargs.values():
-                            LLMObs._instance._record_object(span, arg, "input")
+                        if config._llmobs_auto_span_linking_enabled:
+                            for arg in args:
+                                LLMObs._instance._record_object(span, arg, "input")
+                            for arg in kwargs.values():
+                                LLMObs._instance._record_object(span, arg, "input")
                         func_signature = signature(func)
                         bound_args = func_signature.bind_partial(*args, **kwargs)
                         if _automatic_io_annotation and bound_args.arguments:
@@ -253,7 +257,8 @@ def wrapper(*args, **kwargs):
                             and span._get_ctx_item(OUTPUT_VALUE) is None
                         ):
                             LLMObs.annotate(span=span, output_data=resp)
-                        LLMObs._instance._record_object(span, resp, "output")
+                        if config._llmobs_auto_span_linking_enabled:
+                            LLMObs._instance._record_object(span, resp, "output")
                         return resp
 
             return generator_wrapper if (isgeneratorfunction(func) or isasyncgenfunction(func)) else wrapper
diff --git a/ddtrace/settings/config.py b/ddtrace/settings/config.py
index 81ed7a3ab19..a4da024ca9b 100644
--- a/ddtrace/settings/config.py
+++ b/ddtrace/settings/config.py
@@ -644,6 +644,7 @@ def __init__(self):
         self._llmobs_sample_rate = _get_config("DD_LLMOBS_SAMPLE_RATE", 1.0, float)
         self._llmobs_ml_app = _get_config("DD_LLMOBS_ML_APP")
         self._llmobs_agentless_enabled = _get_config("DD_LLMOBS_AGENTLESS_ENABLED", False, asbool)
+        self._llmobs_auto_span_linking_enabled = _get_config("_DD_LLMOBS_AUTO_SPAN_LINKING_ENABLED", False, asbool)
 
         self._inject_force = _get_config("DD_INJECT_FORCE", False, asbool)
         self._lib_was_injected = False
diff --git a/tests/llmobs/test_llmobs_decorators.py b/tests/llmobs/test_llmobs_decorators.py
index 8aa46d43717..a49439619b1 100644
--- a/tests/llmobs/test_llmobs_decorators.py
+++ b/tests/llmobs/test_llmobs_decorators.py
@@ -12,6 +12,13 @@
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
 from tests.llmobs._utils import _expected_llmobs_non_llm_span_event
 from tests.llmobs._utils import _expected_span_link
+from tests.utils import override_global_config
+
+
+@pytest.fixture
+def auto_linking_enabled():
+    with override_global_config(dict(_llmobs_auto_span_linking_enabled=True)):
+        yield
 
 
 @pytest.fixture
@@ -831,7 +838,7 @@ def get_next_element(alist):
     )
 
 
-def test_decorator_records_span_links(llmobs, llmobs_events):
+def test_decorator_records_span_links(llmobs, llmobs_events, auto_linking_enabled):
     @workflow
     def one(inp):
         return 1
@@ -852,7 +859,7 @@ def two(inp):
     assert two_span["span_links"][1] == _expected_span_link(one_span, "output", "output")
 
 
-def test_decorator_records_span_links_for_multi_input_functions(llmobs, llmobs_events):
+def test_decorator_records_span_links_for_multi_input_functions(llmobs, llmobs_events, auto_linking_enabled):
     @agent
     def some_agent(a, b):
         pass
@@ -879,7 +886,7 @@ def two():
     assert three_span["span_links"][1] == _expected_span_link(two_span, "output", "input")
 
 
-def test_decorator_records_span_links_via_kwargs(llmobs, llmobs_events):
+def test_decorator_records_span_links_via_kwargs(llmobs, llmobs_events, auto_linking_enabled):
     @agent
     def some_agent(a=None, b=None):
         pass
diff --git a/tests/utils.py b/tests/utils.py
index 1932033152f..4320bebebf1 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -161,6 +161,7 @@ def override_global_config(values):
         "_llmobs_sample_rate",
         "_llmobs_ml_app",
         "_llmobs_agentless_enabled",
+        "_llmobs_auto_span_linking_enabled",
         "_data_streams_enabled",
     ]