Skip to content

Commit 7ff4fa7

Browse files
lievanlievan
andcommitted
fix(llmobs): don't enqueue spans to an inactive evaluator runner (#12150)
This fix resolves an issue where spans were being enqueued to the buffer of an **inactive** evaluator runner, which caused noisy warning logs related to the evaluator runner's buffer being full.

- [x] PR author has checked that all the criteria below are met
  - The PR description includes an overview of the change
  - The PR description articulates the motivation for the change
  - The change includes tests OR the PR description describes a testing strategy
  - The PR description notes risks associated with the change, if any
  - Newly-added code is easy to change
  - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
  - The change includes or references documentation updates if necessary
  - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] Reviewer has checked that all the criteria below are met
  - Title is accurate
  - All changes are related to the pull request's stated goal
  - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
  - Testing strategy adequately addresses listed risks
  - Newly-added code is easy to change
  - Release note makes sense to a user of the library
  - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
  - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: lievan <[email protected]>
1 parent aa1fbaa commit 7ff4fa7

File tree

4 files changed

+34
-18
lines changed

4 files changed

+34
-18
lines changed

ddtrace/llmobs/_evaluators/runner.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from ddtrace.internal import forksafe
77
from ddtrace.internal.logger import get_logger
88
from ddtrace.internal.periodic import PeriodicService
9+
from ddtrace.internal.service import ServiceStatus
910
from ddtrace.internal.telemetry import telemetry_writer
1011
from ddtrace.internal.telemetry.constants import TELEMETRY_APM_PRODUCT
1112
from ddtrace.llmobs._evaluators.ragas.faithfulness import RagasFaithfulnessEvaluator
@@ -70,7 +71,7 @@ def start(self, *args, **kwargs):
7071
logger.debug("no evaluators configured, not starting %r", self.__class__.__name__)
7172
return
7273
super(EvaluatorRunner, self).start()
73-
logger.debug("started %r to %r", self.__class__.__name__)
74+
logger.debug("started %r", self.__class__.__name__)
7475

7576
def _stop_service(self) -> None:
7677
"""
@@ -88,6 +89,8 @@ def recreate(self) -> "EvaluatorRunner":
8889
)
8990

9091
def enqueue(self, span_event: Dict, span: Span) -> None:
92+
if self.status == ServiceStatus.STOPPED:
93+
return
9194
with self._lock:
9295
if len(self._buffer) >= self._buffer_limit:
9396
logger.warning(
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
LLM Observability: This fix resolves an issue where spans were being enqueued to an inactive evaluator runner, which caused noisy logs
5+
related to the evaluator runner buffer being full.

tests/llmobs/test_llmobs_evaluator_runner.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,39 +18,44 @@
1818
DUMMY_SPAN = Span("dummy_span")
1919

2020

21-
def test_evaluator_runner_start(mock_evaluator_logs):
22-
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=mock.MagicMock())
23-
evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=mock.MagicMock()))
21+
@pytest.fixture
22+
def active_evaluator_runner(LLMObs):
23+
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=LLMObs)
24+
evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=LLMObs))
2425
evaluator_runner.start()
25-
mock_evaluator_logs.debug.assert_has_calls([mock.call("started %r to %r", "EvaluatorRunner")])
26+
yield evaluator_runner
27+
2628

29+
def test_evaluator_runner_start(mock_evaluator_logs, active_evaluator_runner):
30+
mock_evaluator_logs.debug.assert_has_calls([mock.call("started %r", "EvaluatorRunner")])
2731

28-
def test_evaluator_runner_buffer_limit(mock_evaluator_logs):
29-
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=mock.MagicMock())
32+
33+
def test_evaluator_runner_buffer_limit(mock_evaluator_logs, active_evaluator_runner):
3034
for _ in range(1001):
31-
evaluator_runner.enqueue({}, DUMMY_SPAN)
35+
active_evaluator_runner.enqueue({}, DUMMY_SPAN)
3236
mock_evaluator_logs.warning.assert_called_with(
3337
"%r event buffer full (limit is %d), dropping event", "EvaluatorRunner", 1000
3438
)
3539

3640

37-
def test_evaluator_runner_periodic_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer):
38-
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=LLMObs)
39-
evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=LLMObs))
40-
evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
41-
evaluator_runner.periodic()
41+
def test_evaluator_runner_periodic_enqueues_eval_metric(mock_llmobs_eval_metric_writer, active_evaluator_runner):
42+
active_evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
43+
active_evaluator_runner.periodic()
4244
mock_llmobs_eval_metric_writer.enqueue.assert_called_once_with(
4345
_dummy_evaluator_eval_metric_event(span_id="123", trace_id="1234")
4446
)
4547

4648

47-
@pytest.mark.vcr_logs
48-
def test_evaluator_runner_timed_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer):
49-
evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=LLMObs)
50-
evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=LLMObs))
49+
def test_evaluator_runner_stopped_does_not_enqueue_metric(LLMObs, mock_llmobs_eval_metric_writer):
50+
evaluator_runner = EvaluatorRunner(interval=0.1, llmobs_service=LLMObs)
5151
evaluator_runner.start()
52-
5352
evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
53+
assert not evaluator_runner._buffer
54+
assert mock_llmobs_eval_metric_writer.enqueue.call_count == 0
55+
56+
57+
def test_evaluator_runner_timed_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer, active_evaluator_runner):
58+
active_evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
5459

5560
time.sleep(0.1)
5661

tests/llmobs/test_llmobs_service.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,6 +1560,7 @@ def test_llmobs_fork_recreates_and_restarts_eval_metric_writer():
15601560

15611561
def test_llmobs_fork_recreates_and_restarts_evaluator_runner(mock_ragas_evaluator):
15621562
"""Test that forking a process correctly recreates and restarts the EvaluatorRunner."""
1563+
pytest.importorskip("ragas")
15631564
with override_env(dict(_DD_LLMOBS_EVALUATORS="ragas_faithfulness")):
15641565
with mock.patch("ddtrace.llmobs._evaluators.runner.EvaluatorRunner.periodic"):
15651566
llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app")
@@ -1649,6 +1650,8 @@ def test_llmobs_fork_submit_evaluation(monkeypatch):
16491650
def test_llmobs_fork_evaluator_runner_run(monkeypatch):
16501651
"""Test that forking a process correctly encodes new spans created in each process."""
16511652
monkeypatch.setenv("_DD_LLMOBS_EVALUATOR_INTERVAL", 5.0)
1653+
pytest.importorskip("ragas")
1654+
monkeypatch.setenv("_DD_LLMOBS_EVALUATORS", "ragas_faithfulness")
16521655
with mock.patch("ddtrace.llmobs._evaluators.runner.EvaluatorRunner.periodic"):
16531656
llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app", api_key="test_api_key")
16541657
pid = os.fork()

0 commit comments

Comments
 (0)