Skip to content

Commit c96f7f5

Browse files
authored
LLO Extraction Optimization (#366)
## What does this pull request do?

This pull request refactors the LLO handler to improve performance, especially for spans with few or no LLO attributes, while maintaining the same functionality and behavior.

## Test plan

Same test strategy as #361 and #365.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent f1c49fe commit c96f7f5

File tree

1 file changed

+113
-34
lines changed
  • aws-opentelemetry-distro/src/amazon/opentelemetry/distro

1 file changed

+113
-34
lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/llo_handler.py

Lines changed: 113 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ def __init__(self, logger_provider: LoggerProvider):
9090
self._event_logger_provider = EventLoggerProvider(logger_provider=self._logger_provider)
9191
self._event_logger = self._event_logger_provider.get_event_logger("gen_ai.events")
9292

93-
# Patterns for attribute filtering
94-
self._exact_match_patterns = [
93+
# Patterns for attribute filtering - using a set for O(1) lookups
94+
self._exact_match_patterns = {
9595
TRACELOOP_ENTITY_INPUT,
9696
TRACELOOP_ENTITY_OUTPUT,
9797
TRACELOOP_CREW_TASKS_OUTPUT,
@@ -103,7 +103,7 @@ def __init__(self, logger_provider: LoggerProvider):
103103
OPENLIT_AGENT_HUMAN_INPUT,
104104
OPENINFERENCE_INPUT_VALUE,
105105
OPENINFERENCE_OUTPUT_VALUE,
106-
]
106+
}
107107

108108
# Pre-compile regex patterns for better performance
109109
self._regex_patterns = [
@@ -236,6 +236,16 @@ def _emit_llo_attributes(
236236
Returns:
237237
None: Events are emitted via the event logger
238238
"""
239+
# Quick check if we have any LLO attributes before running extractors
240+
has_llo_attrs = False
241+
for key in attributes:
242+
if self._is_llo_attribute(key):
243+
has_llo_attrs = True
244+
break
245+
246+
if not has_llo_attrs:
247+
return
248+
239249
all_events = []
240250
all_events.extend(self._extract_gen_ai_prompt_events(span, attributes, event_timestamp))
241251
all_events.extend(self._extract_gen_ai_completion_events(span, attributes, event_timestamp))
@@ -261,8 +271,19 @@ def _filter_attributes(self, attributes: Dict[str, Any]) -> Dict[str, Any]:
261271
Returns:
262272
Dict[str, Any]: New dictionary with LLO attributes removed
263273
"""
274+
# First check if we need to filter anything
275+
has_llo_attrs = False
276+
for key in attributes:
277+
if self._is_llo_attribute(key):
278+
has_llo_attrs = True
279+
break
280+
281+
# If no LLO attributes found, return the original attributes (no need to copy)
282+
if not has_llo_attrs:
283+
return attributes
284+
285+
# Otherwise, create filtered copy
264286
filtered_attributes = {}
265-
266287
for key, value in attributes.items():
267288
if not self._is_llo_attribute(key):
268289
filtered_attributes[key] = value
@@ -290,12 +311,16 @@ def _is_llo_attribute(self, key: str) -> bool:
290311
Returns:
291312
bool: True if the key matches any LLO pattern, False otherwise
292313
"""
293-
# Check exact matches first (faster)
314+
# Check exact matches first (O(1) lookup in a set)
294315
if key in self._exact_match_patterns:
295316
return True
296317

297318
# Then check regex patterns
298-
return any(pattern.match(key) for pattern in self._regex_patterns)
319+
for pattern in self._regex_patterns:
320+
if pattern.match(key):
321+
return True
322+
323+
return False
299324

300325
def _extract_gen_ai_prompt_events(
301326
self, span: ReadableSpan, attributes: Dict[str, Any], event_timestamp: Optional[int] = None
@@ -321,22 +346,29 @@ def _extract_gen_ai_prompt_events(
321346
Returns:
322347
List[Event]: Events created from prompt attributes
323348
"""
349+
# Quick check if any prompt content attributes exist
350+
if not any(self._prompt_content_pattern.match(key) for key in attributes):
351+
return []
352+
324353
events = []
325354
span_ctx = span.context
326355
gen_ai_system = span.attributes.get("gen_ai.system", "unknown")
327356

328357
# Use helper method to get appropriate timestamp (prompts are inputs)
329358
prompt_timestamp = self._get_timestamp(span, event_timestamp, is_input=True)
330359

360+
# Find all prompt content attributes and their roles
361+
prompt_content_matches = {}
331362
for key, value in attributes.items():
332363
match = self._prompt_content_pattern.match(key)
333-
if not match:
334-
continue
335-
336-
index = match.group(1)
337-
role_key = f"gen_ai.prompt.{index}.role"
338-
role = attributes.get(role_key, "unknown")
339-
364+
if match:
365+
index = match.group(1)
366+
role_key = f"gen_ai.prompt.{index}.role"
367+
role = attributes.get(role_key, "unknown")
368+
prompt_content_matches[index] = (key, value, role)
369+
370+
# Create events for each content+role pair
371+
for index, (key, value, role) in prompt_content_matches.items():
340372
event_attributes = {"gen_ai.system": gen_ai_system, "original_attribute": key}
341373
body = {"content": value, "role": role}
342374

@@ -376,22 +408,29 @@ def _extract_gen_ai_completion_events(
376408
Returns:
377409
List[Event]: Events created from completion attributes
378410
"""
411+
# Quick check if any completion content attributes exist
412+
if not any(self._completion_content_pattern.match(key) for key in attributes):
413+
return []
414+
379415
events = []
380416
span_ctx = span.context
381417
gen_ai_system = span.attributes.get("gen_ai.system", "unknown")
382418

383419
# Use helper method to get appropriate timestamp (completions are outputs)
384420
completion_timestamp = self._get_timestamp(span, event_timestamp, is_input=False)
385421

422+
# Find all completion content attributes and their roles
423+
completion_content_matches = {}
386424
for key, value in attributes.items():
387425
match = self._completion_content_pattern.match(key)
388-
if not match:
389-
continue
390-
391-
index = match.group(1)
392-
role_key = f"gen_ai.completion.{index}.role"
393-
role = attributes.get(role_key, "unknown")
394-
426+
if match:
427+
index = match.group(1)
428+
role_key = f"gen_ai.completion.{index}.role"
429+
role = attributes.get(role_key, "unknown")
430+
completion_content_matches[index] = (key, value, role)
431+
432+
# Create events for each content+role pair
433+
for index, (key, value, role) in completion_content_matches.items():
395434
event_attributes = {"gen_ai.system": gen_ai_system, "original_attribute": key}
396435
body = {"content": value, "role": role}
397436

@@ -437,6 +476,18 @@ def _extract_traceloop_events(
437476
Returns:
438477
List[Event]: Events created from Traceloop attributes
439478
"""
479+
# Define the Traceloop attributes we're looking for
480+
traceloop_keys = {
481+
TRACELOOP_ENTITY_INPUT,
482+
TRACELOOP_ENTITY_OUTPUT,
483+
TRACELOOP_CREW_TASKS_OUTPUT,
484+
TRACELOOP_CREW_RESULT,
485+
}
486+
487+
# Quick check if any Traceloop attributes exist
488+
if not any(key in attributes for key in traceloop_keys):
489+
return []
490+
440491
events = []
441492
span_ctx = span.context
442493
# Use traceloop.entity.name for the gen_ai.system value
@@ -521,6 +572,19 @@ def _extract_openlit_span_event_attributes(
521572
Returns:
522573
List[Event]: Events created from OpenLit attributes
523574
"""
575+
# Define the OpenLit attributes we're looking for
576+
openlit_keys = {
577+
OPENLIT_PROMPT,
578+
OPENLIT_COMPLETION,
579+
OPENLIT_REVISED_PROMPT,
580+
OPENLIT_AGENT_ACTUAL_OUTPUT,
581+
OPENLIT_AGENT_HUMAN_INPUT,
582+
}
583+
584+
# Quick check if any OpenLit attributes exist
585+
if not any(key in attributes for key in openlit_keys):
586+
return []
587+
524588
events = []
525589
span_ctx = span.context
526590
gen_ai_system = span.attributes.get("gen_ai.system", "unknown")
@@ -597,6 +661,17 @@ def _extract_openinference_attributes(
597661
Returns:
598662
List[Event]: Events created from OpenInference attributes
599663
"""
664+
# Define the OpenInference keys/patterns we're looking for
665+
openinference_direct_keys = {OPENINFERENCE_INPUT_VALUE, OPENINFERENCE_OUTPUT_VALUE}
666+
667+
# Quick check if any OpenInference attributes exist
668+
has_direct_attrs = any(key in attributes for key in openinference_direct_keys)
669+
has_input_msgs = any(self._openinference_input_msg_pattern.match(key) for key in attributes)
670+
has_output_msgs = any(self._openinference_output_msg_pattern.match(key) for key in attributes)
671+
672+
if not (has_direct_attrs or has_input_msgs or has_output_msgs):
673+
return []
674+
600675
events = []
601676
span_ctx = span.context
602677
gen_ai_system = span.attributes.get("llm.model_name", "unknown")
@@ -626,15 +701,17 @@ def _extract_openinference_attributes(
626701
events.append(event)
627702

628703
# Process input messages
704+
input_messages = {}
629705
for key, value in attributes.items():
630706
match = self._openinference_input_msg_pattern.match(key)
631-
if not match:
632-
continue
633-
634-
index = match.group(1)
635-
role_key = f"llm.input_messages.{index}.message.role"
636-
role = attributes.get(role_key, ROLE_USER) # Default to user if role not specified
637-
707+
if match:
708+
index = match.group(1)
709+
role_key = f"llm.input_messages.{index}.message.role"
710+
role = attributes.get(role_key, ROLE_USER) # Default to user if role not specified
711+
input_messages[index] = (key, value, role)
712+
713+
# Create events for input messages
714+
for index, (key, value, role) in input_messages.items():
638715
event_attributes = {"gen_ai.system": gen_ai_system, "original_attribute": key}
639716
body = {"content": value, "role": role}
640717

@@ -648,15 +725,17 @@ def _extract_openinference_attributes(
648725
events.append(event)
649726

650727
# Process output messages
728+
output_messages = {}
651729
for key, value in attributes.items():
652730
match = self._openinference_output_msg_pattern.match(key)
653-
if not match:
654-
continue
655-
656-
index = match.group(1)
657-
role_key = f"llm.output_messages.{index}.message.role"
658-
role = attributes.get(role_key, ROLE_ASSISTANT) # Default to assistant if role not specified
659-
731+
if match:
732+
index = match.group(1)
733+
role_key = f"llm.output_messages.{index}.message.role"
734+
role = attributes.get(role_key, ROLE_ASSISTANT) # Default to assistant if role not specified
735+
output_messages[index] = (key, value, role)
736+
737+
# Create events for output messages
738+
for index, (key, value, role) in output_messages.items():
660739
event_attributes = {"gen_ai.system": gen_ai_system, "original_attribute": key}
661740
body = {"content": value, "role": role}
662741

0 commit comments

Comments
 (0)