Skip to content

Commit 57bc772

Browse files
committed
add cycle detection
1 parent 48258c3 commit 57bc772

File tree

2 files changed

+36
-5
lines changed

2 files changed

+36
-5
lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int:
103103
Estimates the size in bytes of a log by calculating the size of its body and its attributes
104104
and adding a buffer amount to account for other log metadata information.
105105
Will process complex log structures up to the specified depth limit.
106-
If the depth limit of the log structure is exceeded, returns the truncated calculation
106+
Includes cycle detection to prevent processing the same complex log content (Maps, Arrays)
107+
more than once. If the depth limit of the log structure is exceeded, returns the truncated calculation
107108
of everything up to that point.
108109
109110
Args:
@@ -114,9 +115,13 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int:
114115
int: The estimated size of the log object in bytes
115116
"""
116117

117-
# Use a queue to prevent excessive recursive calls.
118-
# We calculate based on the size of the log record body and attributes for the log.
119-
queue: List[tuple[AnyValue, int]] = [(log.log_record.body, 0), (log.log_record.attributes, -1)]
118+
# Queue is a list of (log_content, depth) where:
119+
# log_content is the current piece of log data being processed
120+
# depth tracks how many levels deep we've traversed to reach this data
121+
queue = [(log.log_record.body, 0), (log.log_record.attributes, -1)]
122+
123+
# Track visited objects to avoid calculating the same complex log content more than once
124+
visited = set()
120125

121126
size: int = self._BASE_LOG_BUFFER_BYTE_SIZE
122127

@@ -130,6 +135,9 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int:
130135

131136
next_val, current_depth = data
132137

138+
if not next_val:
139+
continue
140+
133141
if isinstance(next_val, (str, bytes)):
134142
size += len(next_val)
135143
continue
@@ -142,7 +150,14 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int:
142150
size += len(str(next_val))
143151
continue
144152

153+
# next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"].
145154
if current_depth <= depth:
155+
# id() is guaranteed to be unique only among objects that are alive at the same time, see: https://www.w3schools.com/python/ref_func_id.asp
156+
obj_id = id(next_val)
157+
if obj_id in visited:
158+
continue
159+
visited.add(obj_id)
160+
146161
if isinstance(next_val, Sequence):
147162
for content in next_val:
148163
new_queue.append((cast(AnyValue, content), current_depth + 1))

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,23 @@ def test_process_log_data_primitive(self):
114114

115115
self.assertEqual(actual_size, expected_size)
116116

117+
def test_process_log_data_with_cycle(self):
118+
"""Test that processor handles processing logs with circular references only once"""
119+
cyclic_dict: dict = {"data": "test"}
120+
cyclic_dict["self_ref"] = cyclic_dict
121+
122+
log = self.generate_test_log_data(
123+
log_body=cyclic_dict,
124+
attr_key="",
125+
attr_val="",
126+
log_body_depth=-1,
127+
attr_depth=-1,
128+
count=1,
129+
)
130+
expected_size = self.base_log_size + len("data") + len("self_ref") + len("test")
131+
actual_size = self.processor._estimate_log_size(log[0])
132+
self.assertEqual(actual_size, expected_size)
133+
117134
@patch(
118135
"amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach",
119136
return_value=MagicMock(),
@@ -236,7 +253,6 @@ def generate_test_log_data(
236253
attr_depth=3,
237254
count=5,
238255
create_map=True,
239-
instrumentation_scope=InstrumentationScope("test-scope", "1.0.0"),
240256
) -> List[LogData]:
241257

242258
def generate_nested_value(depth, value, create_map=True) -> AnyValue:

0 commit comments

Comments
 (0)