Skip to content

Commit a0304a0

Browse files
authored
Use LogRecord.getMessage to get OTLP body (#4216)
1 parent 59d6754 commit a0304a0

File tree

3 files changed

+65
-40
lines changed

3 files changed

+65
-40
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4444
([#4224](https://github.com/open-telemetry/opentelemetry-python/pull/4224))
4545
- Drop `OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION` environment variable
4646
([#4217](https://github.com/open-telemetry/opentelemetry-python/pull/4217))
47+
- Improve compatibility with other logging libraries that override
48+
`LogRecord.getMessage()` in order to customize message formatting
49+
([#4216](https://github.com/open-telemetry/opentelemetry-python/pull/4216))
4750

4851
## Version 1.27.0/0.48b0 (2024-08-28)
4952

opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py

+16-40
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ def force_flush(self, timeout_millis: int = 30000) -> bool:
438438
"exc_text",
439439
"filename",
440440
"funcName",
441+
"getMessage",
441442
"message",
442443
"levelname",
443444
"levelno",
@@ -503,51 +504,26 @@ def _translate(self, record: logging.LogRecord) -> LogRecord:
503504
observered_timestamp = time_ns()
504505
span_context = get_current_span().get_span_context()
505506
attributes = self._get_attributes(record)
506-
# This comment is taken from GanyedeNil's PR #3343, I have redacted it
507-
# slightly for clarity:
508-
# According to the definition of the Body field type in the
509-
# OTel 1.22.0 Logs Data Model article, the Body field should be of
510-
# type 'any' and should not use the str method to directly translate
511-
# the msg. This is because str only converts non-text types into a
512-
# human-readable form, rather than a standard format, which leads to
513-
# the need for additional operations when collected through a log
514-
# collector.
515-
# Considering that he Body field should be of type 'any' and should not
516-
# use the str method but record.msg is also a string type, then the
517-
# difference is just the self.args formatting?
518-
# The primary consideration depends on the ultimate purpose of the log.
519-
# Converting the default log directly into a string is acceptable as it
520-
# will be required to be presented in a more readable format. However,
521-
# this approach might not be as "standard" when hoping to aggregate
522-
# logs and perform subsequent data analysis. In the context of log
523-
# extraction, it would be more appropriate for the msg to be
524-
# converted into JSON format or remain unchanged, as it will eventually
525-
# be transformed into JSON. If the final output JSON data contains a
526-
# structure that appears similar to JSON but is not, it may confuse
527-
# users. This is particularly true for operation and maintenance
528-
# personnel who need to deal with log data in various languages.
529-
# Where is the JSON converting occur? and what about when the msg
530-
# represents something else but JSON, the expected behavior change?
531-
# For the ConsoleLogExporter, it performs the to_json operation in
532-
# opentelemetry.sdk._logs._internal.export.ConsoleLogExporter.__init__,
533-
# so it can handle any type of input without problems. As for the
534-
# OTLPLogExporter, it also handles any type of input encoding in
535-
# _encode_log located in
536-
# opentelemetry.exporter.otlp.proto.common._internal._log_encoder.
537-
# Therefore, no extra operation is needed to support this change.
538-
# The only thing to consider is the users who have already been using
539-
# this SDK. If they upgrade the SDK after this change, they will need
540-
# to readjust their logging collection rules to adapt to the latest
541-
# output format. Therefore, this change is considered a breaking
542-
# change and needs to be upgraded at an appropriate time.
543507
severity_number = std_to_otel(record.levelno)
544508
if self.formatter:
545509
body = self.format(record)
546510
else:
547-
if isinstance(record.msg, str) and record.args:
548-
body = record.msg % record.args
549-
else:
511+
# `record.getMessage()` uses `record.msg` as a template to format
512+
# `record.args` into. There is a special case in `record.getMessage()`
513+
# where it will only attempt formatting if args are provided,
514+
# otherwise, it just stringifies `record.msg`.
515+
#
516+
# Since the OTLP body field has a type of 'any' and the logging module
517+
# is sometimes used in such a way that objects incorrectly end up
518+
# set as record.msg, in those cases we would like to bypass
519+
# `record.getMessage()` completely and set the body to the object
520+
# itself instead of its string representation.
521+
# For more background, see: https://github.com/open-telemetry/opentelemetry-python/pull/4216
522+
if not record.args and not isinstance(record.msg, str):
523+
# no args are provided so it's *mostly* safe to use the message template as the body
550524
body = record.msg
525+
else:
526+
body = record.getMessage()
551527

552528
# related to https://github.com/open-telemetry/opentelemetry-python/issues/3548
553529
# Severity Text = WARN as defined in https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md#displaying-severity.

opentelemetry-sdk/tests/logs/test_export.py

+46
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,52 @@ def test_simple_log_record_processor_different_msg_types(self):
230230
item.instrumentation_scope.name, "different_msg_types"
231231
)
232232

233+
def test_simple_log_record_processor_custom_single_obj(self):
234+
"""
235+
Tests that special-case handling for logging a single non-string object
236+
is correctly applied.
237+
"""
238+
exporter = InMemoryLogExporter()
239+
log_record_processor = BatchLogRecordProcessor(exporter)
240+
241+
provider = LoggerProvider()
242+
provider.add_log_record_processor(log_record_processor)
243+
244+
logger = logging.getLogger("single_obj")
245+
logger.addHandler(LoggingHandler(logger_provider=provider))
246+
247+
# NOTE: the behaviour of `record.getMessage` is detailed in the
248+
# `logging.Logger.debug` documentation:
249+
# > The msg is the message format string, and the args are the arguments
250+
# > which are merged into msg using the string formatting operator. [...]
251+
# > No % formatting operation is performed on msg when no args are supplied.
252+
253+
# This test uses the presence of '%s' in the first arg to determine if
254+
# formatting was applied
255+
256+
# string msg with no args - getMessage bypasses formatting and sets the string directly
257+
logger.warning("a string with a percent-s: %s")
258+
# string msg with args - getMessage formats args into the msg
259+
logger.warning("a string with a percent-s: %s", "and arg")
260+
# non-string msg with args - getMessage stringifies msg and formats args into it
261+
logger.warning(["a non-string with a percent-s", "%s"], "and arg")
262+
# non-string msg with no args:
263+
# - normally getMessage would stringify the object and bypass formatting
264+
# - SPECIAL CASE: bypass stringification as well to keep the raw object
265+
logger.warning(["a non-string with a percent-s", "%s"])
266+
log_record_processor.shutdown()
267+
268+
finished_logs = exporter.get_finished_logs()
269+
expected = [
270+
("a string with a percent-s: %s"),
271+
("a string with a percent-s: and arg"),
272+
("['a non-string with a percent-s', 'and arg']"),
273+
(["a non-string with a percent-s", "%s"]),
274+
]
275+
for emitted, expected in zip(finished_logs, expected):
276+
self.assertEqual(emitted.log_record.body, expected)
277+
self.assertEqual(emitted.instrumentation_scope.name, "single_obj")
278+
233279
def test_simple_log_record_processor_different_msg_types_with_formatter(
234280
self,
235281
):

0 commit comments

Comments
 (0)