Skip to content

Commit dcd9453

Browse files
authored
Sanitize peer-supplied mDNS labels in discover CLI (#1689)
1 parent 533d608 commit dcd9453

9 files changed

Lines changed: 173 additions & 39 deletions

File tree

aioesphomeapi/_frame_helper/base.pxd

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
import cython
33

4+
from .._sanitize cimport safe_label_str
45
from ..connection cimport APIConnection
56

67

@@ -10,8 +11,6 @@ cdef int _MAX_NAME_LEN
1011
cdef int _MAX_MAC_LEN
1112
cdef int _MAX_EXPLANATION_LEN
1213

13-
cpdef str safe_label_str(str raw, int limit)
14-
1514
cdef class APIFrameHelper:
1615

1716
cdef object _loop

aioesphomeapi/_frame_helper/base.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
from typing import TYPE_CHECKING, cast
88

9+
from .._sanitize import MAX_EXPLANATION_LEN, MAX_MAC_LEN, MAX_NAME_LEN, safe_label_str
910
from ..core import SocketClosedAPIError
1011

1112
if TYPE_CHECKING:
@@ -25,25 +26,20 @@
2526
_bytes = bytes
2627

2728

28-
# Caps match the firmware's actual wire-format limits:
29-
# - name: ESPHOME_DEVICE_NAME_MAX_LEN = 31 (validate_hostname in core/config.py)
30-
# - mac: MAC_ADDRESS_BUFFER_SIZE - 1 = 12 (lowercase hex, no separator)
31-
# - explanation: 32-byte handshake-reject buffer minus the 1-byte failure code
32-
# A small extra margin on each lets benign forward-compat tweaks (e.g. firmware
33-
# bumping the max name length by a few chars) through without breaking clients.
34-
# MAX_* are the Python-importable forms (used by tests + connection.py); _MAX_*
35-
# are the cdef int aliases declared in base.pxd for hot-path C comparisons.
36-
MAX_NAME_LEN = 32
37-
MAX_MAC_LEN = 16
38-
MAX_EXPLANATION_LEN = 64
29+
# _MAX_* are the cdef int aliases declared in base.pxd for hot-path C
30+
# comparisons; the Python-importable MAX_* names live in aioesphomeapi._sanitize
31+
# and are re-exported here so existing `from .base import MAX_NAME_LEN` callers
32+
# keep working unchanged.
3933
_MAX_NAME_LEN = MAX_NAME_LEN
4034
_MAX_MAC_LEN = MAX_MAC_LEN
4135
_MAX_EXPLANATION_LEN = MAX_EXPLANATION_LEN
4236

43-
44-
def safe_label_str(raw: str, limit: _int) -> str:
45-
"""Strip non-printables and length-cap a peer-supplied label for log output."""
46-
return "".join(filter(str.isprintable, raw))[:limit]
37+
__all__ = (
38+
"MAX_EXPLANATION_LEN",
39+
"MAX_MAC_LEN",
40+
"MAX_NAME_LEN",
41+
"safe_label_str",
42+
)
4743

4844

4945
class APIFrameHelper:

aioesphomeapi/_frame_helper/noise.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import cython
22

3+
from .._sanitize cimport safe_label_str
34
from ..connection cimport APIConnection
45
from .base cimport (
56
APIFrameHelper,
67
_MAX_EXPLANATION_LEN,
78
_MAX_MAC_LEN,
89
_MAX_NAME_LEN,
9-
safe_label_str,
1010
)
1111
from .noise_encryption cimport EncryptCipher, DecryptCipher
1212
from .packets cimport make_noise_packets

aioesphomeapi/_frame_helper/noise.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from cryptography.exceptions import InvalidTag
99
from noise.connection import NoiseConnection
1010

11+
from .._sanitize import MAX_EXPLANATION_LEN, MAX_MAC_LEN, MAX_NAME_LEN, safe_label_str
1112
from ..core import (
1213
APIConnectionError,
1314
BadMACAddressAPIError,
@@ -19,14 +20,7 @@
1920
InvalidEncryptionKeyAPIError,
2021
ProtocolAPIError,
2122
)
22-
from .base import (
23-
_LOGGER,
24-
MAX_EXPLANATION_LEN,
25-
MAX_MAC_LEN,
26-
MAX_NAME_LEN,
27-
APIFrameHelper,
28-
safe_label_str,
29-
)
23+
from .base import _LOGGER, APIFrameHelper
3024
from .noise_encryption import ESPHOME_NOISE_BACKEND, DecryptCipher, EncryptCipher
3125
from .packets import make_noise_packets
3226

@@ -51,9 +45,9 @@
5145

5246
int_ = int
5347

54-
# Cython resolves _MAX_* and safe_label_str via cimport from .base
55-
# (noise.pxd); these assignments are the pure-Python (SKIP_CYTHON=1) fallback
56-
# so callers below have a name to resolve.
48+
# Cython resolves _MAX_* via cimport from .base and safe_label_str via cimport
49+
# from .._sanitize (noise.pxd); these assignments are the pure-Python
50+
# (SKIP_CYTHON=1) fallback so callers below have a name to resolve.
5751
_MAX_NAME_LEN = MAX_NAME_LEN
5852
_MAX_MAC_LEN = MAX_MAC_LEN
5953
_MAX_EXPLANATION_LEN = MAX_EXPLANATION_LEN

aioesphomeapi/_sanitize.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+
cpdef str safe_label_str(str raw, int limit)

aioesphomeapi/_sanitize.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Sanitize peer-supplied labels before logging or printing them.
2+
3+
Shared by the noise hello / handshake-reject paths and the discover CLI;
4+
strips non-printable characters and length-caps the result so a hostile
5+
remote can't inject ANSI escapes, newlines or oversized values into the
6+
operator's terminal or logs.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
# Caps match the firmware's actual wire-format limits:
12+
# - name: ESPHOME_DEVICE_NAME_MAX_LEN = 31 (validate_hostname in core/config.py)
13+
# - mac: MAC_ADDRESS_BUFFER_SIZE - 1 = 12 (lowercase hex, no separator)
14+
# - explanation: 32-byte handshake-reject buffer minus the 1-byte failure code
15+
# A small extra margin on each lets benign forward-compat tweaks (e.g. firmware
16+
# bumping the max name length by a few chars) through without breaking clients.
17+
MAX_NAME_LEN = 32
18+
MAX_MAC_LEN = 16
19+
MAX_EXPLANATION_LEN = 64
20+
21+
__all__ = (
22+
"MAX_EXPLANATION_LEN",
23+
"MAX_MAC_LEN",
24+
"MAX_NAME_LEN",
25+
"safe_label_str",
26+
)
27+
28+
29+
# Alias so the `limit` annotation below isn't interpreted as a C-int type
30+
# declaration by Cython — the .pxd already declares `int limit` and a bare
31+
# `int` annotation would clash with it ("Function signature does not match
32+
# previous declaration"). Mirrors the same workaround in _frame_helper/base.py.
33+
_int = int
34+
35+
36+
def safe_label_str(raw: str, limit: _int) -> str:
37+
"""Strip non-printables and length-cap a peer-supplied label for log output."""
38+
return "".join(filter(str.isprintable, raw))[:limit]

aioesphomeapi/discover.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,44 @@
66
import asyncio
77
import contextlib
88
import logging
9+
import re
910
import sys
1011

1112
from zeroconf import IPVersion, ServiceStateChange, Zeroconf
1213
from zeroconf.asyncio import AsyncServiceBrowser, AsyncServiceInfo, AsyncZeroconf
1314

15+
from ._sanitize import safe_label_str
16+
1417
FORMAT = "{: <7}|{: <32}|{: <15}|{: <12}|{: <16}|{: <10}|{: <32}"
1518
COLUMN_NAMES = ("Status", "Name", "Address", "MAC", "Version", "Platform", "Board")
1619
UNKNOWN = "unknown"
1720

18-
19-
def decode_bytes_or_unknown(data: str | bytes | None) -> str:
20-
"""Decode bytes or return unknown."""
21+
# Per-column display caps for peer-supplied mDNS labels, derived from the
22+
# FORMAT widths so a hostile broadcaster can't widen a column by stuffing a
23+
# long value; deriving them from FORMAT keeps the caps in lock-step if the
24+
# table layout is ever retuned.
25+
_COLUMN_WIDTHS = tuple(int(w) for w in re.findall(r"<\s*(\d+)", FORMAT))
26+
assert len(_COLUMN_WIDTHS) == len(COLUMN_NAMES), (
27+
"FORMAT width count must match COLUMN_NAMES; update one and the other together"
28+
)
29+
_MAX_NAME_DISPLAY = _COLUMN_WIDTHS[COLUMN_NAMES.index("Name")]
30+
_MAX_MAC_DISPLAY = _COLUMN_WIDTHS[COLUMN_NAMES.index("MAC")]
31+
_MAX_VERSION_DISPLAY = _COLUMN_WIDTHS[COLUMN_NAMES.index("Version")]
32+
_MAX_PLATFORM_DISPLAY = _COLUMN_WIDTHS[COLUMN_NAMES.index("Platform")]
33+
_MAX_BOARD_DISPLAY = _COLUMN_WIDTHS[COLUMN_NAMES.index("Board")]
34+
35+
36+
def decode_mdns_label_or_unknown(
37+
data: str | bytes | None, limit: int = _MAX_NAME_DISPLAY
38+
) -> str:
39+
"""Decode peer-supplied mDNS bytes, strip non-printables, length-cap."""
2140
if data is None:
2241
return UNKNOWN
2342
if isinstance(data, bytes):
24-
return data.decode()
25-
return data
43+
# A device on the LAN can broadcast arbitrary bytes; use "replace" so
44+
# a malformed UTF-8 payload doesn't raise out of the zeroconf callback.
45+
data = data.decode("utf-8", "replace")
46+
return safe_label_str(data, limit)
2647

2748

2849
def async_service_update(
@@ -32,15 +53,22 @@ def async_service_update(
3253
state_change: ServiceStateChange,
3354
) -> None:
3455
"""Service state changed."""
35-
short_name = name.partition(".")[0]
56+
# The mDNS service name is peer-controlled — sanitize before printing so
57+
# a hostile broadcaster can't inject ANSI escapes / newlines / null bytes
58+
# into the terminal.
59+
short_name = safe_label_str(name.partition(".")[0], _MAX_NAME_DISPLAY)
3660
state = "OFFLINE" if state_change is ServiceStateChange.Removed else "ONLINE"
3761
info = AsyncServiceInfo(service_type, name)
3862
info.load_from_cache(zeroconf)
3963
properties = info.properties
40-
mac = decode_bytes_or_unknown(properties.get(b"mac"))
41-
version = decode_bytes_or_unknown(properties.get(b"version"))
42-
platform = decode_bytes_or_unknown(properties.get(b"platform"))
43-
board = decode_bytes_or_unknown(properties.get(b"board"))
64+
mac = decode_mdns_label_or_unknown(properties.get(b"mac"), _MAX_MAC_DISPLAY)
65+
version = decode_mdns_label_or_unknown(
66+
properties.get(b"version"), _MAX_VERSION_DISPLAY
67+
)
68+
platform = decode_mdns_label_or_unknown(
69+
properties.get(b"platform"), _MAX_PLATFORM_DISPLAY
70+
)
71+
board = decode_mdns_label_or_unknown(properties.get(b"board"), _MAX_BOARD_DISPLAY)
4472
address = ""
4573
if addresses := info.ip_addresses_by_version(IPVersion.V4Only):
4674
address = str(addresses[0])

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from distutils.core import Extension
1616

1717
TO_CYTHONIZE = [
18+
"aioesphomeapi/_sanitize.py",
1819
"aioesphomeapi/client_base.py",
1920
"aioesphomeapi/connection.py",
2021
"aioesphomeapi/_frame_helper/base.py",

tests/test_discover.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from __future__ import annotations
2+
3+
import re
4+
5+
from aioesphomeapi.discover import (
6+
_MAX_BOARD_DISPLAY,
7+
_MAX_MAC_DISPLAY,
8+
_MAX_NAME_DISPLAY,
9+
_MAX_PLATFORM_DISPLAY,
10+
_MAX_VERSION_DISPLAY,
11+
COLUMN_NAMES,
12+
FORMAT,
13+
UNKNOWN,
14+
decode_mdns_label_or_unknown,
15+
)
16+
17+
18+
def test_decode_mdns_label_or_unknown_none() -> None:
19+
assert decode_mdns_label_or_unknown(None) == UNKNOWN
20+
21+
22+
def test_decode_mdns_label_or_unknown_str_passthrough() -> None:
23+
assert decode_mdns_label_or_unknown("esp32-board") == "esp32-board"
24+
25+
26+
def test_decode_mdns_label_or_unknown_bytes_utf8() -> None:
27+
assert decode_mdns_label_or_unknown(b"esp32-board") == "esp32-board"
28+
29+
30+
def test_decode_mdns_label_or_unknown_invalid_utf8_replaces() -> None:
31+
# Hostile mDNS broadcaster sends non-UTF-8 bytes; result is the U+FFFD
32+
# replacement character (one per invalid byte), never raises
33+
# UnicodeDecodeError. Pinning the actual output, not just the type, keeps
34+
# a future refactor from silently switching to UNKNOWN or empty string.
35+
assert decode_mdns_label_or_unknown(b"\xff\xfe") == "\ufffd\ufffd"
36+
37+
38+
def test_decode_mdns_label_or_unknown_strips_control_chars() -> None:
39+
# Strip the ESC byte that activates ANSI sequences, plus newline / CR /
40+
# null / tab / etc. The trailing "[2J" is harmless printable text once
41+
# the leading ESC is gone, so a hostile broadcaster can no longer clear
42+
# the user's terminal from a discovery scan.
43+
assert decode_mdns_label_or_unknown(b"\x1b[2Jvers\n1.0") == "[2Jvers1.0"
44+
assert decode_mdns_label_or_unknown(b"line1\r\nline2") == "line1line2"
45+
assert decode_mdns_label_or_unknown(b"col\tumn") == "column"
46+
47+
48+
def test_decode_mdns_label_or_unknown_strips_null_byte() -> None:
49+
assert decode_mdns_label_or_unknown(b"esp\x0032") == "esp32"
50+
51+
52+
def test_decode_mdns_label_or_unknown_caps_length() -> None:
53+
assert decode_mdns_label_or_unknown(b"x" * 200, limit=10) == "x" * 10
54+
55+
56+
def test_decode_mdns_label_or_unknown_default_limit_caps_long_str() -> None:
57+
# Default cap is the Name column width from FORMAT.
58+
assert len(decode_mdns_label_or_unknown("a" * 100)) == _MAX_NAME_DISPLAY
59+
60+
61+
def test_decode_mdns_label_or_unknown_unicode_printable_survives() -> None:
62+
# safe_label_str uses str.isprintable so non-ASCII printable chars stay.
63+
assert decode_mdns_label_or_unknown("café") == "café"
64+
65+
66+
def test_per_column_caps_match_format_widths() -> None:
67+
# The per-column caps must equal the FORMAT widths so a peer-controlled
68+
# value can never widen a column past its slot. If FORMAT changes and
69+
# this assertion fires, update the cap derivation in discover.py — do
70+
# not just bump the expected values.
71+
widths = tuple(int(w) for w in re.findall(r"<\s*(\d+)", FORMAT))
72+
assert widths[COLUMN_NAMES.index("Name")] == _MAX_NAME_DISPLAY
73+
assert widths[COLUMN_NAMES.index("MAC")] == _MAX_MAC_DISPLAY
74+
assert widths[COLUMN_NAMES.index("Version")] == _MAX_VERSION_DISPLAY
75+
assert widths[COLUMN_NAMES.index("Platform")] == _MAX_PLATFORM_DISPLAY
76+
assert widths[COLUMN_NAMES.index("Board")] == _MAX_BOARD_DISPLAY

0 commit comments

Comments
 (0)