Skip to content

Commit

Permalink
Improve D-Bus timeout error handling (#5664)
Browse files Browse the repository at this point in the history
* Improve D-Bus timeout error handling

Typically D-Bus timeouts are related to systemd activation timing out
after 25s. The current dbus-fast timeout of 10s is well below that,
so we never get the actual D-Bus error. This increases the dbus-fast
timeout to 30s, which makes sure we wait long enough to get the
actual D-Bus error from the broker.

Note that this should not slow down a typical system, since we previously
tried three times, each waiting for 10s. With the new error handling we'll
typically end up waiting 25s and then receive the actual D-Bus error. There
is no point in waiting for multiple timeouts caused by D-Bus/systemd.

* Create D-Bus TimedOut exception
  • Loading branch information
agners authored Feb 25, 2025
1 parent 644ec45 commit 15e8940
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 11 deletions.
4 changes: 2 additions & 2 deletions supervisor/dbus/udisks2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ async def connect(self, bus: MessageBus):
try:
await super().connect(bus)
await self.udisks2_object_manager.connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to udisks2")
except DBusError as err:
_LOGGER.critical("Can't connect to udisks2: %s", err)
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No udisks2 support on the host. Host control has been disabled."
Expand Down
6 changes: 5 additions & 1 deletion supervisor/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,11 @@ class DBusParseError(DBusError):


class DBusTimeoutError(DBusError):
"""D-Bus call timed out."""
"""D-Bus call timeout."""


class DBusTimedOutError(DBusError):
"""D-Bus call timed out (typically when systemd D-Bus service activation fail)."""


class DBusNoReplyError(DBusError):
Expand Down
13 changes: 7 additions & 6 deletions supervisor/os/data_disk.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,13 @@ async def load(self) -> None:
await self.sys_dbus.agent.datadisk.reload_device()

# Register for signals on devices added/removed
self.sys_dbus.udisks2.udisks2_object_manager.dbus.object_manager.on_interfaces_added(
self._udisks2_interface_added
)
self.sys_dbus.udisks2.udisks2_object_manager.dbus.object_manager.on_interfaces_removed(
self._udisks2_interface_removed
)
if self.sys_dbus.udisks2.is_connected:
self.sys_dbus.udisks2.udisks2_object_manager.dbus.object_manager.on_interfaces_added(
self._udisks2_interface_added
)
self.sys_dbus.udisks2.udisks2_object_manager.dbus.object_manager.on_interfaces_removed(
self._udisks2_interface_removed
)

@Job(
name="data_disk_migrate",
Expand Down
13 changes: 11 additions & 2 deletions supervisor/utils/dbus.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
DBusObjectError,
DBusParseError,
DBusServiceUnkownError,
DBusTimedOutError,
DBusTimeoutError,
HassioNotSupportedError,
)
Expand Down Expand Up @@ -87,6 +88,8 @@ def from_dbus_error(err: DBusFastDBusError) -> HassioNotSupportedError | DBusErr
return DBusNotConnectedError(err.text)
if err.type == ErrorType.TIMEOUT:
return DBusTimeoutError(err.text)
if err.type == ErrorType.TIMED_OUT:
return DBusTimedOutError(err.text)
if err.type == ErrorType.NO_REPLY:
return DBusNoReplyError(err.text)
return DBusFatalError(err.text, type_=err.type)
Expand Down Expand Up @@ -136,15 +139,21 @@ async def introspect(self) -> Node:
for _ in range(3):
try:
return await self._bus.introspect(
self.bus_name, self.object_path, timeout=10
self.bus_name, self.object_path, timeout=30
)
except InvalidIntrospectionError as err:
raise DBusParseError(
f"Can't parse introspect data: {err}", _LOGGER.error
) from err
except DBusFastDBusError as err:
raise DBus.from_dbus_error(err) from None
except (EOFError, TimeoutError):
except TimeoutError:
# The systemd D-Bus activate service has a timeout of 25s, which will raise. We should
# not end up here unless the D-Bus broker is majorly overwhelmed.
_LOGGER.critical(
"Timeout connecting to %s - %s", self.bus_name, self.object_path
)
except EOFError:
_LOGGER.warning(
"Busy system at %s - %s", self.bus_name, self.object_path
)
Expand Down

0 comments on commit 15e8940

Please sign in to comment.