From 15e8940c7fb489bcc08556c104d7701cefa2fb6f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 25 Feb 2025 17:11:23 +0100 Subject: [PATCH] Improve D-Bus timeout error handling (#5664) * Improve D-Bus timeout error handling Typically D-Bus timeouts are related to systemd activation timing out after 25s. The current dbus-fast timeout of 10s is well below that so we never get the actual D-Bus error. This increases the dbus-fast timeout to 30s, which will make sure we wait long enough to get the actual D-Bus error from the broker. Note that this should not slow down a typical system, since we tried three times each waiting for 10s. With the new error handling typically we'll end up waiting 25s and then receive the actual D-Bus error. There is no point in waiting for multiple D-Bus/systemd caused timeouts. * Create D-Bus TimedOut exception --- supervisor/dbus/udisks2/__init__.py | 4 ++-- supervisor/exceptions.py | 6 +++++- supervisor/os/data_disk.py | 13 +++++++------ supervisor/utils/dbus.py | 13 +++++++++++-- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/supervisor/dbus/udisks2/__init__.py b/supervisor/dbus/udisks2/__init__.py index 076e159d8c6..61ba7539a9a 100644 --- a/supervisor/dbus/udisks2/__init__.py +++ b/supervisor/dbus/udisks2/__init__.py @@ -66,8 +66,8 @@ async def connect(self, bus: MessageBus): try: await super().connect(bus) await self.udisks2_object_manager.connect(bus) - except DBusError: - _LOGGER.warning("Can't connect to udisks2") + except DBusError as err: + _LOGGER.critical("Can't connect to udisks2: %s", err) except (DBusServiceUnkownError, DBusInterfaceError): _LOGGER.warning( "No udisks2 support on the host. Host control has been disabled." 
diff --git a/supervisor/exceptions.py b/supervisor/exceptions.py index df26a626c0c..560a53cf084 100644 --- a/supervisor/exceptions.py +++ b/supervisor/exceptions.py @@ -403,7 +403,11 @@ class DBusParseError(DBusError): class DBusTimeoutError(DBusError): - """D-Bus call timed out.""" + """D-Bus call timeout.""" + + +class DBusTimedOutError(DBusError): + """D-Bus call timed out (typically when systemd D-Bus service activation fail).""" class DBusNoReplyError(DBusError): diff --git a/supervisor/os/data_disk.py b/supervisor/os/data_disk.py index 1219c1db715..40be89d24f6 100644 --- a/supervisor/os/data_disk.py +++ b/supervisor/os/data_disk.py @@ -189,12 +189,13 @@ async def load(self) -> None: await self.sys_dbus.agent.datadisk.reload_device() # Register for signals on devices added/removed - self.sys_dbus.udisks2.udisks2_object_manager.dbus.object_manager.on_interfaces_added( - self._udisks2_interface_added - ) - self.sys_dbus.udisks2.udisks2_object_manager.dbus.object_manager.on_interfaces_removed( - self._udisks2_interface_removed - ) + if self.sys_dbus.udisks2.is_connected: + self.sys_dbus.udisks2.udisks2_object_manager.dbus.object_manager.on_interfaces_added( + self._udisks2_interface_added + ) + self.sys_dbus.udisks2.udisks2_object_manager.dbus.object_manager.on_interfaces_removed( + self._udisks2_interface_removed + ) @Job( name="data_disk_migrate", diff --git a/supervisor/utils/dbus.py b/supervisor/utils/dbus.py index 3953c3df6b6..b24628d654d 100644 --- a/supervisor/utils/dbus.py +++ b/supervisor/utils/dbus.py @@ -31,6 +31,7 @@ DBusObjectError, DBusParseError, DBusServiceUnkownError, + DBusTimedOutError, DBusTimeoutError, HassioNotSupportedError, ) @@ -87,6 +88,8 @@ def from_dbus_error(err: DBusFastDBusError) -> HassioNotSupportedError | DBusErr return DBusNotConnectedError(err.text) if err.type == ErrorType.TIMEOUT: return DBusTimeoutError(err.text) + if err.type == ErrorType.TIMED_OUT: + return DBusTimedOutError(err.text) if err.type == ErrorType.NO_REPLY: 
return DBusNoReplyError(err.text) return DBusFatalError(err.text, type_=err.type) @@ -136,7 +139,7 @@ async def introspect(self) -> Node: for _ in range(3): try: return await self._bus.introspect( - self.bus_name, self.object_path, timeout=10 + self.bus_name, self.object_path, timeout=30 ) except InvalidIntrospectionError as err: raise DBusParseError( @@ -144,7 +147,13 @@ async def introspect(self) -> Node: ) from err except DBusFastDBusError as err: raise DBus.from_dbus_error(err) from None - except (EOFError, TimeoutError): + except TimeoutError: + # The systemd D-Bus activate service has a timeout of 25s, which will raise. We should + # not end up here unless the D-Bus broker is majorly overwhelmed. + _LOGGER.critical( + "Timeout connecting to %s - %s", self.bus_name, self.object_path + ) + except EOFError: _LOGGER.warning( "Busy system at %s - %s", self.bus_name, self.object_path )