From 1590f7d0f02a35ecfefc42388fddcbfd2e7b22df Mon Sep 17 00:00:00 2001 From: Faur Ioan-Aurel Date: Tue, 1 Apr 2025 01:35:19 +0300 Subject: [PATCH 1/2] fix: socket connection timeout Context: - okhttp uses an HTTP/2 connection to the coder rest api in order to resolve the workspaces. - HTTP/2 uses a single TCP connection for multiple requests (multiplexing). If the connection is idle, the http server can close that connection, with the client side ending in a socket timeout if it doesn't detect the drop in time. - similarly on the client side, if the OS goes into sleep mode, the connection might have been interrupted. HTTP/2 doesn't always detect this quickly, leading to stale streams when Toolbox wakes up. Implementation: - we could try to force the client to use HTTP/1 which creates a TCP connection for each request, but from my testing it seems that configuring a retry strategy when a client attempts to reuse a TCP connection that has unexpectedly closed does the job. - resolves #13 --- src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt b/src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt index 1122b54..3ee4af0 100644 --- a/src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt +++ b/src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt @@ -103,6 +103,7 @@ open class CoderRestClient( builder .sslSocketFactory(socketFactory, trustManagers[0] as X509TrustManager) .hostnameVerifier(CoderHostnameVerifier(settings.tls.altHostname)) + .retryOnConnectionFailure(true) .addInterceptor { it.proceed( it.request().newBuilder().addHeader( From e91d4415844bb3231bdda715daf4bce2452deb83 Mon Sep 17 00:00:00 2001 From: Faur Ioan-Aurel Date: Wed, 2 Apr 2025 00:50:19 +0300 Subject: [PATCH 2/2] fix: detect os sleep after socket timeout exception - retryOnConnectionFailure seems to not be enough; there are some residual socket timeouts that can still be reproduced - 
with this patch we detect if the os went to sleep by measuring large gaps between the last poll time and the socket timeout exception. - if sleep is detected we reset the OkHttp client and establish a fresh TCP connection. --- .../com/coder/toolbox/CoderRemoteProvider.kt | 29 ++++++++++++++----- .../coder/toolbox/CoderToolboxExtension.kt | 4 +-- .../com/coder/toolbox/sdk/CoderRestClient.kt | 11 ++++--- .../toolbox/util/CoderProtocolHandler.kt | 5 +--- .../com/coder/toolbox/views/ConnectPage.kt | 3 -- 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/src/main/kotlin/com/coder/toolbox/CoderRemoteProvider.kt b/src/main/kotlin/com/coder/toolbox/CoderRemoteProvider.kt index c9acc57..fef85ff 100644 --- a/src/main/kotlin/com/coder/toolbox/CoderRemoteProvider.kt +++ b/src/main/kotlin/com/coder/toolbox/CoderRemoteProvider.kt @@ -30,18 +30,20 @@ import kotlinx.coroutines.isActive import kotlinx.coroutines.launch import kotlinx.coroutines.selects.onTimeout import kotlinx.coroutines.selects.select -import okhttp3.OkHttpClient +import java.net.SocketTimeoutException import java.net.URI import java.net.URL import kotlin.coroutines.cancellation.CancellationException import kotlin.time.Duration.Companion.seconds +import kotlin.time.TimeSource import com.jetbrains.toolbox.api.ui.components.AccountDropdownField as DropDownMenu import com.jetbrains.toolbox.api.ui.components.AccountDropdownField as dropDownFactory +private val POLL_INTERVAL = 5.seconds + @OptIn(ExperimentalCoroutinesApi::class) class CoderRemoteProvider( private val context: CoderToolboxContext, - private val httpClient: OkHttpClient, ) : RemoteProvider("Coder") { // Current polling job. private var pollJob: Job? 
= null @@ -66,7 +68,7 @@ class CoderRemoteProvider( private var firstRun = true private val isInitialized: MutableStateFlow = MutableStateFlow(false) private var coderHeaderPage = NewEnvironmentPage(context, context.i18n.pnotr(getDeploymentURL()?.first ?: "")) - private val linkHandler = CoderProtocolHandler(context, httpClient, dialogUi, isInitialized) + private val linkHandler = CoderProtocolHandler(context, dialogUi, isInitialized) override val environments: MutableStateFlow>> = MutableStateFlow( LoadableState.Value(emptyList()) ) @@ -77,6 +79,7 @@ class CoderRemoteProvider( * first time). */ private fun poll(client: CoderRestClient, cli: CoderCLIManager): Job = context.cs.launch { + var lastPollTime = TimeSource.Monotonic.markNow() while (isActive) { try { context.logger.debug("Fetching workspace agents from ${client.url}") @@ -134,16 +137,28 @@ class CoderRemoteProvider( } catch (_: CancellationException) { context.logger.debug("${client.url} polling loop canceled") break + } catch (ex: SocketTimeoutException) { + val elapsed = lastPollTime.elapsedNow() + if (elapsed > POLL_INTERVAL * 2) { + context.logger.info("wake-up from an OS sleep was detected, going to re-initialize the http client...") + client.setupSession() + } else { + context.logger.error(ex, "workspace polling error encountered") + pollError = ex + logout() + break + } } catch (ex: Exception) { - context.logger.info(ex, "workspace polling error encountered") + context.logger.error(ex, "workspace polling error encountered") pollError = ex logout() break } + // TODO: Listening on a web socket might be better? 
select { - onTimeout(5.seconds) { - context.logger.trace("workspace poller waked up by the 5 seconds timeout") + onTimeout(POLL_INTERVAL) { + context.logger.trace("workspace poller waked up by the $POLL_INTERVAL timeout") } triggerSshConfig.onReceive { shouldTrigger -> if (shouldTrigger) { @@ -152,6 +167,7 @@ class CoderRemoteProvider( } } } + lastPollTime = TimeSource.Monotonic.markNow() } } @@ -329,7 +345,6 @@ class CoderRemoteProvider( context, deploymentURL, token, - httpClient, ::goToEnvironmentsPage, ) { client, cli -> // Store the URL and token for use next time. diff --git a/src/main/kotlin/com/coder/toolbox/CoderToolboxExtension.kt b/src/main/kotlin/com/coder/toolbox/CoderToolboxExtension.kt index 755d934..a310ee0 100644 --- a/src/main/kotlin/com/coder/toolbox/CoderToolboxExtension.kt +++ b/src/main/kotlin/com/coder/toolbox/CoderToolboxExtension.kt @@ -15,7 +15,6 @@ import com.jetbrains.toolbox.api.remoteDev.states.EnvironmentStateColorPalette import com.jetbrains.toolbox.api.remoteDev.ui.EnvironmentUiPageManager import com.jetbrains.toolbox.api.ui.ToolboxUi import kotlinx.coroutines.CoroutineScope -import okhttp3.OkHttpClient /** * Entry point into the extension. @@ -35,8 +34,7 @@ class CoderToolboxExtension : RemoteDevExtension { serviceLocator.getService(LocalizableStringFactory::class.java), CoderSettingsStore(serviceLocator.getService(PluginSettingsStore::class.java), Environment(), logger), CoderSecretsStore(serviceLocator.getService(PluginSecretStore::class.java)), - ), - OkHttpClient(), + ) ) } } \ No newline at end of file diff --git a/src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt b/src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt index 3ee4af0..3b107be 100644 --- a/src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt +++ b/src/main/kotlin/com/coder/toolbox/sdk/CoderRestClient.kt @@ -53,16 +53,19 @@ open class CoderRestClient( val token: String?, private val proxyValues: ProxyValues? 
= null, private val pluginVersion: String = "development", - existingHttpClient: OkHttpClient? = null, ) { private val settings = context.settingsStore.readOnly() - private val httpClient: OkHttpClient - private val retroRestClient: CoderV2RestFacade + private lateinit var httpClient: OkHttpClient + private lateinit var retroRestClient: CoderV2RestFacade lateinit var me: User lateinit var buildVersion: String init { + setupSession() + } + + fun setupSession() { val moshi = Moshi.Builder() .add(ArchConverter()) @@ -73,7 +76,7 @@ open class CoderRestClient( val socketFactory = coderSocketFactory(settings.tls) val trustManagers = coderTrustManagers(settings.tls.caPath) - var builder = existingHttpClient?.newBuilder() ?: OkHttpClient.Builder() + var builder = OkHttpClient.Builder() if (proxyValues != null) { builder = diff --git a/src/main/kotlin/com/coder/toolbox/util/CoderProtocolHandler.kt b/src/main/kotlin/com/coder/toolbox/util/CoderProtocolHandler.kt index fe2e307..c8b8e51 100644 --- a/src/main/kotlin/com/coder/toolbox/util/CoderProtocolHandler.kt +++ b/src/main/kotlin/com/coder/toolbox/util/CoderProtocolHandler.kt @@ -16,7 +16,6 @@ import kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.flow.first import kotlinx.coroutines.launch import kotlinx.coroutines.time.withTimeout -import okhttp3.OkHttpClient import java.net.HttpURLConnection import java.net.URI import java.net.URL @@ -26,7 +25,6 @@ import kotlin.time.toJavaDuration open class CoderProtocolHandler( private val context: CoderToolboxContext, - private val httpClient: OkHttpClient?, private val dialogUi: DialogUi, private val isInitialized: StateFlow, ) { @@ -230,8 +228,7 @@ open class CoderProtocolHandler( deploymentURL.toURL(), token, proxyValues = null, // TODO - not sure the above comment applies as we are creating our own http client - PluginManager.pluginInfo.version, - httpClient + PluginManager.pluginInfo.version ) client.authenticate() return client diff --git 
a/src/main/kotlin/com/coder/toolbox/views/ConnectPage.kt b/src/main/kotlin/com/coder/toolbox/views/ConnectPage.kt index 261cc53..b3523b5 100644 --- a/src/main/kotlin/com/coder/toolbox/views/ConnectPage.kt +++ b/src/main/kotlin/com/coder/toolbox/views/ConnectPage.kt @@ -14,7 +14,6 @@ import kotlinx.coroutines.Job import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.launch -import okhttp3.OkHttpClient import java.net.URL /** @@ -24,7 +23,6 @@ class ConnectPage( private val context: CoderToolboxContext, private val url: URL, private val token: String?, - private val httpClient: OkHttpClient, private val onCancel: () -> Unit, private val onConnect: ( client: CoderRestClient, @@ -95,7 +93,6 @@ class ConnectPage( token, proxyValues = null, PluginManager.pluginInfo.version, - httpClient ) client.authenticate() updateStatus(context.i18n.ptrl("Checking Coder binary..."), error = null)