|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, software |
| 13 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | + * See the License for the specific language governing permissions and |
| 16 | + * limitations under the License. |
| 17 | + */ |
| 18 | +package com.datastax.oss.driver.api.core.retry; |
| 19 | + |
| 20 | +import com.datastax.oss.driver.api.core.ConsistencyLevel; |
| 21 | +import com.datastax.oss.driver.api.core.connection.ClosedConnectionException; |
| 22 | +import com.datastax.oss.driver.api.core.connection.HeartbeatException; |
| 23 | +import com.datastax.oss.driver.api.core.loadbalancing.LoadBalancingPolicy; |
| 24 | +import com.datastax.oss.driver.api.core.servererrors.BootstrappingException; |
| 25 | +import com.datastax.oss.driver.api.core.servererrors.CoordinatorException; |
| 26 | +import com.datastax.oss.driver.api.core.servererrors.FunctionFailureException; |
| 27 | +import com.datastax.oss.driver.api.core.servererrors.OverloadedException; |
| 28 | +import com.datastax.oss.driver.api.core.servererrors.ProtocolError; |
| 29 | +import com.datastax.oss.driver.api.core.servererrors.QueryValidationException; |
| 30 | +import com.datastax.oss.driver.api.core.servererrors.ReadFailureException; |
| 31 | +import com.datastax.oss.driver.api.core.servererrors.ReadTimeoutException; |
| 32 | +import com.datastax.oss.driver.api.core.servererrors.ServerError; |
| 33 | +import com.datastax.oss.driver.api.core.servererrors.TruncateException; |
| 34 | +import com.datastax.oss.driver.api.core.servererrors.WriteFailureException; |
| 35 | +import com.datastax.oss.driver.api.core.servererrors.WriteType; |
| 36 | +import com.datastax.oss.driver.api.core.session.Request; |
| 37 | +import edu.umd.cs.findbugs.annotations.NonNull; |
| 38 | + |
| 39 | +/** |
| 40 | + * Defines the behavior to adopt when a request fails. |
| 41 | + * |
| 42 | + * <p>For each request, the driver gets a "query plan" (a list of coordinators to try) from the |
| 43 | + * {@link LoadBalancingPolicy}, and tries each node in sequence. This policy is invoked if the |
| 44 | + * request to that node fails. |
| 45 | + * |
| 46 | + * <p>The methods of this interface are invoked on I/O threads, therefore <b>implementations should |
| 47 | + * never block</b>. In particular, don't call {@link Thread#sleep(long)} to retry after a delay: |
| 48 | + * this would prevent asynchronous processing of other requests, and very negatively impact |
| 49 | + * throughput. If the application needs to back off and retry later, this should be implemented in |
| 50 | + * client code, not in this policy. |
| 51 | + */ |
| 52 | +public interface BackoffRetryPolicy extends AutoCloseable { |
| 53 | + /** |
| 54 | + * Whether to retry when the server replied with a {@code READ_TIMEOUT} error; this indicates a |
| 55 | + * <b>server-side</b> timeout during a read query, i.e. some replicas did not reply to the |
| 56 | + * coordinator in time. |
| 57 | + * |
| 58 | + * @param request the request that timed out. |
| 59 | + * @param cl the requested consistency level. |
| 60 | + * @param blockFor the minimum number of replica acknowledgements/responses that were required to |
| 61 | + * fulfill the operation. |
| 62 | + * @param received the number of replica that had acknowledged/responded to the operation before |
| 63 | + * it failed. |
| 64 | + * @param dataPresent whether the actual data was amongst the received replica responses. See |
| 65 | + * {@link ReadTimeoutException#wasDataPresent()}. |
| 66 | + * @param retryCount how many times the retry policy has been invoked already for this request |
| 67 | + * (not counting the current invocation). |
| 68 | + */ |
| 69 | + int onReadTimeoutBackoffMs( |
| 70 | + @NonNull Request request, |
| 71 | + @NonNull ConsistencyLevel cl, |
| 72 | + int blockFor, |
| 73 | + int received, |
| 74 | + boolean dataPresent, |
| 75 | + int retryCount, |
| 76 | + RetryVerdict verdict); |
| 77 | + |
| 78 | + /** |
| 79 | + * Whether to retry when the server replied with a {@code WRITE_TIMEOUT} error; this indicates a |
| 80 | + * <b>server-side</b> timeout during a write query, i.e. some replicas did not reply to the |
| 81 | + * coordinator in time. |
| 82 | + * |
| 83 | + * <p>Note that this method will only be invoked for {@link Request#isIdempotent()} idempotent} |
| 84 | + * requests: when a write times out, it is impossible to determine with 100% certainty whether the |
| 85 | + * mutation was applied or not, so the write is never safe to retry; the driver will rethrow the |
| 86 | + * error directly, without invoking the retry policy. |
| 87 | + * |
| 88 | + * @param request the request that timed out. |
| 89 | + * @param cl the requested consistency level. |
| 90 | + * @param writeType the type of the write for which the timeout was raised. |
| 91 | + * @param blockFor the minimum number of replica acknowledgements/responses that were required to |
| 92 | + * fulfill the operation. |
| 93 | + * @param received the number of replica that had acknowledged/responded to the operation before |
| 94 | + * it failed. |
| 95 | + * @param retryCount how many times the retry policy has been invoked already for this request |
| 96 | + * (not counting the current invocation). |
| 97 | + */ |
| 98 | + int onWriteTimeoutBackoffMs( |
| 99 | + @NonNull Request request, |
| 100 | + @NonNull ConsistencyLevel cl, |
| 101 | + @NonNull WriteType writeType, |
| 102 | + int blockFor, |
| 103 | + int received, |
| 104 | + int retryCount, |
| 105 | + RetryVerdict verdict); |
| 106 | + |
| 107 | + /** |
| 108 | + * Whether to retry when the server replied with an {@code UNAVAILABLE} error; this indicates that |
| 109 | + * the coordinator determined that there were not enough replicas alive to perform a query with |
| 110 | + * the requested consistency level. |
| 111 | + * |
| 112 | + * @param request the request that timed out. |
| 113 | + * @param cl the requested consistency level. |
| 114 | + * @param required the number of replica acknowledgements/responses required to perform the |
| 115 | + * operation (with its required consistency level). |
| 116 | + * @param alive the number of replicas that were known to be alive by the coordinator node when it |
| 117 | + * tried to execute the operation. |
| 118 | + * @param retryCount how many times the retry policy has been invoked already for this request |
| 119 | + * (not counting the current invocation). |
| 120 | + */ |
| 121 | + int onUnavailableBackoffMs( |
| 122 | + @NonNull Request request, |
| 123 | + @NonNull ConsistencyLevel cl, |
| 124 | + int required, |
| 125 | + int alive, |
| 126 | + int retryCount, |
| 127 | + RetryVerdict verdict); |
| 128 | + |
| 129 | + /** |
| 130 | + * Whether to retry when a request was aborted before we could get a response from the server. |
| 131 | + * |
| 132 | + * <p>This can happen in two cases: if the connection was closed due to an external event (this |
| 133 | + * will manifest as a {@link ClosedConnectionException}, or {@link HeartbeatException} for a |
| 134 | + * heartbeat failure); or if there was an unexpected error while decoding the response (this can |
| 135 | + * only be a driver bug). |
| 136 | + * |
| 137 | + * <p>Note that this method will only be invoked for {@linkplain Request#isIdempotent() |
| 138 | + * idempotent} requests: when execution was aborted before getting a response, it is impossible to |
| 139 | + * determine with 100% certainty whether a mutation was applied or not, so a write is never safe |
| 140 | + * to retry; the driver will rethrow the error directly, without invoking the retry policy. |
| 141 | + * |
| 142 | + * @param request the request that was aborted. |
| 143 | + * @param error the error. |
| 144 | + * @param retryCount how many times the retry policy has been invoked already for this request |
| 145 | + * (not counting the current invocation). |
| 146 | + */ |
| 147 | + int onRequestAbortedBackoffMs( |
| 148 | + @NonNull Request request, @NonNull Throwable error, int retryCount, RetryVerdict verdict); |
| 149 | + |
| 150 | + /** |
| 151 | + * Whether to retry when the server replied with a recoverable error (other than {@code |
| 152 | + * READ_TIMEOUT}, {@code WRITE_TIMEOUT} or {@code UNAVAILABLE}). |
| 153 | + * |
| 154 | + * <p>This can happen for the following errors: {@link OverloadedException}, {@link ServerError}, |
| 155 | + * {@link TruncateException}, {@link ReadFailureException}, {@link WriteFailureException}. |
| 156 | + * |
| 157 | + * <p>The following errors are handled internally by the driver, and therefore will <b>never</b> |
| 158 | + * be encountered in this method: |
| 159 | + * |
| 160 | + * <ul> |
| 161 | + * <li>{@link BootstrappingException}: always retried on the next node; |
| 162 | + * <li>{@link QueryValidationException} (and its subclasses), {@link FunctionFailureException} |
| 163 | + * and {@link ProtocolError}: always rethrown. |
| 164 | + * </ul> |
| 165 | + * |
| 166 | + * <p>Note that this method will only be invoked for {@link Request#isIdempotent()} idempotent} |
| 167 | + * requests: when execution was aborted before getting a response, it is impossible to determine |
| 168 | + * with 100% certainty whether a mutation was applied or not, so a write is never safe to retry; |
| 169 | + * the driver will rethrow the error directly, without invoking the retry policy. |
| 170 | + * |
| 171 | + * @param request the request that failed. |
| 172 | + * @param error the error. |
| 173 | + * @param retryCount how many times the retry policy has been invoked already for this request |
| 174 | + * (not counting the current invocation). |
| 175 | + */ |
| 176 | + int onErrorResponseBackoff( |
| 177 | + @NonNull Request request, |
| 178 | + @NonNull CoordinatorException error, |
| 179 | + int retryCount, |
| 180 | + RetryVerdict verdict); |
| 181 | + |
| 182 | + /** Called when the cluster that this policy is associated with closes. */ |
| 183 | + @Override |
| 184 | + void close(); |
| 185 | +} |
0 commit comments