4
4
import java .util .concurrent .CompletableFuture ;
5
5
import java .util .concurrent .RejectedExecutionException ;
6
6
import java .util .concurrent .ScheduledExecutorService ;
7
- import java .util .concurrent .ThreadLocalRandom ;
8
7
import java .util .concurrent .TimeUnit ;
9
8
import java .util .concurrent .atomic .AtomicBoolean ;
10
9
import java .util .concurrent .atomic .AtomicInteger ;
11
10
12
11
import org .slf4j .Logger ;
13
12
13
+ import tech .ydb .common .retry .ExponentialBackoffRetry ;
14
+ import tech .ydb .common .retry .RetryPolicy ;
14
15
import tech .ydb .core .Status ;
15
16
import tech .ydb .topic .settings .RetryMode ;
16
17
17
18
/**
18
19
* @author Nikolay Perfilov
19
20
*/
20
21
public abstract class GrpcStreamRetrier {
21
- // TODO: add retry policy
22
- private static final int MAX_RECONNECT_COUNT = 0 ; // Inf
23
- private static final int EXP_BACKOFF_BASE_MS = 256 ;
24
- private static final int EXP_BACKOFF_CEILING_MS = 40000 ; // 40 sec (max delays would be 40-80 sec)
25
- private static final int EXP_BACKOFF_MAX_POWER = 7 ;
26
22
private static final int ID_LENGTH = 6 ;
27
23
private static final char [] ID_ALPHABET = "abcdefghijklmnopqrstuvwxyzABSDEFGHIJKLMNOPQRSTUVWXYZ1234567890"
28
24
.toCharArray ();
29
25
30
- private final RetryMode retryMode ;
31
26
protected final String id ;
32
27
protected final AtomicBoolean isReconnecting = new AtomicBoolean (false );
33
28
protected final AtomicBoolean isStopped = new AtomicBoolean (false );
29
+
34
30
private final ScheduledExecutorService scheduler ;
35
- protected final AtomicInteger reconnectCounter = new AtomicInteger (0 );
31
+ private final RetryMode retryMode ;
32
+ private final RetryPolicy retryPolicy = new DefaultRetryPolicy ();
33
+ private final AtomicInteger retry = new AtomicInteger (-1 );
36
34
37
35
protected GrpcStreamRetrier (RetryMode retryMode , ScheduledExecutorService scheduler ) {
38
36
this .retryMode = retryMode ;
@@ -53,45 +51,32 @@ protected static String generateRandomId(int length) {
53
51
.toString ();
54
52
}
55
53
56
- private void tryScheduleReconnect () {
57
- int currentReconnectCounter = reconnectCounter .get () + 1 ;
58
- if (MAX_RECONNECT_COUNT > 0 && currentReconnectCounter > MAX_RECONNECT_COUNT ) {
59
- if (isStopped .compareAndSet (false , true )) {
60
- String errorMessage = "[" + id + "] Maximum retry count (" + MAX_RECONNECT_COUNT
61
- + ") exceeded. Shutting down " + getStreamName ();
62
- getLogger ().error (errorMessage );
63
- shutdownImpl (errorMessage );
64
- return ;
65
- } else {
66
- getLogger ().info ("[{}] Maximum retry count ({}}) exceeded. Need to shutdown {} but it's already " +
67
- "shut down." , id , MAX_RECONNECT_COUNT , getStreamName ());
68
- }
69
- }
70
- if (isReconnecting .compareAndSet (false , true )) {
71
- reconnectCounter .set (currentReconnectCounter );
72
- int delayMs = currentReconnectCounter <= EXP_BACKOFF_MAX_POWER
73
- ? EXP_BACKOFF_BASE_MS * (1 << currentReconnectCounter )
74
- : EXP_BACKOFF_CEILING_MS ;
75
- // Add jitter
76
- delayMs = delayMs + ThreadLocalRandom .current ().nextInt (delayMs );
77
- getLogger ().warn ("[{}] Retry #{}. Scheduling {} reconnect in {}ms..." , id , currentReconnectCounter ,
78
- getStreamName (), delayMs );
79
- try {
80
- scheduler .schedule (this ::reconnect , delayMs , TimeUnit .MILLISECONDS );
81
- } catch (RejectedExecutionException exception ) {
82
- String errorMessage = "[" + id + "] Couldn't schedule reconnect: scheduler is already shut down. " +
83
- "Shutting down " + getStreamName ();
84
- getLogger ().error (errorMessage );
85
- shutdownImpl (errorMessage );
86
- }
87
- } else {
54
+ private void tryScheduleReconnect (int retryNumber ) {
55
+ if (!isReconnecting .compareAndSet (false , true )) {
88
56
getLogger ().info ("[{}] should reconnect {} stream, but reconnect is already in progress" , id ,
89
57
getStreamName ());
58
+ return ;
59
+ }
60
+
61
+ retry .set (retryNumber );
62
+ long delay = retryPolicy .nextRetryMs (retryNumber , 0 );
63
+ getLogger ().warn ("[{}] Retry #{}. Scheduling {} reconnect in {}ms..." , id , retryNumber , getStreamName (), delay );
64
+ try {
65
+ scheduler .schedule (this ::reconnect , delay , TimeUnit .MILLISECONDS );
66
+ } catch (RejectedExecutionException exception ) {
67
+ String errorMessage = "[" + id + "] Couldn't schedule reconnect: scheduler is already shut down. " +
68
+ "Shutting down " + getStreamName ();
69
+ getLogger ().error (errorMessage );
70
+ shutdownImpl (errorMessage );
90
71
}
91
72
}
92
73
74
+ protected void resetRetries () {
75
+ retry .set (0 );
76
+ }
77
+
93
78
void reconnect () {
94
- getLogger ().info ("[{}] {} reconnect #{} started" , id , getStreamName (), reconnectCounter .get ());
79
+ getLogger ().info ("[{}] {} reconnect #{} started" , id , getStreamName (), retry .get ());
95
80
if (!isReconnecting .compareAndSet (true , false )) {
96
81
getLogger ().warn ("Couldn't reset reconnect flag. Shouldn't happen" );
97
82
}
@@ -130,10 +115,52 @@ protected void onSessionClosed(Status status, Throwable th) {
130
115
}
131
116
}
132
117
133
- if (!isStopped .get ()) {
134
- tryScheduleReconnect ();
135
- } else {
118
+ if (isStopped .get ()) {
136
119
getLogger ().info ("[{}] {} is already stopped, no need to schedule reconnect" , id , getStreamName ());
120
+ return ;
121
+ }
122
+
123
+ int currentRetry = nextRetryNumber ();
124
+ if (currentRetry > 0 ) {
125
+ tryScheduleReconnect (currentRetry );
126
+ return ;
127
+ }
128
+
129
+ if (!isStopped .compareAndSet (false , true )) {
130
+ getLogger ().warn ("[{}] Stopped by retry mode {} after {} retries. But {} is already shut down." , id ,
131
+ retryMode , currentRetry , getStreamName ());
132
+ return ;
133
+ }
134
+
135
+ String errorMessage = "[" + id + "] Stopped by retry mode " + retryMode + " after " + currentRetry +
136
+ " retries. Shutting down " + getStreamName ();
137
+ getLogger ().error (errorMessage );
138
+ shutdownImpl (errorMessage );
139
+ }
140
+
141
+ private int nextRetryNumber () {
142
+ int next = retry .get () + 1 ;
143
+ switch (retryMode ) {
144
+ case RECOVER : return next ;
145
+ case ALWAYS : return Math .max (1 , next );
146
+ case NONE :
147
+ default :
148
+ return 0 ;
149
+ }
150
+ }
151
+
152
+ private static class DefaultRetryPolicy extends ExponentialBackoffRetry {
153
+
154
+ private static final int EXP_BACKOFF_BASE_MS = 256 ;
155
+ private static final int EXP_BACKOFF_MAX_POWER = 7 ;
156
+
157
+ DefaultRetryPolicy () {
158
+ super (EXP_BACKOFF_BASE_MS , EXP_BACKOFF_MAX_POWER );
159
+ }
160
+
161
+ @ Override
162
+ public long nextRetryMs (int retryCount , long elapsedTimeMs /* ignored */ ) {
163
+ return backoffTimeMillis (retryCount );
137
164
}
138
165
}
139
166
}
0 commit comments