4
4
import java .util .concurrent .CompletableFuture ;
5
5
import java .util .concurrent .RejectedExecutionException ;
6
6
import java .util .concurrent .ScheduledExecutorService ;
7
- import java .util .concurrent .ThreadLocalRandom ;
8
7
import java .util .concurrent .TimeUnit ;
9
8
import java .util .concurrent .atomic .AtomicBoolean ;
10
- import java .util .concurrent .atomic .AtomicInteger ;
11
9
12
10
import org .slf4j .Logger ;
13
11
14
12
import tech .ydb .common .retry .RetryConfig ;
13
+ import tech .ydb .common .retry .RetryPolicy ;
15
14
import tech .ydb .core .Status ;
16
15
17
16
/**
18
17
* @author Nikolay Perfilov
19
18
*/
20
19
public abstract class GrpcStreamRetrier {
21
- // TODO: add retry policy
22
- private static final int MAX_RECONNECT_COUNT = 0 ; // Inf
23
- private static final int EXP_BACKOFF_BASE_MS = 256 ;
24
- private static final int EXP_BACKOFF_CEILING_MS = 40000 ; // 40 sec (max delays would be 40-80 sec)
25
- private static final int EXP_BACKOFF_MAX_POWER = 7 ;
26
20
private static final int ID_LENGTH = 6 ;
27
21
private static final char [] ID_ALPHABET = "abcdefghijklmnopqrstuvwxyzABSDEFGHIJKLMNOPQRSTUVWXYZ1234567890"
28
22
.toCharArray ();
29
23
30
- private final RetryConfig retryConfig ;
31
24
protected final String id ;
32
25
protected final AtomicBoolean isReconnecting = new AtomicBoolean (false );
33
26
protected final AtomicBoolean isStopped = new AtomicBoolean (false );
34
- protected final AtomicInteger reconnectCounter = new AtomicInteger (0 );
35
27
36
28
private final ScheduledExecutorService scheduler ;
29
+ private final RetryConfig retryConfig ;
30
+ private volatile int retryCount ;
31
+ private volatile long retryStartedAt ;
37
32
38
33
protected GrpcStreamRetrier (RetryConfig retryConfig , ScheduledExecutorService scheduler ) {
39
34
this .retryConfig = retryConfig ;
@@ -54,45 +49,31 @@ protected static String generateRandomId(int length) {
54
49
.toString ();
55
50
}
56
51
57
- private void tryScheduleReconnect () {
58
- int currentReconnectCounter = reconnectCounter .get () + 1 ;
59
- if (MAX_RECONNECT_COUNT > 0 && currentReconnectCounter > MAX_RECONNECT_COUNT ) {
60
- if (isStopped .compareAndSet (false , true )) {
61
- String errorMessage = "[" + id + "] Maximum retry count (" + MAX_RECONNECT_COUNT
62
- + ") exceeded. Shutting down " + getStreamName ();
63
- getLogger ().error (errorMessage );
64
- shutdownImpl (errorMessage );
65
- return ;
66
- } else {
67
- getLogger ().info ("[{}] Maximum retry count ({}}) exceeded. Need to shutdown {} but it's already " +
68
- "shut down." , id , MAX_RECONNECT_COUNT , getStreamName ());
69
- }
70
- }
71
- if (isReconnecting .compareAndSet (false , true )) {
72
- reconnectCounter .set (currentReconnectCounter );
73
- int delayMs = currentReconnectCounter <= EXP_BACKOFF_MAX_POWER
74
- ? EXP_BACKOFF_BASE_MS * (1 << currentReconnectCounter )
75
- : EXP_BACKOFF_CEILING_MS ;
76
- // Add jitter
77
- delayMs = delayMs + ThreadLocalRandom .current ().nextInt (delayMs );
78
- getLogger ().warn ("[{}] Retry #{}. Scheduling {} reconnect in {}ms..." , id , currentReconnectCounter ,
79
- getStreamName (), delayMs );
80
- try {
81
- scheduler .schedule (this ::reconnect , delayMs , TimeUnit .MILLISECONDS );
82
- } catch (RejectedExecutionException exception ) {
83
- String errorMessage = "[" + id + "] Couldn't schedule reconnect: scheduler is already shut down. " +
84
- "Shutting down " + getStreamName ();
85
- getLogger ().error (errorMessage );
86
- shutdownImpl (errorMessage );
87
- }
88
- } else {
52
+ private void tryReconnect (long delay ) {
53
+ if (!isReconnecting .compareAndSet (false , true )) {
89
54
getLogger ().info ("[{}] should reconnect {} stream, but reconnect is already in progress" , id ,
90
55
getStreamName ());
56
+ return ;
91
57
}
58
+
59
+ getLogger ().warn ("[{}] Retry #{}. Scheduling {} reconnect in {}ms..." , id , retryCount , getStreamName (), delay );
60
+ try {
61
+ scheduler .schedule (this ::reconnect , delay , TimeUnit .MILLISECONDS );
62
+ } catch (RejectedExecutionException exception ) {
63
+ String errorMessage = "[" + id + "] Couldn't schedule reconnect: scheduler is already shut down. " +
64
+ "Shutting down " + getStreamName ();
65
+ getLogger ().error (errorMessage );
66
+ shutdownImpl (errorMessage );
67
+ }
68
+ }
69
+
70
+ protected void resetRetries () {
71
+ retryStartedAt = -1 ;
72
+ retryCount = 0 ;
92
73
}
93
74
94
75
void reconnect () {
95
- getLogger ().info ("[{}] {} reconnect #{} started" , id , getStreamName (), reconnectCounter . get () );
76
+ getLogger ().info ("[{}] {} reconnect #{} started" , id , getStreamName (), retryCount );
96
77
if (!isReconnecting .compareAndSet (true , false )) {
97
78
getLogger ().warn ("Couldn't reset reconnect flag. Shouldn't happen" );
98
79
}
@@ -115,26 +96,53 @@ protected CompletableFuture<Void> shutdownImpl(String reason) {
115
96
protected void onSessionClosed (Status status , Throwable th ) {
116
97
getLogger ().info ("[{}] onSessionClosed called" , id );
117
98
99
+ RetryPolicy retryPolicy = null ;
118
100
if (th != null ) {
119
101
getLogger ().error ("[{}] Exception in {} stream session: " , id , getStreamName (), th );
102
+ retryPolicy = retryConfig .isThrowableRetryable (th );
120
103
} else {
121
104
if (status .isSuccess ()) {
122
105
if (isStopped .get ()) {
123
106
getLogger ().info ("[{}] {} stream session closed successfully" , id , getStreamName ());
124
107
return ;
125
108
} else {
126
- getLogger ().warn ("[{}] {} stream session was closed on working {}" , id , getStreamName (),
127
- getStreamName ());
109
+ getLogger ().warn ("[{}] {} stream session was closed on working {}" , id , getStreamName ());
128
110
}
129
111
} else {
130
112
getLogger ().warn ("[{}] Error in {} stream session: {}" , id , getStreamName (), status );
113
+ retryPolicy = retryConfig .isStatusRetryable (status .getCode ());
131
114
}
132
115
}
133
116
134
- if (!isStopped .get ()) {
135
- tryScheduleReconnect ();
136
- } else {
117
+ if (isStopped .get ()) {
137
118
getLogger ().info ("[{}] {} is already stopped, no need to schedule reconnect" , id , getStreamName ());
119
+ return ;
120
+ }
121
+
122
+ if (retryPolicy != null ) {
123
+ if (retryCount < 1 ) {
124
+ retryStartedAt = System .currentTimeMillis ();
125
+ }
126
+ long delay = retryPolicy .nextRetryMs (retryCount + 1 , System .currentTimeMillis () - retryStartedAt );
127
+ if (delay >= 0 ) {
128
+ retryCount ++;
129
+ tryReconnect (delay );
130
+ return ;
131
+ }
138
132
}
133
+
134
+ long elapsedMs = retryStartedAt > 0 ? System .currentTimeMillis () - retryStartedAt : 0 ;
135
+ if (!isStopped .compareAndSet (false , true )) {
136
+ getLogger ().warn ("[{}] Stopped after {} retries and {} ms elapsed. But {} is already shut down." ,
137
+ id , retryCount , elapsedMs , getStreamName ());
138
+ return ;
139
+ }
140
+
141
+ String errorMessage = "[" + id + "] Stopped after " + retryCount + " retries and " + elapsedMs +
142
+ " ms elapsed. Shutting down " + getStreamName ();
143
+ getLogger ().error (errorMessage );
144
+ shutdownImpl (errorMessage );
139
145
}
146
+
147
+
140
148
}
0 commit comments