20
20
import io .micronaut .rxjava2 .http .client .websockets .RxWebSocketClient ;
21
21
import io .micronaut .scheduling .TaskScheduler ;
22
22
import io .micronaut .websocket .exceptions .WebSocketClientException ;
23
+ import io .seqera .tower .agent .exceptions .RecoverableException ;
24
+ import io .seqera .tower .agent .exceptions .UnrecoverableException ;
23
25
import io .seqera .tower .agent .exchange .CommandRequest ;
24
26
import io .seqera .tower .agent .exchange .CommandResponse ;
25
27
import io .seqera .tower .agent .exchange .HeartbeatMessage ;
38
40
import java .lang .module .ModuleDescriptor ;
39
41
import java .net .URI ;
40
42
import java .net .URISyntaxException ;
43
+ import java .net .UnknownHostException ;
41
44
import java .nio .file .Files ;
42
45
import java .nio .file .InvalidPathException ;
43
46
import java .nio .file .Path ;
@@ -98,19 +101,30 @@ public static void main(String[] args) throws Exception {
98
101
public void run () {
99
102
try {
100
103
validateParameters ();
101
- checkTower ();
102
- connectTower ();
103
104
sendPeriodicHeartbeat ();
104
- } catch (Exception e ) {
105
+ infiniteLoop ();
106
+ } catch (UnrecoverableException e ) {
105
107
logger .error (e .getMessage ());
106
108
System .exit (1 );
109
+ } catch (Throwable e ) {
110
+ logger .error (e .getMessage (), e );
111
+ System .exit (1 );
107
112
}
108
113
}
109
114
110
- private void connectTowerDelay () {
111
- TaskScheduler scheduler = ctx .getBean (TaskScheduler .class );
112
- Duration delay = Duration .ofSeconds (2 );
113
- scheduler .schedule (delay , this ::connectTower );
115
+ private void infiniteLoop () throws InterruptedException , IOException {
116
+ while (true ) {
117
+ try {
118
+ if (agentClient == null || !agentClient .isOpen ()) {
119
+ checkTower ();
120
+ connectTower ();
121
+ }
122
+ } catch (RecoverableException e ) {
123
+ logger .error (e .getMessage ());
124
+ }
125
+
126
+ Thread .sleep (2000 );
127
+ }
114
128
}
115
129
116
130
/**
@@ -121,32 +135,28 @@ private void connectTower() {
121
135
try {
122
136
final URI uri = new URI (url + "/agent/" + agentKey + "/connect" );
123
137
if (!uri .getScheme ().equals ("https" )) {
124
- logger .error ("You are trying to connect to an insecure server: {}" , url );
125
- System .exit (1 );
138
+ throw new UnrecoverableException (String .format ("You are trying to connect to an insecure server: %s" , url ));
126
139
}
127
140
128
141
final MutableHttpRequest <?> req = HttpRequest .GET (uri ).bearerAuth (token );
129
142
final RxWebSocketClient webSocketClient = ctx .getBean (RxWebSocketClient .class );
130
143
agentClient = webSocketClient .connect (AgentClientSocket .class , req )
131
144
.timeout (5 , TimeUnit .SECONDS )
132
145
.blockingFirst ();
133
- agentClient .setConnectCallback (this ::connectTowerDelay );
134
146
agentClient .setCommandRequestCallback (this ::execCommand );
135
147
sendInfoMessage ();
136
148
} catch (URISyntaxException e ) {
137
- logger .error ("Invalid URI: {}/agent/{}/connect - {}" , url , agentKey , e .getMessage ());
138
- System .exit (1 );
149
+ throw new UnrecoverableException (String .format ("Invalid URI: %s/agent/%s/connect - %s" , url , agentKey , e .getMessage ()));
139
150
} catch (WebSocketClientException e ) {
140
- logger .error ("Connection error - {}" , e .getMessage ());
141
- System .exit (1 );
151
+ throw new RecoverableException (String .format ("Connection error - %s" , e .getMessage ()));
152
+ } catch (UnknownHostException e ) {
153
+ throw new RecoverableException ("Unknown host exception - Check that it's a valid DNS domain." );
142
154
} catch (Exception e ) {
143
155
if (e .getCause () instanceof TimeoutException ) {
144
- logger .error ("Connection timeout [trying to reconnect in {} seconds]" , heartbeatDelay );
145
- } else {
146
- logger .error ("Unknown problem" );
147
- e .printStackTrace ();
156
+ throw new RecoverableException (String .format ("Connection timeout -- %s" , e .getCause ().getMessage ()));
148
157
}
149
- System .exit (1 );
158
+
159
+ throw new RecoverableException (String .format ("Unknown problem - %s" , e .getMessage ()), e );
150
160
}
151
161
}
152
162
@@ -159,6 +169,7 @@ private void execCommand(CommandRequest message) {
159
169
CommandResponse response ;
160
170
161
171
try {
172
+ logger .trace ("REQUEST: {}" , message .getCommand ());
162
173
Process process = new ProcessBuilder ()
163
174
.command ("sh" , "-c" , message .getCommand ())
164
175
.redirectErrorStream (true )
@@ -199,12 +210,10 @@ private void execCommand(CommandRequest message) {
199
210
private void sendPeriodicHeartbeat () {
200
211
TaskScheduler scheduler = ctx .getBean (TaskScheduler .class );
201
212
scheduler .scheduleWithFixedDelay (heartbeatDelay , heartbeatDelay , () -> {
202
- if (agentClient .isOpen ()) {
213
+ if (agentClient != null && agentClient .isOpen ()) {
203
214
logger .info ("Sending heartbeat" );
215
+ logger .trace ("websocket session '{}'" , agentClient .getId ());
204
216
agentClient .send (new HeartbeatMessage ());
205
- } else {
206
- logger .info ("Trying to reconnect" );
207
- connectTower ();
208
217
}
209
218
});
210
219
}
@@ -226,8 +235,7 @@ private void validateParameters() throws IOException {
226
235
// Fetch username
227
236
validatedUserName = System .getenv ().getOrDefault ("USER" , System .getProperty ("user.name" ));
228
237
if (validatedUserName == null || validatedUserName .isEmpty () || validatedUserName .isBlank () || validatedUserName .equals ("?" )) {
229
- logger .error ("Impossible to detect current Unix username. Try setting USER environment variable." );
230
- System .exit (1 );
238
+ throw new UnrecoverableException ("Impossible to detect current Unix username. Try setting USER environment variable." );
231
239
}
232
240
233
241
// Set default workDir
@@ -237,15 +245,13 @@ private void validateParameters() throws IOException {
237
245
try {
238
246
workDir = Paths .get (defaultPath );
239
247
} catch (InvalidPathException e ) {
240
- logger .error ("Impossible to define a default work directory. Please provide one using '--work-dir'." );
241
- System .exit (1 );
248
+ throw new UnrecoverableException ("Impossible to define a default work directory. Please provide one using '--work-dir'." );
242
249
}
243
250
}
244
251
245
252
// Validate workDir exists
246
253
if (!Files .exists (workDir )) {
247
- logger .error ("The work directory '{}' do not exists. Create it or provide a different one using '--work-dir'." , workDir );
248
- System .exit (1 );
254
+ throw new UnrecoverableException (String .format ("The work directory '%s' do not exists. Create it or provide a different one using '--work-dir'." , workDir ));
249
255
}
250
256
validatedWorkDir = workDir .toAbsolutePath ().normalize ().toString ();
251
257
@@ -261,38 +267,35 @@ private void validateParameters() throws IOException {
261
267
* Do some health checks to the Tower API endpoint to verify that it is available and
262
268
* compatible with this Agent.
263
269
*/
264
- private void checkTower () {
270
+ private void checkTower () throws IOException {
265
271
final RxHttpClient httpClient = ctx .getBean (RxHttpClient .class );
272
+ ServiceInfoResponse infoResponse = null ;
266
273
try {
267
274
final URI uri = new URI (url + "/service-info" );
268
275
final MutableHttpRequest <?> req = HttpRequest .GET (uri ).bearerAuth (token );
269
-
270
- ServiceInfoResponse infoResponse = httpClient .retrieve (req , ServiceInfoResponse .class ).blockingFirst ();
271
- if (infoResponse .getServiceInfo () != null && infoResponse .getServiceInfo ().getApiVersion () != null ) {
272
- final ModuleDescriptor .Version systemApiVersion = ModuleDescriptor .Version .parse (infoResponse .getServiceInfo ().getApiVersion ());
273
- final ModuleDescriptor .Version requiredApiVersion = ModuleDescriptor .Version .parse (getVersionApi ());
274
-
275
- if (systemApiVersion .compareTo (requiredApiVersion ) < 0 ) {
276
- logger .error ("Tower at '{}' is running API version {} and the agent needs a minimum of {}" , url , systemApiVersion , requiredApiVersion );
277
- System .exit (1 );
278
- }
279
- }
276
+ infoResponse = httpClient .retrieve (req , ServiceInfoResponse .class ).blockingFirst ();
280
277
} catch (Exception e ) {
281
278
if (url .contains ("/api" )) {
282
- logger .error ("Tower API endpoint '{}' it is not available" , url );
283
- } else {
284
- logger .error ("Tower API endpoint '{}' it is not available (did you mean '{}/api'?)" , url , url );
279
+ throw new RecoverableException (String .format ("Tower API endpoint '%s' it is not available" , url ));
280
+ }
281
+ throw new RecoverableException (String .format ("Tower API endpoint '%s' it is not available (did you mean '%s/api'?)" , url , url ));
282
+ }
283
+
284
+ if (infoResponse != null && infoResponse .getServiceInfo () != null && infoResponse .getServiceInfo ().getApiVersion () != null ) {
285
+ final ModuleDescriptor .Version systemApiVersion = ModuleDescriptor .Version .parse (infoResponse .getServiceInfo ().getApiVersion ());
286
+ final ModuleDescriptor .Version requiredApiVersion = ModuleDescriptor .Version .parse (getVersionApi ());
287
+
288
+ if (systemApiVersion .compareTo (requiredApiVersion ) < 0 ) {
289
+ throw new UnrecoverableException (String .format ("Tower at '%s' is running API version %s and the agent needs a minimum of %s" , url , systemApiVersion , requiredApiVersion ));
285
290
}
286
- System .exit (1 );
287
291
}
288
292
289
293
try {
290
294
final URI uri = new URI (url + "/user" );
291
295
final MutableHttpRequest <?> req = HttpRequest .GET (uri ).bearerAuth (token );
292
296
httpClient .retrieve (req ).blockingFirst ();
293
297
} catch (Exception e ) {
294
- logger .error ("Invalid TOWER_ACCESS_TOKEN, check that the given token has access at '{}'." , url );
295
- System .exit (1 );
298
+ throw new UnrecoverableException (String .format ("Invalid TOWER_ACCESS_TOKEN, check that the given token has access at '%s'." , url ));
296
299
}
297
300
}
298
301
0 commit comments