33
44import com .yahoo .concurrent .DaemonThreadFactory ;
55import com .yahoo .config .FileReference ;
6+ import com .yahoo .jrt .ErrorCode ;
67import com .yahoo .jrt .Int32Value ;
78import com .yahoo .jrt .Request ;
89import com .yahoo .jrt .Spec ;
@@ -39,6 +40,8 @@ public class FileReferenceDownloader {
// Logger named after this class.
3940 private static final Logger log = Logger .getLogger (FileReferenceDownloader .class .getName ());
// Immutable set holding the three compression types lz4, none and zstd.
// NOTE(review): presumably what is advertised as accepted when a request does not specify its own
// set - the usage is not visible in this excerpt, confirm against createRequest.
4041 private static final Set <CompressionType > defaultAcceptedCompressionTypes = Set .of (lz4 , none , zstd );
4142
43+ private enum DownloadResult { SUCCESS , TIMEOUT , FAILURE }
44+
// Fixed-size thread pool: one thread per available processor, but never fewer than 8.
// Threads are created by a DaemonThreadFactory constructed with the name "filereference downloader".
4245 private final ExecutorService downloadExecutor =
4346 Executors .newFixedThreadPool (Math .max (8 , Runtime .getRuntime ().availableProcessors ()),
4447 new DaemonThreadFactory ("filereference downloader" ));
@@ -49,17 +52,31 @@ public class FileReferenceDownloader {
4952 private final Optional <Duration > rpcTimeout ; // Only used when overridden with env variable
5053 private final File downloadDirectory ;
5154 private final AtomicBoolean shutDown = new AtomicBoolean (false );
// Threshold for closing a connection after timed-out start-download RPCs.
// Closing is only attempted when this is greater than 0: waitUntilDownloadStarted closes the
// connection once this many timeouts have been counted (and then restarts the count), while
// startDownloadFromSource closes it after a single timeout.
55+ private final int maxTimeoutsBeforeClose ;
5256
/**
 * Creates a downloader whose "max timeouts before close" threshold comes from the environment.
 *
 * Delegates to the six-argument constructor, passing the value of the environment variable
 * VESPA_FILE_DOWNLOAD_MAX_TIMEOUTS_BEFORE_CLOSE, or 0 when that variable is not set.
 *
 * @param connectionPool     passed through unchanged to the six-argument constructor
 * @param downloads          passed through unchanged to the six-argument constructor
 * @param timeout            passed through unchanged to the six-argument constructor
 * @param backoffInitialTime passed through unchanged to the six-argument constructor
 * @param downloadDirectory  passed through unchanged to the six-argument constructor
 * @throws NumberFormatException if the environment variable is set but is not a valid integer
 */
FileReferenceDownloader(ConnectionPool connectionPool,
                        Downloads downloads,
                        Duration timeout,
                        Duration backoffInitialTime,
                        File downloadDirectory) {
    this(connectionPool, downloads, timeout, backoffInitialTime, downloadDirectory,
         maxTimeoutsBeforeCloseFromEnvironment());
}

/**
 * Reads VESPA_FILE_DOWNLOAD_MAX_TIMEOUTS_BEFORE_CLOSE from the environment.
 *
 * @return the parsed integer value, or 0 when the variable is not set
 * @throws NumberFormatException if the variable is set but cannot be parsed as an integer
 */
private static int maxTimeoutsBeforeCloseFromEnvironment() {
    String configured = System.getenv("VESPA_FILE_DOWNLOAD_MAX_TIMEOUTS_BEFORE_CLOSE");
    return configured == null ? 0 : Integer.parseInt(configured);
}
67+
68+ FileReferenceDownloader (ConnectionPool connectionPool ,
69+ Downloads downloads ,
70+ Duration timeout ,
71+ Duration backoffInitialTime ,
72+ File downloadDirectory ,
73+ int maxTimeoutsBeforeClose ) {
5874 this .connectionPool = connectionPool ;
5975 this .downloads = downloads ;
6076 this .downloadTimeout = timeout ;
6177 this .backoffInitialTime = backoffInitialTime ;
6278 this .downloadDirectory = downloadDirectory ;
79+ this .maxTimeoutsBeforeClose = maxTimeoutsBeforeClose ;
6380 // Undocumented on purpose, might change or be removed at any time
6481 var timeoutString = Optional .ofNullable (System .getenv ("VESPA_FILE_DOWNLOAD_RPC_TIMEOUT" ));
6582 this .rpcTimeout = timeoutString .map (t -> Duration .ofSeconds (Integer .parseInt (t )));
@@ -69,6 +86,7 @@ private void waitUntilDownloadStarted(FileReferenceDownload fileReferenceDownloa
6986 Instant end = Instant .now ().plus (downloadTimeout );
7087 FileReference fileReference = fileReferenceDownload .fileReference ();
7188 int retryCount = 0 ;
89+ int timeoutCount = 0 ;
7290 Connection connection = connectionPool .getCurrent ();
7391 do {
7492 if (retryCount > 0 )
@@ -81,8 +99,19 @@ private void waitUntilDownloadStarted(FileReferenceDownload fileReferenceDownloa
8199 var timeout = rpcTimeout .orElse (Duration .between (Instant .now (), end ));
82100 log .log (Level .FINE , "Wait until download of " + fileReference + " has started, retryCount " + retryCount +
83101 ", timeout " + timeout + " (request from " + fileReferenceDownload .client () + ")" );
84- if ( ! timeout .isNegative () && startDownloadRpc (fileReferenceDownload , retryCount , connection , timeout ))
85- return ;
102+ if ( ! timeout .isNegative ()) {
103+ var result = startDownloadRpc (fileReferenceDownload , retryCount , connection , timeout );
104+ if (result == DownloadResult .SUCCESS ) return ;
105+ if (result == DownloadResult .TIMEOUT && maxTimeoutsBeforeClose > 0 ) {
106+ timeoutCount ++;
107+ if (timeoutCount >= maxTimeoutsBeforeClose ) {
108+ log .log (Level .INFO , "RPC request for " + fileReference + " timed out " + timeoutCount +
109+ " times, closing connection to " + connection .getAddress ());
110+ connection .closeConnection ();
111+ timeoutCount = 0 ;
112+ }
113+ }
114+ }
86115
87116 retryCount ++;
88117 // There might not be one connection that works for all file references (each file reference might
@@ -131,10 +160,13 @@ void startDownloadFromSource(FileReferenceDownload fileReferenceDownload, Spec s
131160
132161 log .log (Level .FINE , () -> "Will download " + fileReference + " with timeout " + downloadTimeout + " from " + spec .host ());
133162 downloads .add (fileReferenceDownload );
134- var downloading = startDownloadRpc (fileReferenceDownload , 1 , connection , downloadTimeout );
163+ var result = startDownloadRpc (fileReferenceDownload , 1 , connection , downloadTimeout );
164+ if (result == DownloadResult .TIMEOUT && maxTimeoutsBeforeClose > 0 ) {
165+ connection .closeConnection ();
166+ }
135167 // Need to explicitly remove from downloads if downloading has not started.
136168 // If downloading *has* started FileReceiver will take care of that when download has completed or failed
137- if ( ! downloading )
169+ if (result != DownloadResult . SUCCESS )
138170 downloads .remove (fileReference );
139171 });
140172 }
@@ -144,7 +176,7 @@ void failedDownloading(FileReference fileReference) {
144176 downloads .remove (fileReference );
145177 }
146178
147- private boolean startDownloadRpc (FileReferenceDownload fileReferenceDownload , int retryCount , Connection connection , Duration timeout ) {
179+ private DownloadResult startDownloadRpc (FileReferenceDownload fileReferenceDownload , int retryCount , Connection connection , Duration timeout ) {
148180 Request request = createRequest (fileReferenceDownload );
149181 connection .invokeSync (request , timeout );
150182
@@ -157,18 +189,18 @@ private boolean startDownloadRpc(FileReferenceDownload fileReferenceDownload, in
157189
158190 if (errorCode == 0 ) {
159191 log .log (Level .FINE , () -> "Found " + fileReference + " available at " + address );
160- return true ;
192+ return DownloadResult . SUCCESS ;
161193 } else {
162194 var error = FileApiErrorCodes .get (errorCode );
163195 log .log (logLevel , "Downloading " + fileReference + " from " + address + " failed (" + error + ")" );
164- return false ;
196+ return DownloadResult . FAILURE ;
165197 }
166198 } else {
167199 log .log (logLevel , "Downloading " + fileReference + " from " + address +
168200 " (client " + fileReferenceDownload .client () + ") failed:" +
169201 " error code " + request .errorCode () + " (" + request .errorMessage () + ")." +
170202 " (retry " + retryCount + ", rpc timeout " + timeout + ")" );
171- return false ;
203+ return request . errorCode () == ErrorCode . TIMEOUT ? DownloadResult . TIMEOUT : DownloadResult . FAILURE ;
172204 }
173205 }
174206
0 commit comments