42
42
43
43
import javax .annotation .PostConstruct ;
44
44
45
- import org .apache .commons .io .FilenameUtils ;
46
45
import org .apache .commons .io .IOUtils ;
47
- import org .apache .http .HttpEntity ;
48
46
import org .apache .http .HttpStatus ;
49
47
import org .apache .http .client .methods .CloseableHttpResponse ;
50
48
import org .apache .http .client .methods .HttpGet ;
51
- import org .apache .http .client .methods .HttpPost ;
52
49
import org .apache .http .client .methods .HttpUriRequest ;
53
50
import org .apache .http .client .utils .URIBuilder ;
54
- import org .apache .http .entity .mime .MultipartEntityBuilder ;
55
- import org .apache .http .entity .mime .content .InputStreamBody ;
56
51
import org .apache .http .impl .client .CloseableHttpClient ;
57
52
import org .apache .jena .rdf .model .Model ;
58
53
import org .apache .jena .rdf .model .Resource ;
@@ -96,8 +91,9 @@ public class ExtractTechnicalMetadataJob extends AbstractConcurrentDepositJob {
96
91
private String baseFitsUri ;
97
92
// URI to the examine servlet in the FITS application
98
93
private URI fitsExamineUri ;
99
-
100
- private boolean processFilesLocally ;
94
+ private String fitsHomePath ;
95
+ private Path fitsCommandPath ;
96
+ private int maxFileSizeForWebService ;
101
97
102
98
private Model model ;
103
99
@@ -109,6 +105,7 @@ public ExtractTechnicalMetadataJob(String uuid, String depositUUID) {
109
105
public void initJob () {
110
106
init ();
111
107
fitsExamineUri = URI .create (URIUtil .join (baseFitsUri , FITS_EXAMINE_PATH ));
108
+ fitsCommandPath = Paths .get (fitsHomePath , "fits.sh" );
112
109
}
113
110
114
111
@ Override
@@ -318,7 +315,6 @@ private void overrideDepositMimetype(String fitsExtractMimetype) {
318
315
* @return
319
316
*/
320
317
private Document getFitsDocument (PID objPid , String stagedUriString ) {
321
- HttpUriRequest request ;
322
318
URI stagedUri = URI .create (stagedUriString );
323
319
Path stagedPath ;
324
320
if (!stagedUri .isAbsolute ()) {
@@ -328,45 +324,41 @@ private Document getFitsDocument(PID objPid, String stagedUriString) {
328
324
stagedPath = Paths .get (stagedUri );
329
325
}
330
326
331
- Path sanitizedPath = null ;
332
- // FITS cannot currently handle file paths that contain unicode characters, so need to upload
333
- if (processFilesLocally ) {
334
- // Files are available locally to FITS, so just pass along path
335
- URI fitsUri = null ;
336
- try {
337
- sanitizedPath = sanitizePath (stagedPath );
338
- URIBuilder builder = new URIBuilder (fitsExamineUri );
339
- builder .addParameter ("file" , (sanitizedPath == null ? stagedPath : sanitizedPath ).toString ());
340
- fitsUri = builder .build ();
341
-
342
- log .debug ("Requesting FITS document for {} using local file via URI {}" , objPid , fitsUri );
343
- } catch (URISyntaxException e ) {
344
- failJob (e , "Failed to construct FITs report uri for {0}" , objPid );
345
- } catch (IOException e ) {
346
- failJob (e , "Failed to create symbolic link to file for extract {0} for {1}" , stagedPath , objPid );
347
- }
348
-
349
- request = new HttpGet (fitsUri );
327
+ if (shouldProcessWithWebService (stagedPath )) {
328
+ return extractUsingWebService (objPid , stagedPath );
350
329
} else {
351
- // Files are to be processed remotely, so upload them via a post request
352
- HttpEntity entity ;
353
- try {
354
- entity = MultipartEntityBuilder .create ()
355
- .addPart ("datafile" , new InputStreamBody (Files .newInputStream (stagedPath ),
356
- stagedPath .getFileName ().toString ()))
357
- .build ();
358
- } catch (IOException e ) {
359
- failJob (e , "Unable to read file {0}" , stagedPath );
360
- return null ;
361
- }
330
+ return extractUsingCLI (objPid , stagedPath );
331
+ }
332
+ }
362
333
363
- HttpPost postRequest = new HttpPost (fitsExamineUri );
364
- postRequest .setEntity (entity );
365
- request = postRequest ;
334
+ private boolean shouldProcessWithWebService (Path path ) {
335
+ // FITS cannot currently handle file paths that contain unicode characters
336
+ if (!CharMatcher .ascii ().matchesAllOf (path .toString ())) {
337
+ return false ;
338
+ }
339
+ try {
340
+ return Files .size (path ) <= maxFileSizeForWebService ;
341
+ } catch (IOException e ) {
342
+ failJob (e , "Unable to inspect file" );
343
+ }
344
+ return false ;
345
+ }
366
346
367
- log .debug ("Requesting FITS document for {} using remote file from {}" , objPid , stagedPath );
347
+ private Document extractUsingWebService (PID objPid , Path stagedPath ) {
348
+ // Files are available locally to FITS, so just pass along path
349
+ URI fitsUri = null ;
350
+ try {
351
+ URIBuilder builder = new URIBuilder (fitsExamineUri );
352
+ builder .addParameter ("file" , stagedPath .toString ());
353
+ fitsUri = builder .build ();
354
+
355
+ log .debug ("Requesting FITS document for {} using local file via URI {}" , objPid , fitsUri );
356
+ } catch (URISyntaxException e ) {
357
+ failJob (e , "Failed to construct FITs report uri for {0}" , objPid );
368
358
}
369
359
360
+ HttpUriRequest request = new HttpGet (fitsUri );
361
+
370
362
try (CloseableHttpResponse resp = httpClient .execute (request )) {
371
363
// Write the report response to file
372
364
InputStream respBodyStream = resp .getEntity ().getContent ();
@@ -380,32 +372,22 @@ private Document getFitsDocument(PID objPid, String stagedUriString) {
380
372
} catch (IOException | JDOMException e ) {
381
373
failJob (e , "Failed to stream report for {0} from server to report document" ,
382
374
objPid );
383
- } finally {
384
- // Cleanup symbolic link if one was created
385
- if (sanitizedPath != null ) {
386
- try {
387
- Files .deleteIfExists (sanitizedPath );
388
- } catch (IOException e ) {
389
- log .warn ("Failed to cleanup sanitized path {}: {}" , sanitizedPath , e .getMessage ());
390
- }
391
- }
392
375
}
393
376
return null ;
394
377
}
395
378
396
- private Path sanitizePath (Path path ) throws IOException {
397
- if (CharMatcher .ascii ().matchesAllOf (path .toString ())) {
398
- return null ;
399
- }
400
- String ext = FilenameUtils .getExtension (path .getFileName ().toString ());
401
- if (!ext .equals ("" )) {
402
- ext = "." + ext ;
379
+ private Document extractUsingCLI (PID objPid , Path stagedPath ) {
380
+ try {
381
+ Process process = Runtime .getRuntime ().exec (fitsCommandPath + " -i " + stagedPath );
382
+ if (process .waitFor () != 0 ) {
383
+ failJob (null , "Failed to generate report for {0}, script returned {1} with output:\n {2}" ,
384
+ objPid , process .exitValue (), IOUtils .toString (process .getInputStream (), UTF_8 ));
385
+ }
386
+ return createSAXBuilder ().build (process .getInputStream ());
387
+ } catch (IOException | JDOMException | InterruptedException e ) {
388
+ failJob (e , "Failed to generate report for {0}" , objPid );
403
389
}
404
- // Get a temp path for the symbolic link to be created at, using the same extension as the original
405
- Path linkPath = Files .createTempFile ("extract" , ext );
406
- Files .delete (linkPath );
407
- Files .createSymbolicLink (linkPath , path );
408
- return linkPath ;
390
+ return null ;
409
391
}
410
392
411
393
/**
@@ -577,7 +559,11 @@ public void setBaseFitsUri(String baseFitsUri) {
577
559
this .baseFitsUri = baseFitsUri ;
578
560
}
579
561
580
- public void setProcessFilesLocally (boolean processFilesLocally ) {
581
- this .processFilesLocally = processFilesLocally ;
562
+ public void setFitsHomePath (String fitsHomePath ) {
563
+ this .fitsHomePath = fitsHomePath ;
564
+ }
565
+
566
+ public void setMaxFileSizeForWebService (int maxFileSizeForWebService ) {
567
+ this .maxFileSizeForWebService = maxFileSizeForWebService ;
582
568
}
583
569
}
0 commit comments