Skip to content

Commit 8d62721

Browse files
authored
FITS extract to be performed using commandline utility instead of webservice under certain conditions, currently based on characters in file path and size of the file (#1284)
1 parent 38e61fb commit 8d62721

File tree

3 files changed

+123
-102
lines changed

3 files changed

+123
-102
lines changed

deposit/src/main/java/edu/unc/lib/deposit/validate/ExtractTechnicalMetadataJob.java

+51-65
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,12 @@
4242

4343
import javax.annotation.PostConstruct;
4444

45-
import org.apache.commons.io.FilenameUtils;
4645
import org.apache.commons.io.IOUtils;
47-
import org.apache.http.HttpEntity;
4846
import org.apache.http.HttpStatus;
4947
import org.apache.http.client.methods.CloseableHttpResponse;
5048
import org.apache.http.client.methods.HttpGet;
51-
import org.apache.http.client.methods.HttpPost;
5249
import org.apache.http.client.methods.HttpUriRequest;
5350
import org.apache.http.client.utils.URIBuilder;
54-
import org.apache.http.entity.mime.MultipartEntityBuilder;
55-
import org.apache.http.entity.mime.content.InputStreamBody;
5651
import org.apache.http.impl.client.CloseableHttpClient;
5752
import org.apache.jena.rdf.model.Model;
5853
import org.apache.jena.rdf.model.Resource;
@@ -96,8 +91,9 @@ public class ExtractTechnicalMetadataJob extends AbstractConcurrentDepositJob {
9691
private String baseFitsUri;
9792
// URI to the examine servlet in the FITS application
9893
private URI fitsExamineUri;
99-
100-
private boolean processFilesLocally;
94+
private String fitsHomePath;
95+
private Path fitsCommandPath;
96+
private int maxFileSizeForWebService;
10197

10298
private Model model;
10399

@@ -109,6 +105,7 @@ public ExtractTechnicalMetadataJob(String uuid, String depositUUID) {
109105
public void initJob() {
110106
init();
111107
fitsExamineUri = URI.create(URIUtil.join(baseFitsUri, FITS_EXAMINE_PATH));
108+
fitsCommandPath = Paths.get(fitsHomePath, "fits.sh");
112109
}
113110

114111
@Override
@@ -318,7 +315,6 @@ private void overrideDepositMimetype(String fitsExtractMimetype) {
318315
* @return
319316
*/
320317
private Document getFitsDocument(PID objPid, String stagedUriString) {
321-
HttpUriRequest request;
322318
URI stagedUri = URI.create(stagedUriString);
323319
Path stagedPath;
324320
if (!stagedUri.isAbsolute()) {
@@ -328,45 +324,41 @@ private Document getFitsDocument(PID objPid, String stagedUriString) {
328324
stagedPath = Paths.get(stagedUri);
329325
}
330326

331-
Path sanitizedPath = null;
332-
// FITS cannot currently handle file paths that contain unicode characters, so need to upload
333-
if (processFilesLocally) {
334-
// Files are available locally to FITS, so just pass along path
335-
URI fitsUri = null;
336-
try {
337-
sanitizedPath = sanitizePath(stagedPath);
338-
URIBuilder builder = new URIBuilder(fitsExamineUri);
339-
builder.addParameter("file", (sanitizedPath == null ? stagedPath : sanitizedPath).toString());
340-
fitsUri = builder.build();
341-
342-
log.debug("Requesting FITS document for {} using local file via URI {}", objPid, fitsUri);
343-
} catch (URISyntaxException e) {
344-
failJob(e, "Failed to construct FITs report uri for {0}", objPid);
345-
} catch (IOException e) {
346-
failJob(e, "Failed to create symbolic link to file for extract {0} for {1}", stagedPath, objPid);
347-
}
348-
349-
request = new HttpGet(fitsUri);
327+
if (shouldProcessWithWebService(stagedPath)) {
328+
return extractUsingWebService(objPid, stagedPath);
350329
} else {
351-
// Files are to be processed remotely, so upload them via a post request
352-
HttpEntity entity;
353-
try {
354-
entity = MultipartEntityBuilder.create()
355-
.addPart("datafile", new InputStreamBody(Files.newInputStream(stagedPath),
356-
stagedPath.getFileName().toString()))
357-
.build();
358-
} catch (IOException e) {
359-
failJob(e, "Unable to read file {0}", stagedPath);
360-
return null;
361-
}
330+
return extractUsingCLI(objPid, stagedPath);
331+
}
332+
}
362333

363-
HttpPost postRequest = new HttpPost(fitsExamineUri);
364-
postRequest.setEntity(entity);
365-
request = postRequest;
334+
private boolean shouldProcessWithWebService(Path path) {
335+
// FITS cannot currently handle file paths that contain unicode characters
336+
if (!CharMatcher.ascii().matchesAllOf(path.toString())) {
337+
return false;
338+
}
339+
try {
340+
return Files.size(path) <= maxFileSizeForWebService;
341+
} catch (IOException e) {
342+
failJob(e, "Unable to inspect file");
343+
}
344+
return false;
345+
}
366346

367-
log.debug("Requesting FITS document for {} using remote file from {}", objPid, stagedPath);
347+
private Document extractUsingWebService(PID objPid, Path stagedPath) {
348+
// Files are available locally to FITS, so just pass along path
349+
URI fitsUri = null;
350+
try {
351+
URIBuilder builder = new URIBuilder(fitsExamineUri);
352+
builder.addParameter("file", stagedPath.toString());
353+
fitsUri = builder.build();
354+
355+
log.debug("Requesting FITS document for {} using local file via URI {}", objPid, fitsUri);
356+
} catch (URISyntaxException e) {
357+
failJob(e, "Failed to construct FITs report uri for {0}", objPid);
368358
}
369359

360+
HttpUriRequest request = new HttpGet(fitsUri);
361+
370362
try (CloseableHttpResponse resp = httpClient.execute(request)) {
371363
// Write the report response to file
372364
InputStream respBodyStream = resp.getEntity().getContent();
@@ -380,32 +372,22 @@ private Document getFitsDocument(PID objPid, String stagedUriString) {
380372
} catch (IOException | JDOMException e) {
381373
failJob(e, "Failed to stream report for {0} from server to report document",
382374
objPid);
383-
} finally {
384-
// Cleanup symbolic link if one was created
385-
if (sanitizedPath != null) {
386-
try {
387-
Files.deleteIfExists(sanitizedPath);
388-
} catch (IOException e) {
389-
log.warn("Failed to cleanup sanitized path {}: {}", sanitizedPath, e.getMessage());
390-
}
391-
}
392375
}
393376
return null;
394377
}
395378

396-
private Path sanitizePath(Path path) throws IOException {
397-
if (CharMatcher.ascii().matchesAllOf(path.toString())) {
398-
return null;
399-
}
400-
String ext = FilenameUtils.getExtension(path.getFileName().toString());
401-
if (!ext.equals("")) {
402-
ext = "." + ext;
379+
private Document extractUsingCLI(PID objPid, Path stagedPath) {
380+
try {
381+
Process process = Runtime.getRuntime().exec(fitsCommandPath + " -i " + stagedPath);
382+
if (process.waitFor() != 0) {
383+
failJob(null, "Failed to generate report for {0}, script returned {1} with output:\n{2}",
384+
objPid, process.exitValue(), IOUtils.toString(process.getInputStream(), UTF_8));
385+
}
386+
return createSAXBuilder().build(process.getInputStream());
387+
} catch (IOException | JDOMException | InterruptedException e) {
388+
failJob(e, "Failed to generate report for {0}", objPid);
403389
}
404-
// Get a temp path for the symbolic link to be created at, using the same extension as the original
405-
Path linkPath = Files.createTempFile("extract", ext);
406-
Files.delete(linkPath);
407-
Files.createSymbolicLink(linkPath, path);
408-
return linkPath;
390+
return null;
409391
}
410392

411393
/**
@@ -577,7 +559,11 @@ public void setBaseFitsUri(String baseFitsUri) {
577559
this.baseFitsUri = baseFitsUri;
578560
}
579561

580-
public void setProcessFilesLocally(boolean processFilesLocally) {
581-
this.processFilesLocally = processFilesLocally;
562+
public void setFitsHomePath(String fitsHomePath) {
563+
this.fitsHomePath = fitsHomePath;
564+
}
565+
566+
public void setMaxFileSizeForWebService(int maxFileSizeForWebService) {
567+
this.maxFileSizeForWebService = maxFileSizeForWebService;
582568
}
583569
}

deposit/src/main/webapp/WEB-INF/deposit-jobs-context.xml

+2-1
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@
265265
scope="prototype">
266266
<property name="baseFitsUri" value="${fits.baseUri}" />
267267
<property name="httpClient" ref="fitsPooledHttpClient" />
268-
<property name="processFilesLocally" value="${fits.processFilesLocally:true}" />
268+
<property name="maxFileSizeForWebService" value="${fits.maxFileSizeForWebService:128000000}" />
269+
<property name="fitsHomePath" value="${fits.homePath}" />
269270
<property name="executorService" ref="extractTechnicalMetadataExecutor" />
270271
<property name="flushRate" value="${job.extractTechnicalMetadata.flushRate:2000}" />
271272
<property name="maxQueuedJobs" value="${job.extractTechnicalMetadata.maxQueuedJobs:6}" />

0 commit comments

Comments
 (0)