6
6
import edu .unc .lib .boxc .deposit .work .AbstractConcurrentDepositJob ;
7
7
import edu .unc .lib .boxc .deposit .work .JobFailedException ;
8
8
import edu .unc .lib .boxc .deposit .work .JobInterruptedException ;
9
+ import edu .unc .lib .boxc .model .api .exceptions .RepositoryException ;
9
10
import edu .unc .lib .boxc .model .api .ids .PID ;
10
11
import edu .unc .lib .boxc .model .api .rdf .CdrDeposit ;
11
12
import edu .unc .lib .boxc .model .fcrepo .ids .DatastreamPids ;
43
44
import java .nio .file .Paths ;
44
45
import java .util .ArrayList ;
45
46
import java .util .Arrays ;
47
+ import java .util .Comparator ;
46
48
import java .util .HashSet ;
47
49
import java .util .Iterator ;
48
50
import java .util .List ;
49
51
import java .util .Map .Entry ;
50
52
import java .util .Objects ;
51
53
import java .util .Set ;
52
54
import java .util .regex .Pattern ;
55
+ import java .util .stream .Collectors ;
53
56
54
57
import static edu .unc .lib .boxc .common .xml .SecureXMLFactory .createSAXBuilder ;
55
58
import static edu .unc .lib .boxc .model .api .rdf .CdrDeposit .mimetype ;
@@ -73,7 +76,7 @@ public class ExtractTechnicalMetadataJob extends AbstractConcurrentDepositJob {
73
76
74
77
private static final String FITS_SINGLE_STATUS = "SINGLE_RESULT" ;
75
78
private static final String FITS_EXAMINE_PATH = "examine" ;
76
- private static final Path TMP_PATH = Paths . get ( System . getProperty ( "java.io.tmpdir" )) ;
79
+ private static final String MIMETYPE_ATTR = "mimetype" ;
77
80
78
81
private CloseableHttpClient httpClient ;
79
82
@@ -215,10 +218,12 @@ public void run() {
215
218
216
219
// Symlink the file before processing
217
220
Path linkPath = makeSymlinkForStagedPath (stagedPath , providedLabel );
218
- // Generate the FITS report as a document
219
- Document fitsDoc = getFitsDocument (objPid , linkPath );
220
221
222
+ Document fitsDoc = null ;
221
223
try {
224
+ // Generate the FITS report as a document
225
+ fitsDoc = getFitsDocument (objPid , linkPath );
226
+
222
227
// Create the PREMIS report wrapper for the FITS results
223
228
Document premisDoc = generatePremisReport (objPid , fitsDoc );
224
229
Element premisObjCharsEl = getObjectCharacteristics (premisDoc );
@@ -234,11 +239,11 @@ public void run() {
234
239
writePremisReport (objPid , premisDoc );
235
240
236
241
receiveResult (result );
237
- } catch (JobFailedException | JobInterruptedException e ) {
242
+ } catch (JobFailedException | JobInterruptedException | RepositoryException e ) {
238
243
throw e ;
239
244
} catch (Exception e ) {
240
245
failJob (e , "Failed to extract FITS details for file '{0}' with id {1} from document:\n {2}" ,
241
- stagedPath , objPid .getId (), getXMLOutputter ().outputString (fitsDoc ));
246
+ stagedPath , objPid .getId (), fitsDoc != null ? getXMLOutputter ().outputString (fitsDoc ) : "null" );
242
247
} finally {
243
248
try {
244
249
Files .delete (linkPath );
@@ -263,7 +268,7 @@ private void addFileIdentification(Document fitsDoc, Element premisObjCharsEl) {
263
268
String fitsMimetype = null ;
264
269
String format ;
265
270
if (identity != null ) {
266
- fitsMimetype = identity .getAttributeValue ("mimetype" );
271
+ fitsMimetype = identity .getAttributeValue (MIMETYPE_ATTR );
267
272
format = identity .getAttributeValue ("format" );
268
273
} else {
269
274
format = "Unknown" ;
@@ -347,15 +352,18 @@ protected Path makeSymlinkForStagedPath(String stagedUriString, String label) {
347
352
// Resolve the path from a URI and make it absolute
348
353
URI stagedUri = URI .create (stagedUriString );
349
354
Path stagedPath ;
355
+ File depositDirectory = getDepositDirectory ();
350
356
if (!stagedUri .isAbsolute ()) {
351
- stagedPath = Paths .get (getDepositDirectory () .toString (), stagedUriString );
357
+ stagedPath = Paths .get (depositDirectory .toString (), stagedUriString );
352
358
} else {
353
359
stagedPath = Paths .get (stagedUri );
354
360
}
355
361
try {
362
+ // Create a unique parent directory for the symlink to avoid filename conflicts
363
+ var parentDir = Files .createTempDirectory (depositDirectory .toPath (), "fits_staging" );
364
+ // Assign the same permissions as the parent directory to the temp dir, since createTempDirectory is restrictive
365
+ Files .setPosixFilePermissions (parentDir , Files .getPosixFilePermissions (parentDir .getParent ()));
356
366
// Create a symlink to the file to make use of the original filename and avoid issues with non-ascii characters
357
- var parentDir = TMP_PATH .resolve (Long .toString (System .nanoTime ()));
358
- Files .createDirectories (parentDir );
359
367
String symlinkName = label != null ? label : stagedPath .getFileName ().toString ();
360
368
var linkPath = sanitizeCliPath (parentDir .resolve (symlinkName ));
361
369
Files .createSymbolicLink (linkPath , stagedPath );
@@ -511,19 +519,22 @@ private Element getFitsIdentificationInformation(Document fitsDoc) {
511
519
return null ;
512
520
}
513
521
514
- // Conflicting identification from FITS, try to resolve
515
- // Don't trust Exiftool if it detects a symlink, which is does not follow to the file.
516
- // Trust any answer agreed on by multiple tools
517
- for (Element el : identification .getChildren ("identity" , FITS_NS )) {
518
- if (el .getChildren ("tool" , FITS_NS ).size () > 1
519
- || !("Exiftool" .equals (el .getChild ("tool" , FITS_NS ).getAttributeValue ("toolname" ))
520
- && "application/x-symlink" .equals (el .getAttributeValue ("mimetype" )))) {
521
- return el ;
522
- }
523
- }
522
+ // Sort the identification elements to find the best value returned by FITS
523
+ var identityEls = identification .getChildren ("identity" , FITS_NS ).stream ()
524
+ // Filter out any invalid entries
525
+ .filter (el -> MimetypeHelpers .isValidMimetype (el .getAttributeValue (MIMETYPE_ATTR )))
526
+ // Primarily sort by the best ranking mimetype
527
+ .sorted (Comparator .comparingInt ((Element el ) -> rankMimetype (el .getAttributeValue (MIMETYPE_ATTR )))
528
+ // Then rank by the number of tools that agreed on the mimetype
529
+ .thenComparingInt (el -> el .getChildren ("tool" , FITS_NS ).size ())
530
+ // Reverse so both rank and tool count are in descending order
531
+ .reversed ()
532
+ // And then favor more application specific mimetypes
533
+ .thenComparingInt (el -> el .getAttributeValue (MIMETYPE_ATTR ).contains ("x-" ) ? -1 : 0 ))
534
+ .collect (Collectors .toList ());
535
+ // Return the best ranking identification, or null if none are valid
536
+ return identityEls .isEmpty () ? null : identityEls .get (0 );
524
537
}
525
-
526
- return null ;
527
538
}
528
539
529
540
private int rankMimetype (String mimetype ) {
0 commit comments