66import edu .unc .lib .boxc .deposit .work .AbstractConcurrentDepositJob ;
77import edu .unc .lib .boxc .deposit .work .JobFailedException ;
88import edu .unc .lib .boxc .deposit .work .JobInterruptedException ;
9+ import edu .unc .lib .boxc .model .api .exceptions .RepositoryException ;
910import edu .unc .lib .boxc .model .api .ids .PID ;
1011import edu .unc .lib .boxc .model .api .rdf .CdrDeposit ;
1112import edu .unc .lib .boxc .model .fcrepo .ids .DatastreamPids ;
4344import java .nio .file .Paths ;
4445import java .util .ArrayList ;
4546import java .util .Arrays ;
47+ import java .util .Comparator ;
4648import java .util .HashSet ;
4749import java .util .Iterator ;
4850import java .util .List ;
4951import java .util .Map .Entry ;
5052import java .util .Objects ;
5153import java .util .Set ;
5254import java .util .regex .Pattern ;
55+ import java .util .stream .Collectors ;
5356
5457import static edu .unc .lib .boxc .common .xml .SecureXMLFactory .createSAXBuilder ;
5558import static edu .unc .lib .boxc .model .api .rdf .CdrDeposit .mimetype ;
@@ -73,7 +76,7 @@ public class ExtractTechnicalMetadataJob extends AbstractConcurrentDepositJob {
7376
7477 private static final String FITS_SINGLE_STATUS = "SINGLE_RESULT" ;
7578 private static final String FITS_EXAMINE_PATH = "examine" ;
76- private static final Path TMP_PATH = Paths . get ( System . getProperty ( "java.io.tmpdir" )) ;
79+ private static final String MIMETYPE_ATTR = "mimetype" ;
7780
7881 private CloseableHttpClient httpClient ;
7982
@@ -215,10 +218,12 @@ public void run() {
215218
216219 // Symlink the file before processing
217220 Path linkPath = makeSymlinkForStagedPath (stagedPath , providedLabel );
218- // Generate the FITS report as a document
219- Document fitsDoc = getFitsDocument (objPid , linkPath );
220221
222+ Document fitsDoc = null ;
221223 try {
224+ // Generate the FITS report as a document
225+ fitsDoc = getFitsDocument (objPid , linkPath );
226+
222227 // Create the PREMIS report wrapper for the FITS results
223228 Document premisDoc = generatePremisReport (objPid , fitsDoc );
224229 Element premisObjCharsEl = getObjectCharacteristics (premisDoc );
@@ -234,11 +239,11 @@ public void run() {
234239 writePremisReport (objPid , premisDoc );
235240
236241 receiveResult (result );
237- } catch (JobFailedException | JobInterruptedException e ) {
242+ } catch (JobFailedException | JobInterruptedException | RepositoryException e ) {
238243 throw e ;
239244 } catch (Exception e ) {
240245 failJob (e , "Failed to extract FITS details for file '{0}' with id {1} from document:\n {2}" ,
241- stagedPath , objPid .getId (), getXMLOutputter ().outputString (fitsDoc ));
246+ stagedPath , objPid .getId (), fitsDoc != null ? getXMLOutputter ().outputString (fitsDoc ) : "null" );
242247 } finally {
243248 try {
244249 Files .delete (linkPath );
@@ -263,7 +268,7 @@ private void addFileIdentification(Document fitsDoc, Element premisObjCharsEl) {
263268 String fitsMimetype = null ;
264269 String format ;
265270 if (identity != null ) {
266- fitsMimetype = identity .getAttributeValue ("mimetype" );
271+ fitsMimetype = identity .getAttributeValue (MIMETYPE_ATTR );
267272 format = identity .getAttributeValue ("format" );
268273 } else {
269274 format = "Unknown" ;
@@ -347,15 +352,18 @@ protected Path makeSymlinkForStagedPath(String stagedUriString, String label) {
347352 // Resolve the path from a URI and make it absolute
348353 URI stagedUri = URI .create (stagedUriString );
349354 Path stagedPath ;
355+ File depositDirectory = getDepositDirectory ();
350356 if (!stagedUri .isAbsolute ()) {
351- stagedPath = Paths .get (getDepositDirectory () .toString (), stagedUriString );
357+ stagedPath = Paths .get (depositDirectory .toString (), stagedUriString );
352358 } else {
353359 stagedPath = Paths .get (stagedUri );
354360 }
355361 try {
362+ // Create a unique parent directory for the symlink to avoid filename conflicts
363+ var parentDir = Files .createTempDirectory (depositDirectory .toPath (), "fits_staging" );
364+ // Assign the same permissions as the parent directory to the temp dir, since createTempDirectory is restrictive
365+ Files .setPosixFilePermissions (parentDir , Files .getPosixFilePermissions (parentDir .getParent ()));
356366 // Create a symlink to the file to make use of the original filename and avoid issues with non-ascii characters
357- var parentDir = TMP_PATH .resolve (Long .toString (System .nanoTime ()));
358- Files .createDirectories (parentDir );
359367 String symlinkName = label != null ? label : stagedPath .getFileName ().toString ();
360368 var linkPath = sanitizeCliPath (parentDir .resolve (symlinkName ));
361369 Files .createSymbolicLink (linkPath , stagedPath );
@@ -511,19 +519,22 @@ private Element getFitsIdentificationInformation(Document fitsDoc) {
511519 return null ;
512520 }
513521
514- // Conflicting identification from FITS, try to resolve
515- // Don't trust Exiftool if it detects a symlink, which it does not follow to the file.
516- // Trust any answer agreed on by multiple tools
517- for (Element el : identification .getChildren ("identity" , FITS_NS )) {
518- if (el .getChildren ("tool" , FITS_NS ).size () > 1
519- || !("Exiftool" .equals (el .getChild ("tool" , FITS_NS ).getAttributeValue ("toolname" ))
520- && "application/x-symlink" .equals (el .getAttributeValue ("mimetype" )))) {
521- return el ;
522- }
523- }
522+ // Sort the identification elements to find the best value returned by FITS
523+ var identityEls = identification .getChildren ("identity" , FITS_NS ).stream ()
524+ // Filter out any invalid entries
525+ .filter (el -> MimetypeHelpers .isValidMimetype (el .getAttributeValue (MIMETYPE_ATTR )))
526+ // Primarily sort by the best ranking mimetype
527+ .sorted (Comparator .comparingInt ((Element el ) -> rankMimetype (el .getAttributeValue (MIMETYPE_ATTR )))
528+ // Then rank by the number of tools that agreed on the mimetype
529+ .thenComparingInt (el -> el .getChildren ("tool" , FITS_NS ).size ())
530+ // Reverse so both rank and tool count are in descending order
531+ .reversed ()
532+ // And then favor more application specific mimetypes
533+ .thenComparingInt (el -> el .getAttributeValue (MIMETYPE_ATTR ).contains ("x-" ) ? -1 : 0 ))
534+ .collect (Collectors .toList ());
535+ // Return the best ranking identification, or null if none are valid
536+ return identityEls .isEmpty () ? null : identityEls .get (0 );
524537 }
525-
526- return null ;
527538 }
528539
529540 private int rankMimetype (String mimetype ) {
0 commit comments