7
7
import edu .unc .lib .boxc .deposit .work .JobFailedException ;
8
8
import edu .unc .lib .boxc .deposit .work .JobInterruptedException ;
9
9
import edu .unc .lib .boxc .model .api .ids .PID ;
10
+ import edu .unc .lib .boxc .model .api .rdf .CdrDeposit ;
10
11
import edu .unc .lib .boxc .model .fcrepo .ids .DatastreamPids ;
11
12
import edu .unc .lib .boxc .model .fcrepo .ids .PIDs ;
12
13
import org .apache .commons .io .FilenameUtils ;
@@ -124,8 +125,10 @@ public void runJob() {
124
125
String stagedPath = stagedPair .getValue ();
125
126
PID originalPid = DatastreamPids .getOriginalFilePid (objPid );
126
127
final String providedMimetype = getProvidedMimetype (originalPid , model );
128
+ final String providedLabel = getProvidedLabel (objPid , model );
127
129
128
- submitTask (new ExtractTechnicalMetadataRunnable (objPid , originalPid , stagedPath , providedMimetype ));
130
+ submitTask (new ExtractTechnicalMetadataRunnable (objPid , originalPid , stagedPath ,
131
+ providedMimetype , providedLabel ));
129
132
}
130
133
131
134
waitForCompletion ();
@@ -144,6 +147,16 @@ private String getProvidedMimetype(PID originalPid, Model model) {
144
147
}
145
148
}
146
149
150
+ private String getProvidedLabel (PID filePid , Model model ) {
151
+ Resource fileResc = model .getResource (filePid .getRepositoryPath ());
152
+ Statement labelStmt = fileResc .getProperty (CdrDeposit .label );
153
+ if (labelStmt != null ) {
154
+ return labelStmt .getString ();
155
+ } else {
156
+ return null ;
157
+ }
158
+ }
159
+
147
160
@ Override
148
161
protected void registrationAction () {
149
162
List <Object > results = new ArrayList <>();
@@ -177,14 +190,16 @@ private class ExtractTechnicalMetadataRunnable implements Runnable {
177
190
private PID originalPid ;
178
191
private String stagedPath ;
179
192
private String providedMimetype ;
193
+ private String providedLabel ;
180
194
private ExtractTechnicalMetadataResult result = new ExtractTechnicalMetadataResult ();
181
195
182
196
public ExtractTechnicalMetadataRunnable (PID objPid , PID originalPid , String stagedPath ,
183
- String providedMimetype ) {
197
+ String providedMimetype , String providedLabel ) {
184
198
this .objPid = objPid ;
185
199
this .originalPid = originalPid ;
186
200
this .stagedPath = stagedPath ;
187
201
this .providedMimetype = providedMimetype ;
202
+ this .providedLabel = providedLabel ;
188
203
result .objPid = objPid ;
189
204
result .originalPid = originalPid ;
190
205
result .hasProvidedMimetype = providedMimetype != null ;
@@ -198,8 +213,10 @@ public void run() {
198
213
199
214
interruptJobIfStopped ();
200
215
216
+ // Symlink the file before processing
217
+ Path linkPath = makeSymlinkForStagedPath (stagedPath , providedLabel );
201
218
// Generate the FITS report as a document
202
- Document fitsDoc = getFitsDocument (objPid , stagedPath );
219
+ Document fitsDoc = getFitsDocument (objPid , linkPath );
203
220
204
221
try {
205
222
// Create the PREMIS report wrapper for the FITS results
@@ -222,6 +239,13 @@ public void run() {
222
239
} catch (Exception e ) {
223
240
failJob (e , "Failed to extract FITS details for file '{0}' with id {1} from document:\n {2}" ,
224
241
stagedPath , objPid .getId (), getXMLOutputter ().outputString (fitsDoc ));
242
+ } finally {
243
+ try {
244
+ Files .delete (linkPath );
245
+ Files .delete (linkPath .getParent ());
246
+ } catch (IOException e ) {
247
+ log .warn ("Failed to delete symlink" , e );
248
+ }
225
249
}
226
250
}
227
251
@@ -261,7 +285,6 @@ private void addFileIdentification(Document fitsDoc, Element premisObjCharsEl) {
261
285
* Overrides the mimetype for this object in the deposit model when the FITS
262
286
* generated value is preferred.
263
287
*
264
- * @param objResc
265
288
* @param fitsExtractMimetype
266
289
*/
267
290
private void overrideDepositMimetype (String fitsExtractMimetype ) {
@@ -301,31 +324,48 @@ private void overrideDepositMimetype(String fitsExtractMimetype) {
301
324
* XML document
302
325
*
303
326
* @param objPid
327
+ * @param filePath path of the file to generate the FITS report for
328
+ * @return the FITS report for the file as an XML document
329
+ */
330
+ private Document getFitsDocument (PID objPid , Path filePath ) {
331
+ if (shouldProcessWithWebService (filePath )) {
332
+ return extractUsingWebService (objPid , filePath );
333
+ } else {
334
+ return extractUsingCLI (objPid , filePath );
335
+ }
336
+ }
337
+
338
+ /**
339
+ * Creates a symlink to the provided stagedUri, where the symlink is sanitized of problematic characters
340
+ * and uses the label as the filename to ensure the original file extension is present, if available.
341
+ * The symlink is created inside a new directory under TMP_PATH named after the current System.nanoTime() value.
304
342
* @param stagedUriString URI of the staged file; if not absolute, it is resolved against the deposit directory
343
+ * @param label name to use for the symlink; if null, the filename of the staged file is used instead
305
344
* @return path of the created symlink
306
345
*/
307
- private Document getFitsDocument (PID objPid , String stagedUriString ) {
346
+ protected Path makeSymlinkForStagedPath (String stagedUriString , String label ) {
347
+ // Resolve the path from a URI and make it absolute
308
348
URI stagedUri = URI .create (stagedUriString );
309
349
Path stagedPath ;
310
350
if (!stagedUri .isAbsolute ()) {
311
351
stagedPath = Paths .get (getDepositDirectory ().toString (), stagedUriString );
312
352
} else {
313
353
stagedPath = Paths .get (stagedUri );
314
354
}
315
-
316
- if (shouldProcessWithWebService (stagedPath )) {
317
- return extractUsingWebService (objPid , stagedPath );
318
- } else {
319
- return extractUsingCLI (objPid , stagedPath );
355
+ try {
356
+ // Create a symlink to the file to make use of the original filename and avoid issues with non-ascii characters
357
+ var parentDir = TMP_PATH .resolve (Long .toString (System .nanoTime ()));
358
+ Files .createDirectories (parentDir );
359
+ String symlinkName = label != null ? label : stagedPath .getFileName ().toString ();
360
+ var linkPath = sanitizeCliPath (parentDir .resolve (symlinkName ));
361
+ Files .createSymbolicLink (linkPath , stagedPath );
362
+ return linkPath ;
363
+ } catch (IOException e ) {
364
+ throw new JobFailedException ("Failed to create symlink for file " + stagedPath , e );
320
365
}
321
366
}
322
367
323
368
private boolean shouldProcessWithWebService (Path path ) {
324
- // FITS cannot currently handle file paths that contain unicode characters
325
- if (!CharMatcher .ascii ().matchesAllOf (path .toString ())) {
326
- log .debug ("File {} not applicable for web service due to unacceptable characters" , path );
327
- return false ;
328
- }
329
369
String filename = path .getFileName ().toString ();
330
370
String extension = FilenameUtils .getExtension (filename ).toLowerCase ();
331
371
if (FILE_EXTS_FOR_CLI .contains (extension )) {
@@ -392,35 +432,9 @@ protected Path sanitizeCliPath(Path stagedPath) {
392
432
return Paths .get (path );
393
433
}
394
434
395
- /**
396
- * Symlink the provided file into the temp directory, inside of parent directory based on the id of the object.
397
- * Filename of the symlink is based off the sanitized form of the path.
398
- * @param objPid
399
- * @param sanitizedPath Sanitized version of the staging path, does not need to resolve to a file
400
- * @param stagedPath Original staging path of the file, must resolve to the file being linked
401
- * @return Symlink path
402
- * @throws IOException
403
- */
404
- protected Path symlinkFile (PID objPid , Path sanitizedPath , Path stagedPath ) throws IOException {
405
- var parentDir = TMP_PATH .resolve (objPid .getId ());
406
- Files .createDirectories (parentDir );
407
- var linkPath = parentDir .resolve (sanitizedPath .getFileName ());
408
- Files .createSymbolicLink (linkPath , stagedPath );
409
- return linkPath ;
410
- }
411
-
412
- private Document extractUsingCLI (PID objPid , Path stagedPath ) {
435
+ private Document extractUsingCLI (PID objPid , Path targetPath ) {
413
436
String stdout = null ;
414
- Path fileLink = null ;
415
437
try {
416
- Path targetPath = stagedPath ;
417
- var sanitizedPath = sanitizeCliPath (stagedPath );
418
- // Create a symlink to the file to avoid problems with non-ascii characters and reserved linux characters
419
- // otherwise there will be encoding mismatches between boxc, the terminal, and FITS.
420
- if (!stagedPath .equals (sanitizedPath )) {
421
- fileLink = symlinkFile (objPid , sanitizedPath , stagedPath );
422
- targetPath = fileLink ;
423
- }
424
438
String [] command = new String [] { fitsCommandPath .toString (), "-i" , targetPath .toString () };
425
439
Process process = Runtime .getRuntime ().exec (command );
426
440
int exitCode = process .waitFor ();
@@ -434,17 +448,7 @@ private Document extractUsingCLI(PID objPid, Path stagedPath) {
434
448
return createSAXBuilder ().build (new ByteArrayInputStream (stdout .getBytes (UTF_8 )));
435
449
} catch (IOException | JDOMException | InterruptedException e ) {
436
450
failJob (e , "Failed to generate report for file {0} with id {1}, output was:\n {2}" ,
437
- stagedPath , objPid .getId (), stdout );
438
- } finally {
439
- // Cleanup symlink and parent directory containing symlink, if a symlink was used
440
- if (fileLink != null ) {
441
- try {
442
- Files .delete (fileLink );
443
- Files .delete (fileLink .getParent ());
444
- } catch (IOException e ) {
445
- log .warn ("Failed to cleanup symlink" , e );
446
- }
447
- }
451
+ targetPath , objPid .getId (), stdout );
448
452
}
449
453
return null ;
450
454
}
0 commit comments