1
1
package edu .unc .lib .deposit .fcrepo3 ;
2
2
3
+ import static edu .unc .lib .deposit .work .DepositGraphUtils .dprop ;
4
+ import static edu .unc .lib .dl .util .ContentModelHelper .Datastream .DATA_FILE ;
5
+
3
6
import java .io .ByteArrayOutputStream ;
4
7
import java .io .File ;
5
8
import java .io .IOException ;
6
9
import java .net .ConnectException ;
7
10
import java .net .URI ;
8
11
import java .net .URISyntaxException ;
12
+ import java .nio .file .Files ;
13
+ import java .nio .file .Paths ;
9
14
import java .util .ArrayDeque ;
10
15
import java .util .ArrayList ;
11
16
import java .util .Collection ;
27
32
28
33
import com .hp .hpl .jena .rdf .model .Bag ;
29
34
import com .hp .hpl .jena .rdf .model .Model ;
35
+ import com .hp .hpl .jena .rdf .model .Property ;
36
+ import com .hp .hpl .jena .rdf .model .Resource ;
30
37
31
38
import edu .unc .lib .deposit .work .AbstractDepositJob ;
32
39
import edu .unc .lib .deposit .work .DepositGraphUtils ;
43
50
import edu .unc .lib .dl .fedora .ObjectIntegrityException ;
44
51
import edu .unc .lib .dl .fedora .PID ;
45
52
import edu .unc .lib .dl .fedora .ServiceException ;
53
+ import edu .unc .lib .dl .fedora .types .Datastream ;
54
+ import edu .unc .lib .dl .util .ContentModelHelper .DepositRelationship ;
46
55
import edu .unc .lib .dl .util .ContentModelHelper .Relationship ;
47
56
import edu .unc .lib .dl .util .DepositConstants ;
48
57
import edu .unc .lib .dl .util .DepositException ;
51
60
import edu .unc .lib .dl .util .JMSMessageUtil .FedoraActions ;
52
61
import edu .unc .lib .dl .util .PremisEventLogger ;
53
62
import edu .unc .lib .dl .util .RedisWorkerConstants .DepositField ;
63
+ import edu .unc .lib .dl .util .TripleStoreQueryService ;
54
64
import edu .unc .lib .dl .xml .FOXMLJDOMUtil ;
55
65
56
66
/**
@@ -77,6 +87,9 @@ public class IngestDeposit extends AbstractDepositJob implements ListenerJob {
77
87
78
88
@ Autowired
79
89
private AccessClient accessClient ;
90
+
91
+ @ Autowired
92
+ private TripleStoreQueryService tsqs ;
80
93
81
94
private int ingestObjectCount ;
82
95
@@ -172,6 +185,8 @@ private void processDepositStructure() {
172
185
173
186
// Capture the top level pids
174
187
DepositGraphUtils .walkChildrenDepthFirst (depositBag , topLevelPids , false );
188
+
189
+ closeModel ();
175
190
176
191
// TODO capture structure for ordered sequences instead of just bags
177
192
}
@@ -369,7 +384,7 @@ private void ingestObject(String ingestPid, boolean confirmExisting) throws Depo
369
384
log .info ("Fedora ingest timed out, awaiting ingest confirmation and proceeding with the remainder of the deposit: "
370
385
+ e .getLocalizedMessage ());
371
386
} catch (ObjectExistsException e ) {
372
- if (confirmExisting ) {
387
+ if (confirmExisting || isDuplicateOkay ( pid ) ) {
373
388
ingestsAwaitingConfirmation .remove (ingestPid );
374
389
} else {
375
390
throw new DepositException ("Object " + pid .getPid () + " already exists in the repository." , e );
@@ -382,6 +397,57 @@ private void ingestObject(String ingestPid, boolean confirmExisting) throws Depo
382
397
// TODO increment ingestedOctets
383
398
384
399
}
400
+
401
+ private boolean isDuplicateOkay (PID pid ) {
402
+ // Get the deposit ID for the repository copy of pid
403
+ List <String > deposits = tsqs .fetchBySubjectAndPredicate (pid , Relationship .originalDeposit .toString ());
404
+
405
+ // Ensure that the deposit id as record by fedora matches the current deposit or is not present
406
+ if (deposits != null && !deposits .contains (this .getDepositPID ().getURI ())) {
407
+ return false ;
408
+ }
409
+
410
+ Model model = getReadOnlyModel ();
411
+ try {
412
+ Resource objectResc = model .getResource (pid .getURI ());
413
+
414
+ Property stagingLocation = dprop (model , DepositRelationship .stagingLocation );
415
+ if (!objectResc .hasProperty (stagingLocation )) {
416
+ // No staging location, so nothing further to check
417
+ return true ;
418
+ }
419
+
420
+ String fileLocation = objectResc .getProperty (stagingLocation ).getString ();
421
+ fileLocation = new URI (fileLocation ).getPath ();
422
+
423
+ // Confirm that incoming file is the same size as the one in the repository
424
+ long incomingSize = Files .size (
425
+ Paths .get (this .getDepositDirectory ().getAbsolutePath (), fileLocation ));
426
+
427
+ // Get information for copy in the repository
428
+ Datastream ds = client .getDatastream (pid , DATA_FILE .getName ());
429
+
430
+ if (incomingSize != ds .getSize () && !(ds .getSize () == -1 && incomingSize == 0 )) {
431
+ // File sizes didn't match, so this is not the correct file
432
+ return false ;
433
+ }
434
+
435
+ // If a checksum is available, make sure it matches the one in the repository
436
+ Property md5sum = dprop (model , DepositRelationship .md5sum );
437
+ if (objectResc .hasProperty (md5sum )) {
438
+ String incomingChecksum = objectResc .getProperty (md5sum ).getString ();
439
+ return ds .getChecksum ().equals (incomingChecksum );
440
+ }
441
+
442
+ return true ;
443
+ } catch (FedoraException | IOException | URISyntaxException e1 ) {
444
+ log .debug ("Failed to get datastream info while checking on duplicate for {}" , pid , e1 );
445
+ } finally {
446
+ closeModel ();
447
+ }
448
+
449
+ return false ;
450
+ }
385
451
386
452
/**
387
453
* Uploads locally held files and PREMIS referenced by an object's FOXML. As a side effect, updates the FOXML
0 commit comments