Skip to content

Commit ae88961

Browse files
committed
Merge pull request #351 from UNC-Libraries/resist-existing-file
Prevent failure on duplicate pid
2 parents f02cd1c + d2988a6 commit ae88961

File tree

1 file changed

+67
-1
lines changed

1 file changed

+67
-1
lines changed

deposit/src/main/java/edu/unc/lib/deposit/fcrepo3/IngestDeposit.java

+67-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
package edu.unc.lib.deposit.fcrepo3;
22

3+
import static edu.unc.lib.deposit.work.DepositGraphUtils.dprop;
4+
import static edu.unc.lib.dl.util.ContentModelHelper.Datastream.DATA_FILE;
5+
36
import java.io.ByteArrayOutputStream;
47
import java.io.File;
58
import java.io.IOException;
69
import java.net.ConnectException;
710
import java.net.URI;
811
import java.net.URISyntaxException;
12+
import java.nio.file.Files;
13+
import java.nio.file.Paths;
914
import java.util.ArrayDeque;
1015
import java.util.ArrayList;
1116
import java.util.Collection;
@@ -27,6 +32,8 @@
2732

2833
import com.hp.hpl.jena.rdf.model.Bag;
2934
import com.hp.hpl.jena.rdf.model.Model;
35+
import com.hp.hpl.jena.rdf.model.Property;
36+
import com.hp.hpl.jena.rdf.model.Resource;
3037

3138
import edu.unc.lib.deposit.work.AbstractDepositJob;
3239
import edu.unc.lib.deposit.work.DepositGraphUtils;
@@ -43,6 +50,8 @@
4350
import edu.unc.lib.dl.fedora.ObjectIntegrityException;
4451
import edu.unc.lib.dl.fedora.PID;
4552
import edu.unc.lib.dl.fedora.ServiceException;
53+
import edu.unc.lib.dl.fedora.types.Datastream;
54+
import edu.unc.lib.dl.util.ContentModelHelper.DepositRelationship;
4655
import edu.unc.lib.dl.util.ContentModelHelper.Relationship;
4756
import edu.unc.lib.dl.util.DepositConstants;
4857
import edu.unc.lib.dl.util.DepositException;
@@ -51,6 +60,7 @@
5160
import edu.unc.lib.dl.util.JMSMessageUtil.FedoraActions;
5261
import edu.unc.lib.dl.util.PremisEventLogger;
5362
import edu.unc.lib.dl.util.RedisWorkerConstants.DepositField;
63+
import edu.unc.lib.dl.util.TripleStoreQueryService;
5464
import edu.unc.lib.dl.xml.FOXMLJDOMUtil;
5565

5666
/**
@@ -77,6 +87,9 @@ public class IngestDeposit extends AbstractDepositJob implements ListenerJob {
7787

7888
@Autowired
7989
private AccessClient accessClient;
90+
91+
@Autowired
92+
private TripleStoreQueryService tsqs;
8093

8194
private int ingestObjectCount;
8295

@@ -172,6 +185,8 @@ private void processDepositStructure() {
172185

173186
// Capture the top level pids
174187
DepositGraphUtils.walkChildrenDepthFirst(depositBag, topLevelPids, false);
188+
189+
closeModel();
175190

176191
// TODO capture structure for ordered sequences instead of just bags
177192
}
@@ -369,7 +384,7 @@ private void ingestObject(String ingestPid, boolean confirmExisting) throws Depo
369384
log.info("Fedora ingest timed out, awaiting ingest confirmation and proceeding with the remainder of the deposit: "
370385
+ e.getLocalizedMessage());
371386
} catch (ObjectExistsException e) {
372-
if (confirmExisting) {
387+
if (confirmExisting || isDuplicateOkay(pid)) {
373388
ingestsAwaitingConfirmation.remove(ingestPid);
374389
} else {
375390
throw new DepositException("Object " + pid.getPid() + " already exists in the repository.", e);
@@ -382,6 +397,57 @@ private void ingestObject(String ingestPid, boolean confirmExisting) throws Depo
382397
// TODO increment ingestedOctets
383398

384399
}
400+
401+
private boolean isDuplicateOkay(PID pid) {
402+
// Get the deposit ID for the repository copy of pid
403+
List<String> deposits = tsqs.fetchBySubjectAndPredicate(pid, Relationship.originalDeposit.toString());
404+
405+
// Ensure that the deposit id as record by fedora matches the current deposit or is not present
406+
if (deposits != null && !deposits.contains(this.getDepositPID().getURI())) {
407+
return false;
408+
}
409+
410+
Model model = getReadOnlyModel();
411+
try {
412+
Resource objectResc = model.getResource(pid.getURI());
413+
414+
Property stagingLocation = dprop(model, DepositRelationship.stagingLocation);
415+
if (!objectResc.hasProperty(stagingLocation)) {
416+
// No staging location, so nothing further to check
417+
return true;
418+
}
419+
420+
String fileLocation = objectResc.getProperty(stagingLocation).getString();
421+
fileLocation = new URI(fileLocation).getPath();
422+
423+
// Confirm that incoming file is the same size as the one in the repository
424+
long incomingSize = Files.size(
425+
Paths.get(this.getDepositDirectory().getAbsolutePath(), fileLocation));
426+
427+
// Get information for copy in the repository
428+
Datastream ds = client.getDatastream(pid, DATA_FILE.getName());
429+
430+
if (incomingSize != ds.getSize() && !(ds.getSize() == -1 && incomingSize == 0)) {
431+
// File sizes didn't match, so this is not the correct file
432+
return false;
433+
}
434+
435+
// If a checksum is available, make sure it matches the one in the repository
436+
Property md5sum = dprop(model, DepositRelationship.md5sum);
437+
if (objectResc.hasProperty(md5sum)) {
438+
String incomingChecksum = objectResc.getProperty(md5sum).getString();
439+
return ds.getChecksum().equals(incomingChecksum);
440+
}
441+
442+
return true;
443+
} catch (FedoraException | IOException | URISyntaxException e1) {
444+
log.debug("Failed to get datastream info while checking on duplicate for {}", pid, e1);
445+
} finally {
446+
closeModel();
447+
}
448+
449+
return false;
450+
}
385451

386452
/**
387453
* Uploads locally held files and PREMIS referenced by an objects FOXML. As a side effect, updates the FOXML

0 commit comments

Comments
 (0)