Skip to content

Commit 7550bb5

Browse files
committed
Merge pull request #350 from UNC-Libraries/xml-import-2
Bulk Metadata Importing
2 parents 3206409 + c260872 commit 7550bb5

File tree

26 files changed

+1580
-115
lines changed

26 files changed

+1580
-115
lines changed

fcrepo-clients/src/main/java/edu/unc/lib/dl/util/VocabularyHelperManager.java

Lines changed: 35 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
*/
1616
package edu.unc.lib.dl.util;
1717

18+
import static edu.unc.lib.dl.util.ContentModelHelper.CDRProperty.invalidTerm;
1819
import static edu.unc.lib.dl.util.ContentModelHelper.CDRProperty.replaceInvalidTerms;
20+
import static edu.unc.lib.dl.util.ContentModelHelper.Datastream.RELS_EXT;
1921
import static edu.unc.lib.dl.util.ContentModelHelper.Model.COLLECTION;
2022

2123
import java.net.URI;
@@ -37,10 +39,14 @@
3739
import org.springframework.beans.factory.annotation.Autowired;
3840

3941
import edu.unc.lib.dl.fedora.AccessClient;
42+
import edu.unc.lib.dl.fedora.DatastreamDocument;
4043
import edu.unc.lib.dl.fedora.FedoraException;
4144
import edu.unc.lib.dl.fedora.ManagementClient;
45+
import edu.unc.lib.dl.fedora.OptimisticLockException;
4246
import edu.unc.lib.dl.fedora.PID;
4347
import edu.unc.lib.dl.util.ContentModelHelper.CDRProperty;
48+
import edu.unc.lib.dl.util.ContentModelHelper.Datastream;
49+
import edu.unc.lib.dl.xml.JDOMNamespaceUtil;
4450
import edu.unc.lib.dl.xml.VocabularyHelper;
4551

4652
/**
@@ -196,57 +202,45 @@ public void updateInvalidTermsRelations(PID pid, Element docElement) throws Fedo
196202
if (helpers == null)
197203
return;
198204

199-
String invalidTermPred = CDRProperty.invalidTerm.toString();
200-
List<String> allExistingTerms = queryService.fetchBySubjectAndPredicate(pid, invalidTermPred);
201-
202-
// Decompose triple values and group terms by vocabulary prefix
203-
Map<String, List<String>> termMap = new HashMap<>();
204-
for (String term : allExistingTerms) {
205-
String parts[] = term.split("\\|", 2);
206-
207-
List<String> terms = termMap.get(parts[0]);
208-
if (terms == null) {
209-
terms = new ArrayList<>();
210-
termMap.put(parts[0], terms);
211-
}
212-
213-
terms.add(term);
214-
}
205+
DatastreamDocument relsDs = managementClient.getXMLDatastreamIfExists(pid, Datastream.RELS_EXT.getName());
206+
207+
Element descEl = relsDs.getDocument().getRootElement().getChild("Description", JDOMNamespaceUtil.RDF_NS);
208+
209+
// Remove all existing invalid term predicates
210+
boolean termsChanged =
211+
descEl.removeChildren(invalidTerm.getPredicate(), invalidTerm.getNamespace());
215212

216213
for (VocabularyHelper helper : helpers) {
217-
List<String> existingTerms = termMap.get(helper.getInvalidTermPrefix());
218-
219214
Set<String> invalidTerms;
220215
try {
221216
invalidTerms = helper.getInvalidTermsWithPrefix(docElement);
217+
218+
if (invalidTerms != null && invalidTerms.size() > 0) {
219+
termsChanged = true;
220+
221+
for (String term : invalidTerms) {
222+
Element invTermEl = new Element(invalidTerm.getPredicate(), invalidTerm.getNamespace());
223+
invTermEl.setText(term);
224+
descEl.addContent(invTermEl);
225+
}
226+
}
222227
} catch (JDOMException e) {
223228
log.error("Failed to extract invalid terms from {}", pid.getPid(), e);
224229
continue;
225230
}
226-
227-
if (existingTerms != null && invalidTerms.size() == existingTerms.size()
228-
&& invalidTerms.containsAll(existingTerms)) {
229-
continue;
230-
}
231-
232-
if (existingTerms != null) {
233-
// Remove any terms which are no longer present
234-
List<String> removeTerms = new ArrayList<String>(existingTerms);
235-
removeTerms.removeAll(invalidTerms);
236-
237-
for (String term : removeTerms) {
238-
managementClient.purgeLiteralStatement(pid, invalidTermPred, term, null);
239-
}
240-
241-
// Calculate the set of newly invalid terms which need to be added
242-
invalidTerms.removeAll(existingTerms);
243-
}
244-
245-
if (invalidTerms.size() > 0) {
246-
for (String term : invalidTerms) {
247-
managementClient.addLiteralStatement(pid, invalidTermPred, term, null);
231+
}
232+
233+
// If any terms changed, then update RELS-EXT with optimistic locking
234+
if (termsChanged) {
235+
do {
236+
try {
237+
managementClient.modifyDatastream(pid, RELS_EXT.getName(), "Setting invalid vocabulary terms",
238+
relsDs.getLastModified(), relsDs.getDocument());
239+
return;
240+
} catch (OptimisticLockException e) {
241+
log.debug("Unable to update RELS-EXT for {}, retrying", pid);
248242
}
249-
}
243+
} while (true);
250244
}
251245
}
252246

fcrepo-clients/src/test/java/edu/unc/lib/dl/util/VocabularyHelperManagerTest.java

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,35 +17,41 @@
1717

1818
import static edu.unc.lib.dl.test.TestHelpers.setField;
1919
import static edu.unc.lib.dl.util.ContentModelHelper.CDRProperty.indexValidTerms;
20+
import static edu.unc.lib.dl.util.ContentModelHelper.CDRProperty.invalidTerm;
2021
import static edu.unc.lib.dl.util.ContentModelHelper.CDRProperty.replaceInvalidTerms;
2122
import static edu.unc.lib.dl.util.ContentModelHelper.CDRProperty.warnInvalidTerms;
2223
import static edu.unc.lib.dl.util.ContentModelHelper.Datastream.DATA_FILE;
24+
import static edu.unc.lib.dl.util.ContentModelHelper.Datastream.RELS_EXT;
2325
import static org.junit.Assert.assertEquals;
2426
import static org.junit.Assert.assertTrue;
2527
import static org.mockito.Matchers.any;
2628
import static org.mockito.Matchers.anyString;
2729
import static org.mockito.Matchers.eq;
2830
import static org.mockito.Mockito.mock;
29-
import static org.mockito.Mockito.never;
3031
import static org.mockito.Mockito.verify;
3132
import static org.mockito.Mockito.when;
3233
import static org.mockito.MockitoAnnotations.initMocks;
3334

3435
import java.util.Arrays;
3536
import java.util.HashMap;
3637
import java.util.HashSet;
38+
import java.util.List;
3739
import java.util.Map;
3840
import java.util.Set;
3941

42+
import org.jdom2.Document;
4043
import org.jdom2.Element;
4144
import org.junit.Before;
4245
import org.junit.Test;
46+
import org.mockito.ArgumentCaptor;
4347
import org.mockito.Mock;
4448

4549
import edu.unc.lib.dl.fedora.AccessClient;
50+
import edu.unc.lib.dl.fedora.DatastreamDocument;
4651
import edu.unc.lib.dl.fedora.ManagementClient;
4752
import edu.unc.lib.dl.fedora.PID;
4853
import edu.unc.lib.dl.fedora.types.MIMETypedStream;
54+
import edu.unc.lib.dl.xml.JDOMNamespaceUtil;
4955
import edu.unc.lib.dl.xml.VocabularyHelper;
5056

5157
/**
@@ -151,14 +157,26 @@ public void updateInvalidTermsTest() throws Exception {
151157
helper.setInvalidTerms(new HashSet<>(Arrays.asList("term", "term2")));
152158
helper.setPrefix(VOCAB_TYPE);
153159

154-
when(queryService.fetchBySubjectAndPredicate(any(PID.class), anyString())).thenReturn(
155-
Arrays.asList(VOCAB_TYPE + "|term"));
160+
Document relsDoc = new Document()
161+
.addContent(new Element("RDF", JDOMNamespaceUtil.RDF_NS)
162+
.addContent(new Element("Description", JDOMNamespaceUtil.RDF_NS)
163+
.addContent(new Element(invalidTerm.getPredicate(), invalidTerm.getNamespace()))
164+
.setText(VOCAB_TYPE + "|term")));
165+
166+
when(managementClient.getXMLDatastreamIfExists(any(PID.class), eq(RELS_EXT.getName())))
167+
.thenReturn(new DatastreamDocument(relsDoc, "2015-07-29"));
156168

157169
Element doc = mock(Element.class);
158170
manager.updateInvalidTermsRelations(new PID(ITEM_PID), doc);
159171

160-
verify(managementClient, never()).purgeLiteralStatement(any(PID.class), anyString(), anyString(), anyString());
161-
162-
verify(managementClient).addLiteralStatement(any(PID.class), anyString(), anyString(), anyString());
172+
ArgumentCaptor<Document> captor = ArgumentCaptor.forClass(Document.class);
173+
verify(managementClient).modifyDatastream(any(PID.class), eq(RELS_EXT.getName()),
174+
anyString(), anyString(), captor.capture());
175+
176+
Document modified = captor.getValue();
177+
List<Element> invTerms = modified.getRootElement().getChild("Description", JDOMNamespaceUtil.RDF_NS)
178+
.getChildren(invalidTerm.getPredicate(), invalidTerm.getNamespace());
179+
180+
assertEquals("Incorrect number of invalid terms after update", 2, invTerms.size());
163181
}
164182
}

metadata/src/main/java/edu/unc/lib/dl/util/RedisWorkerConstants.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ public class RedisWorkerConstants {
88
public static final String INGESTS_UPLOADED_PREFIX = "ingests-uploaded:";
99
public static final String DEPOSIT_TO_JOBS_PREFIX = "deposit-to-jobs:";
1010
public static final String JOB_STATUS_PREFIX = "job-status:";
11+
public static final String BULK_UPDATE_PREFIX = "bulk-update:";
12+
public static final String BULK_RESUME_PREFIX = "bulk-resume:";
13+
public static final String BULK_UPDATE_QUEUE = "bulk-md-update";
1114

1215
public static enum DepositField {
1316
uuid, state, actionRequest, contactName, depositorName, intSenderIdentifier, intSenderDescription,

persistence/pom.xml

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,17 @@
4848
<version>1.3.4</version>
4949
</dependency>
5050
<dependency>
51-
<groupId>commons-logging</groupId>
52-
<artifactId>commons-logging</artifactId>
53-
<version>1.1.1</version>
51+
<groupId>org.slf4j</groupId>
52+
<artifactId>slf4j-api</artifactId>
53+
<version>1.6.1</version>
54+
<type>jar</type>
55+
<scope>compile</scope>
5456
</dependency>
5557
<dependency>
56-
<groupId>log4j</groupId>
57-
<artifactId>log4j</artifactId>
58-
<version>1.2.16</version>
58+
<groupId>org.slf4j</groupId>
59+
<artifactId>slf4j-log4j12</artifactId>
60+
<version>1.6.1</version>
61+
<scope>compile</scope>
5962
</dependency>
6063
<dependency>
6164
<groupId>xerces</groupId>
@@ -187,5 +190,11 @@
187190
<artifactId>staging-areas</artifactId>
188191
<version>0.0.1-SNAPSHOT</version>
189192
</dependency>
193+
<dependency>
194+
<groupId>com.samskivert</groupId>
195+
<artifactId>jmustache</artifactId>
196+
<version>1.9</version>
197+
<type>jar</type>
198+
</dependency>
190199
</dependencies>
191200
</project>

persistence/src/main/java/edu/unc/lib/dl/update/AtomDCToMODSFilter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import edu.unc.lib.dl.util.ContentModelHelper;
2525
import edu.unc.lib.dl.xml.ModsXmlHelper;
2626

27-
public class AtomDCToMODSFilter extends MODSUIPFilter {
27+
public class AtomDCToMODSFilter extends MODSValidationUIPFilter {
2828
private static Logger log = Logger.getLogger(AtomDCToMODSFilter.class);
2929
private final String datastreamName = AtomPubMetadataParserUtil.ATOM_DC_DATASTREAM;
3030

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/**
2+
* Copyright 2008 The University of North Carolina at Chapel Hill
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package edu.unc.lib.dl.update;
17+
18+
import org.jdom2.Document;
19+
20+
import edu.unc.lib.dl.fedora.PID;
21+
22+
/**
* Update package for a single datastream, extending {@link MetadataUIP} with
* the name of the target datastream and a last-modified value for it.
* The supplied document's root element is registered as incoming data under
* the datastream name at construction time.
*
23+
* @author bbpennel
24+
* @date Jul 13, 2015
25+
*/
26+
public class BulkMetadataDatastreamUIP extends MetadataUIP {
27+
// NOTE(review): presumably the datastream's last-modified timestamp, used by
// callers for optimistic locking - confirm against the code that consumes it.
private String lastModified;
28+
// Name of the datastream this package applies to; also the incoming-data key.
private String datastream;
29+
30+
/**
* Constructs the package, passing pid, user and operation through to the
* {@link MetadataUIP} constructor and storing the root element of
* {@code content} in the incoming data map keyed by {@code datastream}.
*
31+
* @param pid passed to the superclass constructor
32+
* @param user passed to the superclass constructor
33+
* @param operation passed to the superclass constructor
* @param datastream name of the datastream being updated; used as the
*            key under which the content is stored in the incoming data
* @param lastModified last-modified value to carry with this package
* @param content document whose root element becomes the incoming data;
*            must not be null, since its root element is read immediately
34+
*/
35+
public BulkMetadataDatastreamUIP(PID pid, String user, UpdateOperation operation,
36+
String datastream, String lastModified, Document content) {
37+
super(pid, user, operation);
38+
this.lastModified = lastModified;
39+
this.datastream = datastream;
40+
41+
// Register the new content under the datastream's name
getIncomingData().put(datastream, content.getRootElement());
42+
}
43+
44+
/** @return the last-modified value supplied for the datastream */
public String getLastModified() {
45+
return lastModified;
46+
}
47+
48+
/** @param lastModified new last-modified value for the datastream */
public void setLastModified(String lastModified) {
49+
this.lastModified = lastModified;
50+
}
51+
52+
/** @return the name of the datastream this package applies to */
public String getDatastream() {
53+
return datastream;
54+
}
55+
56+
/**
* Replaces the stored datastream name only; the incoming data entry added
* by the constructor is not re-keyed here.
*
* @param datastream new datastream name
*/
public void setDatastream(String datastream) {
57+
this.datastream = datastream;
58+
}
59+
}

0 commit comments

Comments
 (0)