Skip to content

Commit 89fdd7f

Browse files
committed
Merge pull request #372 from UNC-Libraries/is-applicable-fedora
getDatastream to determine if enhancements applicable
2 parents 5e5bd08 + 66f6b5a commit 89fdd7f

File tree

46 files changed

+120
-1320
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+120
-1320
lines changed

services-worker/src/main/java/edu/unc/lib/dl/cdr/services/AbstractDatastreamEnhancementService.java

+55-30
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,25 @@
1515
*/
1616
package edu.unc.lib.dl.cdr.services;
1717

18-
import java.util.List;
19-
import java.util.Map;
18+
import static edu.unc.lib.dl.util.JMSMessageUtil.ServicesActions.APPLY_SERVICE;
19+
import static edu.unc.lib.dl.util.JMSMessageUtil.ServicesActions.APPLY_SERVICE_STACK;
20+
21+
import java.util.regex.Pattern;
2022

2123
import edu.unc.lib.dl.cdr.services.exception.EnhancementException;
22-
import edu.unc.lib.dl.cdr.services.model.EnhancementApplication;
2324
import edu.unc.lib.dl.cdr.services.model.EnhancementMessage;
2425
import edu.unc.lib.dl.cdr.services.model.FedoraEventMessage;
26+
import edu.unc.lib.dl.fedora.FedoraException;
2527
import edu.unc.lib.dl.fedora.PID;
2628
import edu.unc.lib.dl.util.ContentModelHelper;
29+
import edu.unc.lib.dl.util.ContentModelHelper.Datastream;
2730
import edu.unc.lib.dl.util.JMSMessageUtil;
31+
import edu.unc.lib.dl.util.JMSMessageUtil.FedoraActions;
2832

2933
public abstract class AbstractDatastreamEnhancementService extends AbstractIrodsObjectEnhancementService {
3034

31-
protected String lastAppliedQuery;
32-
protected String applicableNoDSQuery;
33-
protected String applicableStaleDSQuery;
35+
protected String derivativeDatastream;
36+
protected Pattern mimetypePattern;
3437

3538
@Override
3639
public boolean prefilterMessage(EnhancementMessage message) throws EnhancementException {
@@ -55,32 +58,54 @@ public boolean prefilterMessage(EnhancementMessage message) throws EnhancementEx
5558

5659
return ContentModelHelper.Datastream.DATA_FILE.equals(datastream);
5760
}
58-
59-
@Override
60-
public boolean isStale(PID pid) throws EnhancementException {
61-
return false;
61+
62+
protected boolean isDatastreamApplicable(PID pid) throws FedoraException {
63+
edu.unc.lib.dl.fedora.types.Datastream dataDoc
64+
= managementClient.getDatastream(pid, Datastream.DATA_FILE.getName());
65+
66+
// Don't process if there is no original data
67+
if (dataDoc == null) {
68+
return false;
69+
}
70+
71+
// Filter out objects with non-applicable mimetypes
72+
if (mimetypePattern != null && !mimetypePattern.matcher(dataDoc.getMIMEType()).matches()){
73+
return false;
74+
}
75+
76+
edu.unc.lib.dl.fedora.types.Datastream derivDoc
77+
= managementClient.getDatastream(pid, derivativeDatastream);
78+
79+
// No derivative present
80+
if (derivDoc == null) {
81+
return true;
82+
}
83+
84+
// Derivative is older than the original data, need to reperform the enhancement
85+
// Dates are in ISO 8601/UTC format, so lexicographic string comparison is sufficient
86+
return dataDoc.getCreateDate().compareTo(derivDoc.getCreateDate()) > 0;
6287
}
63-
64-
@SuppressWarnings("rawtypes")
88+
6589
@Override
66-
public EnhancementApplication getLastApplied(PID pid) throws EnhancementException {
67-
// replace model URI and PID tokens
68-
String query = String.format(this.lastAppliedQuery, this.getTripleStoreQueryService().getResourceIndexModelUri(),
69-
pid.getURI());
70-
71-
@SuppressWarnings("unchecked")
72-
List<Map> bindings = (List<Map>) ((Map) this.getTripleStoreQueryService().sendSPARQL(query).get("results"))
73-
.get("bindings");
74-
if (bindings.size() == 0)
75-
return null;
76-
77-
EnhancementApplication lastApplied = new EnhancementApplication();
78-
String lastModified = (String) ((Map) bindings.get(0).get("lastModified")).get("value");
79-
lastApplied.setLastAppliedFromISO8601(lastModified);
80-
lastApplied.setPid(pid);
81-
lastApplied.setEnhancementClass(this.getClass());
82-
83-
return lastApplied;
90+
public boolean isApplicable(EnhancementMessage message) throws EnhancementException {
91+
String action = message.getQualifiedAction();
92+
93+
// Only need to check further for ingest, datastream add/modify, or apply-service (whole stack, or this service by name) messages
94+
if (!(FedoraActions.INGEST.equals(action)
95+
|| FedoraActions.MODIFY_DATASTREAM_BY_REFERENCE.equals(action)
96+
|| FedoraActions.ADD_DATASTREAM.equals(action)
97+
|| FedoraActions.MODIFY_DATASTREAM_BY_VALUE.equals(action)
98+
|| APPLY_SERVICE_STACK.equals(action)
99+
|| (APPLY_SERVICE.equals(action) && getClass().getName()
100+
.equals(message.getServiceName())))) {
101+
return false;
102+
}
103+
104+
try {
105+
return isDatastreamApplicable(message.getPid());
106+
} catch (FedoraException e) {
107+
throw new EnhancementException("Failed to check if enhancement was applicable for " + message.getPid(), e);
108+
}
84109
}
85110

86111
}

services-worker/src/main/java/edu/unc/lib/dl/cdr/services/AbstractFedoraEnhancementService.java

-161
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,15 @@
1515
*/
1616
package edu.unc.lib.dl.cdr.services;
1717

18-
import java.util.ArrayList;
19-
import java.util.Collection;
20-
import java.util.Iterator;
21-
import java.util.List;
22-
import java.util.Map;
23-
24-
import org.apache.commons.io.IOUtils;
2518
import org.slf4j.Logger;
2619
import org.slf4j.LoggerFactory;
2720
import org.springframework.context.ApplicationContext;
2821
import org.springframework.context.ApplicationContextAware;
2922

3023
import edu.unc.lib.dl.cdr.services.exception.EnhancementException;
3124
import edu.unc.lib.dl.cdr.services.model.EnhancementMessage;
32-
import edu.unc.lib.dl.cdr.services.model.LabeledPID;
3325
import edu.unc.lib.dl.cdr.services.processing.MessageDirector;
3426
import edu.unc.lib.dl.fedora.ManagementClient;
35-
import edu.unc.lib.dl.fedora.PID;
3627
import edu.unc.lib.dl.util.JMSMessageUtil;
3728
import edu.unc.lib.dl.util.TripleStoreQueryService;
3829

@@ -42,9 +33,6 @@ public abstract class AbstractFedoraEnhancementService implements ObjectEnhancem
4233
protected TripleStoreQueryService tripleStoreQueryService = null;
4334
protected ManagementClient managementClient = null;
4435
protected boolean active = false;
45-
protected List<String> findCandidatesQueries;
46-
protected String findStaleCandidatesQuery;
47-
protected List<String> isApplicableQueries;
4836

4937
private ApplicationContext applicationContext;
5038

@@ -94,153 +82,4 @@ public ManagementClient getManagementClient() {
9482
public void setManagementClient(ManagementClient managementClient) {
9583
this.managementClient = managementClient;
9684
}
97-
98-
@SuppressWarnings("unchecked")
99-
@Override
100-
public List<PID> findStaleCandidateObjects(int maxResults, String priorToDate) throws EnhancementException {
101-
return (List<PID>) this.findCandidateObjects(maxResults, 0, priorToDate, false);
102-
}
103-
104-
@SuppressWarnings("unchecked")
105-
@Override
106-
public List<PID> findCandidateObjects(int maxResults, int offset) throws EnhancementException {
107-
return (List<PID>) this.findCandidateObjects(maxResults, offset, null, false);
108-
}
109-
110-
@Override
111-
public int countCandidateObjects() throws EnhancementException {
112-
return (Integer) this.findCandidateObjects(-1, 0, null, true);
113-
}
114-
115-
public Object findCandidateObjects(int maxResults, int offset, String priorToDate, boolean countQuery)
116-
throws EnhancementException {
117-
if (priorToDate == null) {
118-
return this.executeCandidateQueries(this.findCandidatesQueries, countQuery, maxResults, offset);
119-
} else {
120-
String limitClause = "";
121-
if (maxResults >= 0 && !countQuery)
122-
limitClause = "LIMIT " + maxResults;
123-
return this.executeCandidateQuery(String.format(this.findStaleCandidatesQuery, this.getTripleStoreQueryService()
124-
.getResourceIndexModelUri(), priorToDate, limitClause) + limitClause, countQuery);
125-
}
126-
}
127-
128-
@SuppressWarnings("unchecked")
129-
protected Object executeCandidateQueries(List<String> queries, boolean count, int limit, int offset) {
130-
int resultCount = 0;
131-
List<PID> results = new MaxSizeList<PID>(limit);
132-
for (String queryOriginal: queries) {
133-
String query = queryOriginal;
134-
if (!count)
135-
query += " LIMIT " + limit;
136-
query += " OFFSET " + offset;
137-
Object result = this.executeCandidateQuery(query, count);
138-
if (count) {
139-
resultCount += ((Integer)result).intValue();
140-
} else {
141-
List<PID> queryResults = (List<PID>) result;
142-
results.addAll(queryResults);
143-
if (results.size() >= limit)
144-
return results;
145-
}
146-
}
147-
148-
if (count)
149-
return resultCount;
150-
return results;
151-
}
152-
153-
@SuppressWarnings({ "unchecked", "rawtypes" })
154-
protected Object executeCandidateQuery(String query, boolean countQuery) {
155-
String format = "json";//((countQuery) ? "count/json" : "json");
156-
Map results = this.getTripleStoreQueryService().sendSPARQL(query, format);
157-
List<Map> bindings = (List<Map>) ((Map) results.get("results")).get("bindings");
158-
159-
if (LOG.isDebugEnabled())
160-
LOG.debug(results.toString());
161-
if (countQuery) {
162-
// TODO Mulgara doesn't support count queries in SPARQL, will need to redo for other triple stores
163-
return bindings.size();
164-
/*Map binding = bindings.get(0);
165-
int count = Integer.parseInt((String) ((Map) binding.get("count")).get("value"));
166-
return count;*/
167-
} else {
168-
List<PID> result = new ArrayList<PID>();
169-
for (Map binding : bindings) {
170-
String pidURI = (String) ((Map) binding.get("pid")).get("value");
171-
String label = (String) ((Map) binding.get("label")).get("value");
172-
result.add(new LabeledPID(pidURI, label));
173-
}
174-
175-
return result;
176-
}
177-
}
178-
179-
@Override
180-
public boolean isApplicable(EnhancementMessage message) throws EnhancementException {
181-
// Automatically isApplicable if the message is specifically asking for this service.
182-
String action = message.getQualifiedAction();
183-
if ((JMSMessageUtil.ServicesActions.APPLY_SERVICE_STACK.equals(action) || JMSMessageUtil.ServicesActions.APPLY_SERVICE.equals(action))
184-
&& this.getClass().getName().equals(message.getServiceName()))
185-
return true;
186-
187-
return askQueries(this.isApplicableQueries, message);
188-
}
189-
190-
protected boolean askQueries(List<String> queries, EnhancementMessage message) {
191-
for (String query: queries)
192-
if (askQuery(query, message))
193-
return true;
194-
return false;
195-
}
196-
197-
@SuppressWarnings("unchecked")
198-
protected boolean askQuery(String query, EnhancementMessage message) {
199-
query = String.format(query,
200-
this.tripleStoreQueryService.getResourceIndexModelUri(), message.getPid().getURI());
201-
Map<String, Object> result = this.getTripleStoreQueryService().sendSPARQL(query);
202-
return (Boolean.TRUE.equals(result.get("boolean")));
203-
}
204-
205-
/**
206-
* @param filePath
207-
* name of file to open. The file can reside anywhere in the classpath
208-
*/
209-
protected String readFileAsString(String filePath) throws java.io.IOException {
210-
return IOUtils.toString(this.getClass().getResourceAsStream(filePath), "UTF-8");
211-
}
212-
213-
protected class MaxSizeList<E> extends ArrayList<E> {
214-
private static final long serialVersionUID = 1L;
215-
private int limit = 10;
216-
217-
public MaxSizeList(int limit) {
218-
this.limit = limit;
219-
}
220-
221-
@Override
222-
public boolean add(E element) {
223-
if (this.size() >= limit) return true;
224-
return super.add(element);
225-
}
226-
227-
@Override
228-
public void add(int index, E element) {
229-
if (this.size() >= limit) return;
230-
super.add(index, element);
231-
}
232-
233-
@Override
234-
public boolean addAll(Collection<? extends E> c) {
235-
if (c.size() + this.size() < limit)
236-
return super.addAll(c);
237-
Iterator<? extends E> it = c.iterator();
238-
while (it.hasNext()) {
239-
this.add(it.next());
240-
if (this.size() == limit)
241-
return true;
242-
}
243-
return false;
244-
}
245-
}
24685
}

services-worker/src/main/java/edu/unc/lib/dl/cdr/services/ObjectEnhancementService.java

-49
Original file line numberDiff line numberDiff line change
@@ -15,47 +15,16 @@
1515
*/
1616
package edu.unc.lib.dl.cdr.services;
1717

18-
import java.util.List;
19-
2018
import org.jdom2.Element;
2119

2220
import edu.unc.lib.dl.cdr.services.exception.EnhancementException;
23-
import edu.unc.lib.dl.cdr.services.model.EnhancementApplication;
2421
import edu.unc.lib.dl.cdr.services.model.EnhancementMessage;
25-
import edu.unc.lib.dl.fedora.PID;
2622

2723
/**
2824
* @author Gregory Jansen
2925
*
3026
*/
3127
public interface ObjectEnhancementService {
32-
/**
33-
* Returns a list of candidate objects to which this service may apply. This method is allowed to return some false
34-
* positives. However, if the maxResults is equal or greater than the number of objects in the repository, then it
35-
* must include all applicable objects.
36-
*
37-
* @return
38-
*/
39-
public List<PID> findCandidateObjects(int maxResults, int offset) throws EnhancementException;
40-
41-
/**
42-
* Returns the total number of objects that would be returned by findCandidateObjects.
43-
*
44-
* @return
45-
* @throws EnhancementException
46-
*/
47-
public int countCandidateObjects() throws EnhancementException;
48-
49-
/**
50-
* Returns a list of candidate objects to which this service may apply to, including objects that it has applied to
51-
* in the past but which are now stale.
52-
*
53-
* @param maxResults
54-
* @param priorToDate
55-
* @return
56-
* @throws EnhancementException
57-
*/
58-
public List<PID> findStaleCandidateObjects(int maxResults, String priorToDate) throws EnhancementException;
5928

6029
/**
6130
* Creates a task for running this service on the object in question.
@@ -82,24 +51,6 @@ public interface ObjectEnhancementService {
8251
*/
8352
public boolean prefilterMessage(EnhancementMessage pid) throws EnhancementException;
8453

85-
/**
86-
* Checks to see if the enhancement should be re-applied. Generally a comparison of timestamps or software agent
87-
* strings. Returns true if this enhancement has never been run or will provide greater enhancement by running again.
88-
*
89-
* @param pid
90-
* @return true if the object is stale w/respect to this enhancement
91-
*/
92-
public boolean isStale(PID pid) throws EnhancementException;
93-
94-
/**
95-
* Determines the last date on which this service was applied to the object represented by pid.
96-
*
97-
* @param pid
98-
* @return the most recent date this service was applied to object pid, or null if it has never been applied.
99-
* @throws EnhancementException
100-
*/
101-
public EnhancementApplication getLastApplied(PID pid) throws EnhancementException;
102-
10354
/**
10455
* @return true if this service is currently active
10556
*/

0 commit comments

Comments
 (0)