Skip to content

Commit

Permalink
A simple implementation of a feature where an OAI client can be instructed to return harvested metadata records unparsed #284
Browse files Browse the repository at this point in the history
  • Loading branch information
landreev committed Feb 6, 2025
1 parent 9e22f93 commit 8b064ba
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ public XOAIMetadata getXoaiMetadata() {
if (element instanceof XOAIMetadata) return (XOAIMetadata) element;
else return null;
}

/**
 * Returns the unparsed metadata text carried by the wrapped element, if there is any.
 *
 * @return the result of {@code asUnparsedString()} when the wrapped element is an
 *     {@code EchoElement}; {@code null} for any other kind of element
 */
public String getMetadataAsString() {
    // Only EchoElement exposes its content as an unparsed String.
    if (!(element instanceof EchoElement)) {
        return null;
    }
    final EchoElement echo = (EchoElement) element;
    return echo.asUnparsedString();
}

/**
* This is here for Dataverse 4/5 backward compatibility.
Expand Down
7 changes: 7 additions & 0 deletions xoai-common/src/main/java/io/gdcc/xoai/xml/EchoElement.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ public void write(final XmlWriter writer) throws XmlWriteException {
"Cannot write XML when none given (both stream and string null)");
}
}

/**
 * Exposes the XML text stored in {@code xmlString} exactly as held, with no processing.
 *
 * @return the value of {@code xmlString}, or {@code null} when no string is set
 */
public String asUnparsedString() {
    // A null field is handed back as null, so no separate null branch is needed.
    return xmlString;
}

private void write(final XmlWriter writer, final InputStream inStream)
throws XmlWriteException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class Context {
// Transformers looked up by a String key.
// NOTE(review): the key is presumably the metadata prefix — confirm against getMetadataTransformer.
private final Map<String, Transformer> metadataTransformers = new HashMap<>();
// NOTE(review): presumably the base URL of the OAI endpoint being harvested — confirm.
private String baseUrl;
// NOTE(review): presumably the date granularity used when building requests — confirm.
private Granularity granularity;
// When true, the "metadata" sections of harvested records are kept as unparsed Strings
// rather than being parsed; disabled by default.
private boolean saveUnparsedMetadata = false;
// The client handed back by getClient().
private OAIClient client;

public Context() {
Expand Down Expand Up @@ -90,6 +91,29 @@ public Context withOAIClient(OAIClient client) {
/**
 * Gives access to the OAI client held by this context.
 *
 * @return the client currently stored in this context
 */
public OAIClient getClient() {
    return this.client;
}

/**
 * Tells whether this harvester is configured to skip parsing the "metadata" sections
 * of OAI records in the bodies of GetRecord and ListRecords responses, caching them
 * and making them available as unparsed Strings instead.
 *
 * @return {@code true} if metadata is to be kept unparsed, {@code false} otherwise
 */
public boolean isSaveUnparsedMetadata() {
    return saveUnparsedMetadata;
}

/**
 * Turns on unparsed-metadata mode: the harvester will not attempt to parse the
 * "metadata" sections of OAI records in the bodies of GetRecord and ListRecords
 * responses, and will cache and expose them as Strings instead.
 *
 * @return this Context, so that configuration calls can be chained
 */
public Context withSaveUnparsedMetadata() {
    saveUnparsedMetadata = true;
    return this;
}

public enum KnownTransformer {
OAI_DC("to_xoai/oai_dc.xsl");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,27 @@ public Record parse(XmlReader reader) throws XmlReaderException {
if (!record.getHeader().isDeleted()) {
reader.next(elementName(localPart(equalTo("metadata")))).next(aStartElement());
String content = reader.retrieveCurrentAsString();
ByteArrayInputStream inputStream =
new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
XSLPipeline pipeline =
new XSLPipeline(inputStream, true)
.apply(context.getMetadataTransformer(metadataPrefix));
System.out.println("Metadata content: "+content);

if (this.context.isSaveUnparsedMetadata()) {
record.withMetadata(new Metadata(content));
} else {
ByteArrayInputStream inputStream
= new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));

if (context.hasTransformer()) pipeline.apply(context.getTransformer());
XSLPipeline pipeline
= new XSLPipeline(inputStream, true)
.apply(context.getMetadataTransformer(metadataPrefix));

try {
record.withMetadata(new Metadata(new MetadataParser().parse(pipeline.process())));
} catch (TransformerException e) {
throw new InternalHarvestException("Unable to process transformer", e);
if (context.hasTransformer()) {
pipeline.apply(context.getTransformer());
}

try {
record.withMetadata(new Metadata(new MetadataParser().parse(pipeline.process())));
} catch (TransformerException e) {
throw new InternalHarvestException("Unable to process transformer", e);
}
}
}

Expand Down

0 comments on commit 8b064ba

Please sign in to comment.