From c7a152b9daf9b244648cc7432298afa8635f61a9 Mon Sep 17 00:00:00 2001 From: ppodsednik Date: Sat, 28 Dec 2024 16:10:25 +0100 Subject: [PATCH] Refactor: RepositoryAccess facade --- .../fedora/impl/RepositoryAccessImpl.java | 276 +++++++++++------- .../fedora/impl/tmp/ContentFormat.java | 25 ++ .../UnsupportedContentFormatException.java | 7 + 3 files changed, 206 insertions(+), 102 deletions(-) create mode 100644 shared/common/src/main/java/cz/incad/kramerius/fedora/impl/tmp/ContentFormat.java create mode 100644 shared/common/src/main/java/cz/incad/kramerius/fedora/impl/tmp/UnsupportedContentFormatException.java diff --git a/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/RepositoryAccessImpl.java b/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/RepositoryAccessImpl.java index ab430103e..98dd677d7 100644 --- a/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/RepositoryAccessImpl.java +++ b/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/RepositoryAccessImpl.java @@ -6,6 +6,8 @@ import com.qbizm.kramerius.imp.jaxb.DigitalObject; import cz.incad.kramerius.StreamHeadersObserver; import cz.incad.kramerius.fedora.RepositoryAccess; +import cz.incad.kramerius.fedora.impl.tmp.ContentFormat; +import cz.incad.kramerius.fedora.impl.tmp.UnsupportedContentFormatException; import cz.incad.kramerius.fedora.om.repository.AkubraRepository; import cz.incad.kramerius.fedora.om.repository.RepositoryDatastream; import cz.incad.kramerius.fedora.om.repository.RepositoryException; @@ -33,12 +35,14 @@ import javax.annotation.Nullable; import javax.xml.bind.JAXBException; +import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPathExpressionException; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringReader; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.time.LocalDateTime; import java.time.format.DateTimeParseException; import java.util.*; @@ -91,30 +95,6 @@ public boolean isPidAvailable(String pid) throws IOException, RepositoryExceptio boolean exists = this.repositoryApi.objectExists(pid); return exists; } - @Override - public InputStream getFoxml(String pid, boolean archive) throws IOException { - try { - if (archive){ - DigitalObject obj = manager.readObjectCloneFromStorage(pid); - manager.resolveArchivedDatastreams(obj); - return this.manager.marshallObject(obj); - }else { - return this.manager.retrieveObject(pid); - } - } catch (Exception e) { - throw new IOException(e); - } - } - @Override - public org.dom4j.Document getFoxml(String pid) throws RepositoryException, IOException { - Lock readLock = AkubraDOManager.getReadLock(pid); - try { - RepositoryObject object = akubraRepositoryImpl.getObject(pid); - return Utils.inputstreamToDocument(object.getFoxml(), true); - } finally { - readLock.unlock(); - } - } //-------- get object property @Override public String getProperty(String pid, String propertyName) throws IOException, RepositoryException { @@ -187,70 +167,6 @@ public List getDatastreamNames(String pid) throws RepositoryException, I readLock.unlock(); } } - // TODO here we always use AkubraUtils.getStreamContent but we have also AkubraObject.AkubraDatastream for fetching stream content - @Override - public InputStream getDataStream(String pid, String datastreamName) throws IOException { - try { - pid = makeSureObjectPid(pid); - if (this.accessLog != null && this.accessLog.isReportingAccess(pid, datastreamName)) { - reportAccess(pid, datastreamName); - } - DigitalObject object = manager.readObjectFromStorage(pid); - if (object != null) { - DatastreamVersionType stream = AkubraUtils.getLastStreamVersion(object, datastreamName); - if (stream != null) { - return AkubraUtils.getStreamContent(stream, manager); - } else { - throw new IOException("cannot find stream '" + datastreamName + "' for pid '" + pid + "'"); - } - } else { - throw new IOException("cannot find pid '" + pid + "'"); - } - } catch (Exception e) { - throw new IOException(e); - } - } - // XML data stream - @Override - public Document getStream(String pid, String streamName) throws IOException { - DigitalObject object = manager.readObjectFromStorage(pid); - if (object != null) { - DatastreamVersionType stream = AkubraUtils.getLastStreamVersion(object, streamName); - if (stream != null) { - if (stream.getXmlContent() != null) { - List elementList = stream.getXmlContent().getAny(); - if (!elementList.isEmpty()) { - return elementList.get(0).getOwnerDocument(); - } else { - throw new IOException("Datastream not found: " + pid + " - " + streamName); - } - } else { - throw new IOException("Expected XML datastream: " + pid + " - " + streamName); - } - } - throw new IOException("Datastream not found: " + pid + " - " + streamName); - } - throw new IOException("Object not found: " + pid); - } - @Override - public org.dom4j.Document getDatastreamXml(String pid, String dsId) throws RepositoryException, IOException { - Lock readLock = AkubraDOManager.getReadLock(pid); - try { - RepositoryObject object = akubraRepositoryImpl.getObject(pid); - if (object.streamExists(dsId)) { - org.dom4j.Document foxml = Utils.inputstreamToDocument(object.getFoxml(), true); - org.dom4j.Element dcEl = (org.dom4j.Element) Dom4jUtils.buildXpath(String.format("/foxml:digitalObject/foxml:datastream[@ID='%s']", dsId)).selectSingleNode(foxml); - org.dom4j.Element detached = (org.dom4j.Element) dcEl.detach(); - org.dom4j.Document result = DocumentHelper.createDocument(); - result.add(detached); - return result; - } else { - return null; - } - } finally { - readLock.unlock(); - } - } @Override public List> getStreamsOfObject(String pid) throws IOException { try { @@ -320,9 +236,120 @@ public Date getStreamLastmodifiedFlag(String pid, String streamName) throws IOEx } throw new IOException("Object not found: " + pid); } - //------ get stream CONTENT - // input data stream + //------------------------------------------------------------------------------------------------------------ + // NEW !!!!!!!!!!!!!!!! + public T getStreamContent(String pid, KnownDatastreams dsId, Class returnType) throws IOException, UnsupportedContentFormatException { + // Determine supported formats for the content + ContentFormat supportedFormat = determineSupportedFormat(id); + // Validate the requested format + if ((contentType == String.class && !supportedFormat.supportsString()) || + (contentType == InputStream.class && !supportedFormat.supportsStream()) || + (contentType == Document.class && !supportedFormat.supportsXml())) { + throw new UnsupportedContentFormatException("Format not supported for content ID: " + id); + } + // Retrieve content as bytes + byte[] rawContent = fetchContentFromStorage(id); + // Convert content to the requested format + if (contentType == String.class) { + return contentType.cast(new String(rawContent, StandardCharsets.UTF_8)); + } else if (contentType == InputStream.class) { + return contentType.cast(new ByteArrayInputStream(rawContent)); + } else if (contentType == Document.class) { + return contentType.cast(parseXml(rawContent)); + } + throw new IllegalArgumentException("Unsupported content type: " + contentType); + } + public T getFoxml(String pid, KnownDatastreams dsId, Class returnType) throws IOException, UnsupportedContentFormatException { + // Determine supported formats for the content + ContentFormat supportedFormat = determineSupportedFormat(id); + // Validate the requested format + if ((contentType == String.class && !supportedFormat.supportsString()) || + (contentType == InputStream.class && !supportedFormat.supportsStream()) || + (contentType == Document.class && !supportedFormat.supportsXml())) { + throw new UnsupportedContentFormatException("Format not supported for content ID: " + id); + } + // Retrieve content as bytes + byte[] rawContent = fetchContentFromStorage(id); + // Convert content to the requested format + if (contentType == String.class) { + return contentType.cast(new String(rawContent, StandardCharsets.UTF_8)); + } else if (contentType == InputStream.class) { + return contentType.cast(new ByteArrayInputStream(rawContent)); + } else if (contentType == Document.class) { + return contentType.cast(parseXml(rawContent)); + } + throw new IllegalArgumentException("Unsupported content type: " + contentType); + } + StreamContentHelper getStreamContentHelper(); + + // TODO here we always use AkubraUtils.getStreamContent but we have also AkubraObject.AkubraDatastream for fetching stream content + private ContentFormat determineSupportedFormat(String id) { + // Example logic to determine supported formats + if (id.startsWith("streamOnly")) { + return new ContentFormat(false, true, false); + } else { + return new ContentFormat(true, true, true); + } + } + private byte[] fetchContentFromStorage(String id) { + // Mock: Fetch content as bytes from your storage + return ("Content for ID: " + id + "").getBytes(StandardCharsets.UTF_8); + } + private Document parseXml(byte[] content) throws IOException { + try { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + return factory.newDocumentBuilder().parse(new ByteArrayInputStream(content)); + } catch (Exception e) { + throw new IOException("Failed to parse XML", e); + } + } + + // TODO just one method rendering 2 content types; also we can add new par dsId and if null the whole foxml will be returned + // <--- AkubraObject.getFoXml + public org.dom4j.Document getDatastreamXml(String pid, String dsId) throws RepositoryException, IOException { + Lock readLock = AkubraDOManager.getReadLock(pid); + try { + RepositoryObject object = akubraRepositoryImpl.getObject(pid); + if (object.streamExists(dsId)) { + org.dom4j.Document foxml = Utils.inputstreamToDocument(object.getFoxml(), true); + org.dom4j.Element dcEl = (org.dom4j.Element) Dom4jUtils.buildXpath(String.format("/foxml:digitalObject/foxml:datastream[@ID='%s']", dsId)).selectSingleNode(foxml); + org.dom4j.Element detached = (org.dom4j.Element) dcEl.detach(); + org.dom4j.Document result = DocumentHelper.createDocument(); + result.add(detached); + return result; + } else { + return null; + } + } finally { + readLock.unlock(); + } + } + @Override + public InputStream getFoxml(String pid, boolean archive) throws IOException { + try { + if (archive){ + DigitalObject obj = manager.readObjectCloneFromStorage(pid); + manager.resolveArchivedDatastreams(obj); + return this.manager.marshallObject(obj); + }else { + return this.manager.retrieveObject(pid); + } + } catch (Exception e) { + throw new IOException(e); + } + } @Override + public org.dom4j.Document getFoxml(String pid) throws RepositoryException, IOException { + Lock readLock = AkubraDOManager.getReadLock(pid); + try { + RepositoryObject object = akubraRepositoryImpl.getObject(pid); + return Utils.inputstreamToDocument(object.getFoxml(), true); + } finally { + readLock.unlock(); + } + } + + // <--- AkubraObject.getStream.getContent (6x) public InputStream getLatestVersionOfDatastream(String pid, String dsId) throws RepositoryException, IOException { Lock readLock = AkubraDOManager.getReadLock(pid); try { @@ -337,6 +364,62 @@ public InputStream getLatestVersionOfDatastream(String pid, String dsId) throws readLock.unlock(); } } + // <-- DigitalObject, AkubraUtils.getLastStreamVersion (3x) + public InputStream getDataStream(String pid, String datastreamName) throws IOException { + try { + pid = makeSureObjectPid(pid); + if (this.accessLog != null && this.accessLog.isReportingAccess(pid, datastreamName)) { + reportAccess(pid, datastreamName); + } + DigitalObject object = manager.readObjectFromStorage(pid); + if (object != null) { + DatastreamVersionType stream = AkubraUtils.getLastStreamVersion(object, datastreamName); + if (stream != null) { + return AkubraUtils.getStreamContent(stream, manager); + } else { + throw new IOException("cannot find stream '" + datastreamName + "' for pid '" + pid + "'"); + } + } else { + throw new IOException("cannot find pid '" + pid + "'"); + } + } catch (Exception e) { + throw new IOException(e); + } + } + + // getLatestVersionOfDatastream (4x) + public org.dom4j.Document getLatestVersionOfInlineXmlDatastream(String pid, String dsId) throws RepositoryException, IOException { + InputStream is = getLatestVersionOfDatastream(pid, dsId); + return is == null ? null : Utils.inputstreamToDocument(is, true); + } + // getLatestVersionOfDatastream (1x) + public String getLatestVersionOfManagedTextDatastream(String pid, String dsId) throws RepositoryException, IOException { + InputStream is = getLatestVersionOfDatastream(pid, dsId); + return is == null ? null : Utils.inputstreamToString(is); + } + + // <-- DigitalObject, AkubraUtils.getLastStreamVersion (3x) + public Document getStream(String pid, String streamName) throws IOException { + DigitalObject object = manager.readObjectFromStorage(pid); + if (object != null) { + DatastreamVersionType stream = AkubraUtils.getLastStreamVersion(object, streamName); + if (stream != null) { + if (stream.getXmlContent() != null) { + List elementList = stream.getXmlContent().getAny(); + if (!elementList.isEmpty()) { + return elementList.get(0).getOwnerDocument(); + } else { + throw new IOException("Datastream not found: " + pid + " - " + streamName); + } + } else { + throw new IOException("Expected XML datastream: " + pid + " - " + streamName); + } + } + throw new IOException("Datastream not found: " + pid + " - " + streamName); + } + throw new IOException("Object not found: " + pid); + } + @Override public InputStream getImgFull(String pid) throws IOException, RepositoryException { this.accessLog.reportAccess(pid, KnownDatastreams.IMG_FULL.toString()); @@ -385,22 +468,10 @@ public InputStream getSmallThumbnail(String pid) throws IOException { public InputStream getImageFULL(String pid) throws IOException { return getDataStream(pid, FedoraUtils.IMG_FULL_STREAM); } - // text data stream - @Override - public String getLatestVersionOfManagedTextDatastream(String pid, String dsId) throws RepositoryException, IOException { - InputStream is = getLatestVersionOfDatastream(pid, dsId); - return is == null ? null : Utils.inputstreamToString(is); - } @Override public String getOcrText(String pid) throws IOException, RepositoryException { return getLatestVersionOfManagedTextDatastream(pid, KnownDatastreams.OCR_TEXT.toString()); } - // XML data stream - @Override - public org.dom4j.Document getLatestVersionOfInlineXmlDatastream(String pid, String dsId) throws RepositoryException, IOException { - InputStream is = getLatestVersionOfDatastream(pid, dsId); - return is == null ? null : Utils.inputstreamToDocument(is, true); - } @Override public org.dom4j.Document getRelsExt(String pid, boolean namespaceAware) throws IOException, RepositoryException { org.dom4j.Document doc = getLatestVersionOfInlineXmlDatastream(pid, KnownDatastreams.RELS_EXT.toString()); @@ -458,6 +529,7 @@ public org.dom4j.Document getOcrAlto(String pid, boolean namespaceAware) throws } return doc; } + //---------------------------------------------------------------------------------------------------------------- // --- check stream exists @Override public boolean isStreamAvailable(String pid, String dsId) throws IOException, RepositoryException { diff --git a/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/tmp/ContentFormat.java b/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/tmp/ContentFormat.java new file mode 100644 index 000000000..fab601350 --- /dev/null +++ b/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/tmp/ContentFormat.java @@ -0,0 +1,25 @@ +package cz.incad.kramerius.fedora.impl.tmp; + +public class ContentFormat { + private final boolean supportsString; + private final boolean supportsStream; + private final boolean supportsXml; + + public ContentFormat(boolean supportsString, boolean supportsStream, boolean supportsXml) { + this.supportsString = supportsString; + this.supportsStream = supportsStream; + this.supportsXml = supportsXml; + } + + public boolean supportsString() { + return supportsString; + } + + public boolean supportsStream() { + return supportsStream; + } + + public boolean supportsXml() { + return supportsXml; + } +} \ No newline at end of file diff --git a/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/tmp/UnsupportedContentFormatException.java b/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/tmp/UnsupportedContentFormatException.java new file mode 100644 index 000000000..8520d170e --- /dev/null +++ b/shared/common/src/main/java/cz/incad/kramerius/fedora/impl/tmp/UnsupportedContentFormatException.java @@ -0,0 +1,7 @@ +package cz.incad.kramerius.fedora.impl.tmp; + +public class UnsupportedContentFormatException extends Exception { + public UnsupportedContentFormatException(String message) { + super(message); + } +} \ No newline at end of file